diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/python/scandir | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1. (upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/scandir')
-rw-r--r-- | third_party/python/scandir/LICENSE.txt | 27 | ||||
-rw-r--r-- | third_party/python/scandir/MANIFEST.in | 6 | ||||
-rw-r--r-- | third_party/python/scandir/PKG-INFO | 238 | ||||
-rw-r--r-- | third_party/python/scandir/README.rst | 211 | ||||
-rw-r--r-- | third_party/python/scandir/_scandir.c | 1833 | ||||
-rw-r--r-- | third_party/python/scandir/benchmark.py | 192 | ||||
-rw-r--r-- | third_party/python/scandir/osdefs.h | 48 | ||||
-rw-r--r-- | third_party/python/scandir/scandir.py | 693 | ||||
-rw-r--r-- | third_party/python/scandir/setup.cfg | 4 | ||||
-rw-r--r-- | third_party/python/scandir/setup.py | 80 | ||||
-rw-r--r-- | third_party/python/scandir/test/run_tests.py | 25 | ||||
-rw-r--r-- | third_party/python/scandir/test/test_scandir.py | 320 | ||||
-rw-r--r-- | third_party/python/scandir/test/test_walk.py | 213 | ||||
-rw-r--r-- | third_party/python/scandir/winreparse.h | 53 |
14 files changed, 3943 insertions, 0 deletions
diff --git a/third_party/python/scandir/LICENSE.txt b/third_party/python/scandir/LICENSE.txt new file mode 100644 index 0000000000..0759f503f2 --- /dev/null +++ b/third_party/python/scandir/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2012, Ben Hoyt +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +* Neither the name of Ben Hoyt nor the names of its contributors may be used +to endorse or promote products derived from this software without specific +prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/third_party/python/scandir/MANIFEST.in b/third_party/python/scandir/MANIFEST.in new file mode 100644 index 0000000000..7524c5385a --- /dev/null +++ b/third_party/python/scandir/MANIFEST.in @@ -0,0 +1,6 @@ +include *.py +include *.c +include *.h +include *.txt +include *.rst +include test/*.py diff --git a/third_party/python/scandir/PKG-INFO b/third_party/python/scandir/PKG-INFO new file mode 100644 index 0000000000..e4625662b1 --- /dev/null +++ b/third_party/python/scandir/PKG-INFO @@ -0,0 +1,238 @@ +Metadata-Version: 1.1 +Name: scandir +Version: 1.9.0 +Summary: scandir, a better directory iterator and faster os.walk() +Home-page: https://github.com/benhoyt/scandir +Author: Ben Hoyt +Author-email: benhoyt@gmail.com +License: New BSD License +Description-Content-Type: UNKNOWN +Description: + scandir, a better directory iterator and faster os.walk() + ========================================================= + + .. image:: https://img.shields.io/pypi/v/scandir.svg + :target: https://pypi.python.org/pypi/scandir + :alt: scandir on PyPI (Python Package Index) + + .. image:: https://travis-ci.org/benhoyt/scandir.svg?branch=master + :target: https://travis-ci.org/benhoyt/scandir + :alt: Travis CI tests (Linux) + + .. image:: https://ci.appveyor.com/api/projects/status/github/benhoyt/scandir?branch=master&svg=true + :target: https://ci.appveyor.com/project/benhoyt/scandir + :alt: Appveyor tests (Windows) + + + ``scandir()`` is a directory iteration function like ``os.listdir()``, + except that instead of returning a list of bare filenames, it yields + ``DirEntry`` objects that include file type and stat information along + with the name. Using ``scandir()`` increases the speed of ``os.walk()`` + by 2-20 times (depending on the platform and file system) by avoiding + unnecessary calls to ``os.stat()`` in most cases. + + + Now included in a Python near you! 
+ ---------------------------------- + + ``scandir`` has been included in the Python 3.5 standard library as + ``os.scandir()``, and the related performance improvements to + ``os.walk()`` have also been included. So if you're lucky enough to be + using Python 3.5 (release date September 13, 2015) you get the benefit + immediately, otherwise just + `download this module from PyPI <https://pypi.python.org/pypi/scandir>`_, + install it with ``pip install scandir``, and then do something like + this in your code: + + .. code-block:: python + + # Use the built-in version of scandir/walk if possible, otherwise + # use the scandir module version + try: + from os import scandir, walk + except ImportError: + from scandir import scandir, walk + + `PEP 471 <https://www.python.org/dev/peps/pep-0471/>`_, which is the + PEP that proposes including ``scandir`` in the Python standard library, + was `accepted <https://mail.python.org/pipermail/python-dev/2014-July/135561.html>`_ + in July 2014 by Victor Stinner, the BDFL-delegate for the PEP. + + This ``scandir`` module is intended to work on Python 2.6+ and Python + 3.2+ (and it has been tested on those versions). + + + Background + ---------- + + Python's built-in ``os.walk()`` is significantly slower than it needs to be, + because -- in addition to calling ``listdir()`` on each directory -- it calls + ``stat()`` on each file to determine whether the filename is a directory or not. + But both ``FindFirstFile`` / ``FindNextFile`` on Windows and ``readdir`` on Linux/OS + X already tell you whether the files returned are directories or not, so + no further ``stat`` system calls are needed. In short, you can reduce the number + of system calls from about 2N to N, where N is the total number of files and + directories in the tree. 
+ + In practice, removing all those extra system calls makes ``os.walk()`` about + **7-50 times as fast on Windows, and about 3-10 times as fast on Linux and Mac OS + X.** So we're not talking about micro-optimizations. See more benchmarks + in the "Benchmarks" section below. + + Somewhat relatedly, many people have also asked for a version of + ``os.listdir()`` that yields filenames as it iterates instead of returning them + as one big list. This improves memory efficiency for iterating very large + directories. + + So as well as a faster ``walk()``, scandir adds a new ``scandir()`` function. + They're pretty easy to use, but see "The API" below for the full docs. + + + Benchmarks + ---------- + + Below are results showing how many times as fast ``scandir.walk()`` is than + ``os.walk()`` on various systems, found by running ``benchmark.py`` with no + arguments: + + ==================== ============== ============= + System version Python version Times as fast + ==================== ============== ============= + Windows 7 64-bit 2.7.7 64-bit 10.4 + Windows 7 64-bit SSD 2.7.7 64-bit 10.3 + Windows 7 64-bit NFS 2.7.6 64-bit 36.8 + Windows 7 64-bit SSD 3.4.1 64-bit 9.9 + Windows 7 64-bit SSD 3.5.0 64-bit 9.5 + CentOS 6.2 64-bit 2.6.6 64-bit 3.9 + Ubuntu 14.04 64-bit 2.7.6 64-bit 5.8 + Mac OS X 10.9.3 2.7.5 64-bit 3.8 + ==================== ============== ============= + + All of the above tests were done using the fast C version of scandir + (source code in ``_scandir.c``). + + Note that the gains are less than the above on smaller directories and greater + on larger directories. This is why ``benchmark.py`` creates a test directory + tree with a standardized size. + + + The API + ------- + + walk() + ~~~~~~ + + The API for ``scandir.walk()`` is exactly the same as ``os.walk()``, so just + `read the Python docs <https://docs.python.org/3.5/library/os.html#os.walk>`_. 
+ + scandir() + ~~~~~~~~~ + + The full docs for ``scandir()`` and the ``DirEntry`` objects it yields are + available in the `Python documentation here <https://docs.python.org/3.5/library/os.html#os.scandir>`_. + But below is a brief summary as well. + + scandir(path='.') -> iterator of DirEntry objects for given path + + Like ``listdir``, ``scandir`` calls the operating system's directory + iteration system calls to get the names of the files in the given + ``path``, but it's different from ``listdir`` in two ways: + + * Instead of returning bare filename strings, it returns lightweight + ``DirEntry`` objects that hold the filename string and provide + simple methods that allow access to the additional data the + operating system may have returned. + + * It returns a generator instead of a list, so that ``scandir`` acts + as a true iterator instead of returning the full list immediately. + + ``scandir()`` yields a ``DirEntry`` object for each file and + sub-directory in ``path``. Just like ``listdir``, the ``'.'`` + and ``'..'`` pseudo-directories are skipped, and the entries are + yielded in system-dependent order. 
Each ``DirEntry`` object has the + following attributes and methods: + + * ``name``: the entry's filename, relative to the scandir ``path`` + argument (corresponds to the return values of ``os.listdir``) + + * ``path``: the entry's full path name (not necessarily an absolute + path) -- the equivalent of ``os.path.join(scandir_path, entry.name)`` + + * ``is_dir(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_dir()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + + * ``is_file(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_file()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + + * ``is_symlink()``: similar to ``pathlib.Path.is_symlink()``, but the + return value is cached on the ``DirEntry`` object; doesn't require a + system call in most cases + + * ``stat(*, follow_symlinks=True)``: like ``os.stat()``, but the + return value is cached on the ``DirEntry`` object; does not require a + system call on Windows (except for symlinks); don't follow symbolic links + (like ``os.lstat()``) if ``follow_symlinks`` is False + + * ``inode()``: return the inode number of the entry; the return value + is cached on the ``DirEntry`` object + + Here's a very simple example of ``scandir()`` showing use of the + ``DirEntry.name`` attribute and the ``DirEntry.is_dir()`` method: + + .. code-block:: python + + def subdirs(path): + """Yield directory names not starting with '.' under given path.""" + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_dir(): + yield entry.name + + This ``subdirs()`` function will be significantly faster with scandir + than ``os.listdir()`` and ``os.path.isdir()`` on both Windows and POSIX + systems, especially on medium-sized or large directories. 
+ + + Further reading + --------------- + + * `The Python docs for scandir <https://docs.python.org/3.5/library/os.html#os.scandir>`_ + * `PEP 471 <https://www.python.org/dev/peps/pep-0471/>`_, the + (now-accepted) Python Enhancement Proposal that proposed adding + ``scandir`` to the standard library -- a lot of details here, + including rejected ideas and previous discussion + + + Flames, comments, bug reports + ----------------------------- + + Please send flames, comments, and questions about scandir to Ben Hoyt: + + http://benhoyt.com/ + + File bug reports for the version in the Python 3.5 standard library + `here <https://docs.python.org/3.5/bugs.html>`_, or file bug reports + or feature requests for this module at the GitHub project page: + + https://github.com/benhoyt/scandir + +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Operating System :: OS Independent +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Topic :: System :: Filesystems +Classifier: Topic :: System :: Operating System +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: Implementation :: CPython diff --git a/third_party/python/scandir/README.rst b/third_party/python/scandir/README.rst new file mode 100644 index 0000000000..a5537517dd --- /dev/null +++ b/third_party/python/scandir/README.rst @@ -0,0 +1,211 @@ + +scandir, a better directory iterator and faster os.walk() +========================================================= + +.. 
image:: https://img.shields.io/pypi/v/scandir.svg + :target: https://pypi.python.org/pypi/scandir + :alt: scandir on PyPI (Python Package Index) + +.. image:: https://travis-ci.org/benhoyt/scandir.svg?branch=master + :target: https://travis-ci.org/benhoyt/scandir + :alt: Travis CI tests (Linux) + +.. image:: https://ci.appveyor.com/api/projects/status/github/benhoyt/scandir?branch=master&svg=true + :target: https://ci.appveyor.com/project/benhoyt/scandir + :alt: Appveyor tests (Windows) + + +``scandir()`` is a directory iteration function like ``os.listdir()``, +except that instead of returning a list of bare filenames, it yields +``DirEntry`` objects that include file type and stat information along +with the name. Using ``scandir()`` increases the speed of ``os.walk()`` +by 2-20 times (depending on the platform and file system) by avoiding +unnecessary calls to ``os.stat()`` in most cases. + + +Now included in a Python near you! +---------------------------------- + +``scandir`` has been included in the Python 3.5 standard library as +``os.scandir()``, and the related performance improvements to +``os.walk()`` have also been included. So if you're lucky enough to be +using Python 3.5 (release date September 13, 2015) you get the benefit +immediately, otherwise just +`download this module from PyPI <https://pypi.python.org/pypi/scandir>`_, +install it with ``pip install scandir``, and then do something like +this in your code: + +.. code-block:: python + + # Use the built-in version of scandir/walk if possible, otherwise + # use the scandir module version + try: + from os import scandir, walk + except ImportError: + from scandir import scandir, walk + +`PEP 471 <https://www.python.org/dev/peps/pep-0471/>`_, which is the +PEP that proposes including ``scandir`` in the Python standard library, +was `accepted <https://mail.python.org/pipermail/python-dev/2014-July/135561.html>`_ +in July 2014 by Victor Stinner, the BDFL-delegate for the PEP. 
+ +This ``scandir`` module is intended to work on Python 2.6+ and Python +3.2+ (and it has been tested on those versions). + + +Background +---------- + +Python's built-in ``os.walk()`` is significantly slower than it needs to be, +because -- in addition to calling ``listdir()`` on each directory -- it calls +``stat()`` on each file to determine whether the filename is a directory or not. +But both ``FindFirstFile`` / ``FindNextFile`` on Windows and ``readdir`` on Linux/OS +X already tell you whether the files returned are directories or not, so +no further ``stat`` system calls are needed. In short, you can reduce the number +of system calls from about 2N to N, where N is the total number of files and +directories in the tree. + +In practice, removing all those extra system calls makes ``os.walk()`` about +**7-50 times as fast on Windows, and about 3-10 times as fast on Linux and Mac OS +X.** So we're not talking about micro-optimizations. See more benchmarks +in the "Benchmarks" section below. + +Somewhat relatedly, many people have also asked for a version of +``os.listdir()`` that yields filenames as it iterates instead of returning them +as one big list. This improves memory efficiency for iterating very large +directories. + +So as well as a faster ``walk()``, scandir adds a new ``scandir()`` function. +They're pretty easy to use, but see "The API" below for the full docs. 
+ + +Benchmarks +---------- + +Below are results showing how many times as fast ``scandir.walk()`` is than +``os.walk()`` on various systems, found by running ``benchmark.py`` with no +arguments: + +==================== ============== ============= +System version Python version Times as fast +==================== ============== ============= +Windows 7 64-bit 2.7.7 64-bit 10.4 +Windows 7 64-bit SSD 2.7.7 64-bit 10.3 +Windows 7 64-bit NFS 2.7.6 64-bit 36.8 +Windows 7 64-bit SSD 3.4.1 64-bit 9.9 +Windows 7 64-bit SSD 3.5.0 64-bit 9.5 +CentOS 6.2 64-bit 2.6.6 64-bit 3.9 +Ubuntu 14.04 64-bit 2.7.6 64-bit 5.8 +Mac OS X 10.9.3 2.7.5 64-bit 3.8 +==================== ============== ============= + +All of the above tests were done using the fast C version of scandir +(source code in ``_scandir.c``). + +Note that the gains are less than the above on smaller directories and greater +on larger directories. This is why ``benchmark.py`` creates a test directory +tree with a standardized size. + + +The API +------- + +walk() +~~~~~~ + +The API for ``scandir.walk()`` is exactly the same as ``os.walk()``, so just +`read the Python docs <https://docs.python.org/3.5/library/os.html#os.walk>`_. + +scandir() +~~~~~~~~~ + +The full docs for ``scandir()`` and the ``DirEntry`` objects it yields are +available in the `Python documentation here <https://docs.python.org/3.5/library/os.html#os.scandir>`_. +But below is a brief summary as well. + + scandir(path='.') -> iterator of DirEntry objects for given path + +Like ``listdir``, ``scandir`` calls the operating system's directory +iteration system calls to get the names of the files in the given +``path``, but it's different from ``listdir`` in two ways: + +* Instead of returning bare filename strings, it returns lightweight + ``DirEntry`` objects that hold the filename string and provide + simple methods that allow access to the additional data the + operating system may have returned. 
+ +* It returns a generator instead of a list, so that ``scandir`` acts + as a true iterator instead of returning the full list immediately. + +``scandir()`` yields a ``DirEntry`` object for each file and +sub-directory in ``path``. Just like ``listdir``, the ``'.'`` +and ``'..'`` pseudo-directories are skipped, and the entries are +yielded in system-dependent order. Each ``DirEntry`` object has the +following attributes and methods: + +* ``name``: the entry's filename, relative to the scandir ``path`` + argument (corresponds to the return values of ``os.listdir``) + +* ``path``: the entry's full path name (not necessarily an absolute + path) -- the equivalent of ``os.path.join(scandir_path, entry.name)`` + +* ``is_dir(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_dir()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + +* ``is_file(*, follow_symlinks=True)``: similar to + ``pathlib.Path.is_file()``, but the return value is cached on the + ``DirEntry`` object; doesn't require a system call in most cases; + don't follow symbolic links if ``follow_symlinks`` is False + +* ``is_symlink()``: similar to ``pathlib.Path.is_symlink()``, but the + return value is cached on the ``DirEntry`` object; doesn't require a + system call in most cases + +* ``stat(*, follow_symlinks=True)``: like ``os.stat()``, but the + return value is cached on the ``DirEntry`` object; does not require a + system call on Windows (except for symlinks); don't follow symbolic links + (like ``os.lstat()``) if ``follow_symlinks`` is False + +* ``inode()``: return the inode number of the entry; the return value + is cached on the ``DirEntry`` object + +Here's a very simple example of ``scandir()`` showing use of the +``DirEntry.name`` attribute and the ``DirEntry.is_dir()`` method: + +.. 
code-block:: python + + def subdirs(path): + """Yield directory names not starting with '.' under given path.""" + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_dir(): + yield entry.name + +This ``subdirs()`` function will be significantly faster with scandir +than ``os.listdir()`` and ``os.path.isdir()`` on both Windows and POSIX +systems, especially on medium-sized or large directories. + + +Further reading +--------------- + +* `The Python docs for scandir <https://docs.python.org/3.5/library/os.html#os.scandir>`_ +* `PEP 471 <https://www.python.org/dev/peps/pep-0471/>`_, the + (now-accepted) Python Enhancement Proposal that proposed adding + ``scandir`` to the standard library -- a lot of details here, + including rejected ideas and previous discussion + + +Flames, comments, bug reports +----------------------------- + +Please send flames, comments, and questions about scandir to Ben Hoyt: + +http://benhoyt.com/ + +File bug reports for the version in the Python 3.5 standard library +`here <https://docs.python.org/3.5/bugs.html>`_, or file bug reports +or feature requests for this module at the GitHub project page: + +https://github.com/benhoyt/scandir diff --git a/third_party/python/scandir/_scandir.c b/third_party/python/scandir/_scandir.c new file mode 100644 index 0000000000..b35f17041d --- /dev/null +++ b/third_party/python/scandir/_scandir.c @@ -0,0 +1,1833 @@ +/* C speedups for scandir module + +This is divided into four sections (each prefixed with a "SECTION:" +comment): + +1) Python 2/3 compatibility +2) Helper utilities from posixmodule.c, fileutils.h, etc +3) SECTION: Main DirEntry and scandir implementation, taken from + Python 3.5's posixmodule.c +4) Module and method definitions and initialization code + +*/ + +#include <Python.h> +#include <structseq.h> +#include <structmember.h> +#include "osdefs.h" + +#ifdef MS_WINDOWS +#include <windows.h> +#include "winreparse.h" +#else +#include <dirent.h> +#ifndef 
HAVE_DIRENT_H +#define HAVE_DIRENT_H 1 +#endif +#endif + +#define MODNAME "scandir" + + +/* SECTION: Python 2/3 compatibility */ + +#if PY_MAJOR_VERSION >= 3 +#define INIT_ERROR return NULL +#else +#define INIT_ERROR return +// Because on PyPy, Py_FileSystemDefaultEncoding is (was) defined to be NULL +// (see PyPy Bitbucket issue #2669) +#define FS_ENCODING (Py_FileSystemDefaultEncoding ? Py_FileSystemDefaultEncoding : "UTF-8") +#endif + +#if PY_MAJOR_VERSION < 3 || PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION <= 2 +#define _Py_IDENTIFIER(name) static char * PyId_##name = #name; +#define _PyObject_GetAttrId(obj, pyid_name) PyObject_GetAttrString((obj), *(pyid_name)) +#define PyExc_FileNotFoundError PyExc_OSError +#define PyUnicode_AsUnicodeAndSize(unicode, addr_length) \ + PyUnicode_AsUnicode(unicode); *(addr_length) = PyUnicode_GetSize(unicode) +#endif + +// Because on PyPy not working without +#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION > 2 && defined(PYPY_VERSION_NUM) +#define _Py_IDENTIFIER(name) static char * PyId_##name = #name; +#define _PyObject_GetAttrId(obj, pyid_name) PyObject_GetAttrString((obj), *(pyid_name)) +#endif + +/* SECTION: Helper utilities from posixmodule.c, fileutils.h, etc */ + +#if !defined(MS_WINDOWS) && defined(DT_UNKNOWN) +#define HAVE_DIRENT_D_TYPE 1 +#endif + +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#define NAMLEN(dirent) strlen((dirent)->d_name) +#else +#if defined(__WATCOMC__) && !defined(__QNX__) +#include <direct.h> +#define NAMLEN(dirent) strlen((dirent)->d_name) +#else +#define dirent direct +#define NAMLEN(dirent) (dirent)->d_namlen +#endif +#ifdef HAVE_SYS_NDIR_H +#include <sys/ndir.h> +#endif +#ifdef HAVE_SYS_DIR_H +#include <sys/dir.h> +#endif +#ifdef HAVE_NDIR_H +#include <ndir.h> +#endif +#endif + +#ifndef Py_CLEANUP_SUPPORTED +#define Py_CLEANUP_SUPPORTED 0x20000 +#endif + +#ifndef S_IFLNK +/* Windows doesn't define S_IFLNK but posixmodule.c maps + * IO_REPARSE_TAG_SYMLINK to S_IFLNK */ +# define S_IFLNK 0120000 +#endif 
+ +// _Py_stat_struct is already defined in fileutils.h on Python 3.5+ +// But not in PyPy +#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 5) || defined(PYPY_VERSION_NUM) +#ifdef MS_WINDOWS +struct _Py_stat_struct { + unsigned long st_dev; + unsigned __int64 st_ino; + unsigned short st_mode; + int st_nlink; + int st_uid; + int st_gid; + unsigned long st_rdev; + __int64 st_size; + time_t st_atime; + int st_atime_nsec; + time_t st_mtime; + int st_mtime_nsec; + time_t st_ctime; + int st_ctime_nsec; + unsigned long st_file_attributes; +}; +#else +# define _Py_stat_struct stat +#endif +#endif + +/* choose the appropriate stat and fstat functions and return structs */ +#undef STAT +#undef FSTAT +#undef STRUCT_STAT +#ifdef MS_WINDOWS +# define STAT win32_stat +# define LSTAT win32_lstat +# define FSTAT _Py_fstat_noraise +# define STRUCT_STAT struct _Py_stat_struct +#else +# define STAT stat +# define LSTAT lstat +# define FSTAT fstat +# define STRUCT_STAT struct stat +#endif + +#ifdef MS_WINDOWS + +static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */ + +static void +FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) +{ + /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */ + /* Cannot simply cast and dereference in_ptr, + since it might not be aligned properly */ + __int64 in; + memcpy(&in, in_ptr, sizeof(in)); + *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. 
*/ + *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); +} + +/* Below, we *know* that ugo+r is 0444 */ +#if _S_IREAD != 0400 +#error Unsupported C library +#endif +static int +attributes_to_mode(DWORD attr) +{ + int m = 0; + if (attr & FILE_ATTRIBUTE_DIRECTORY) + m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */ + else + m |= _S_IFREG; + if (attr & FILE_ATTRIBUTE_READONLY) + m |= 0444; + else + m |= 0666; + return m; +} + +void +_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, + struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->dwFileAttributes); + result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; + result->st_dev = info->dwVolumeSerialNumber; + result->st_rdev = result->st_dev; + FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->nNumberOfLinks; + result->st_ino = (((unsigned __int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow; + if (reparse_tag == IO_REPARSE_TAG_SYMLINK) { + /* first clear the S_IFMT bits */ + result->st_mode ^= (result->st_mode & S_IFMT); + /* now set the bits that make this a symlink */ + result->st_mode |= S_IFLNK; + } + result->st_file_attributes = info->dwFileAttributes; +} + +static BOOL +get_target_path(HANDLE hdl, wchar_t **target_path) +{ + int buf_size, result_length; + wchar_t *buf; + + /* We have a good handle to the target, use it to determine + the target path name (then we'll call lstat on it). 
*/ + buf_size = GetFinalPathNameByHandleW(hdl, 0, 0, + VOLUME_NAME_DOS); + if(!buf_size) + return FALSE; + + buf = PyMem_New(wchar_t, buf_size+1); + if (!buf) { + SetLastError(ERROR_OUTOFMEMORY); + return FALSE; + } + + result_length = GetFinalPathNameByHandleW(hdl, + buf, buf_size, VOLUME_NAME_DOS); + + if(!result_length) { + PyMem_Free(buf); + return FALSE; + } + + if(!CloseHandle(hdl)) { + PyMem_Free(buf); + return FALSE; + } + + buf[result_length] = 0; + + *target_path = buf; + return TRUE; +} + +static int +win32_get_reparse_tag(HANDLE reparse_point_handle, ULONG *reparse_tag) +{ + char target_buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; + REPARSE_DATA_BUFFER *rdb = (REPARSE_DATA_BUFFER *)target_buffer; + DWORD n_bytes_returned; + + if (0 == DeviceIoControl( + reparse_point_handle, + FSCTL_GET_REPARSE_POINT, + NULL, 0, /* in buffer */ + target_buffer, sizeof(target_buffer), + &n_bytes_returned, + NULL)) /* we're not using OVERLAPPED_IO */ + return FALSE; + + if (reparse_tag) + *reparse_tag = rdb->ReparseTag; + + return TRUE; +} + +static void +find_data_to_file_info_w(WIN32_FIND_DATAW *pFileData, + BY_HANDLE_FILE_INFORMATION *info, + ULONG *reparse_tag) +{ + memset(info, 0, sizeof(*info)); + info->dwFileAttributes = pFileData->dwFileAttributes; + info->ftCreationTime = pFileData->ftCreationTime; + info->ftLastAccessTime = pFileData->ftLastAccessTime; + info->ftLastWriteTime = pFileData->ftLastWriteTime; + info->nFileSizeHigh = pFileData->nFileSizeHigh; + info->nFileSizeLow = pFileData->nFileSizeLow; +/* info->nNumberOfLinks = 1; */ + if (pFileData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + *reparse_tag = pFileData->dwReserved0; + else + *reparse_tag = 0; +} + +static BOOL +attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag) +{ + HANDLE hFindFile; + WIN32_FIND_DATAW FileData; + hFindFile = FindFirstFileW(pszFile, &FileData); + if (hFindFile == INVALID_HANDLE_VALUE) + return FALSE; + FindClose(hFindFile); + 
find_data_to_file_info_w(&FileData, info, reparse_tag); + return TRUE; +} + +static int +win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse) +{ + int code; + HANDLE hFile, hFile2; + BY_HANDLE_FILE_INFORMATION info; + ULONG reparse_tag = 0; + wchar_t *target_path; + const wchar_t *dot; + + hFile = CreateFileW( + path, + FILE_READ_ATTRIBUTES, /* desired access */ + 0, /* share mode */ + NULL, /* security attributes */ + OPEN_EXISTING, + /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */ + /* FILE_FLAG_OPEN_REPARSE_POINT does not follow the symlink. + Because of this, calls like GetFinalPathNameByHandle will return + the symlink path again and not the actual final path. */ + FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS| + FILE_FLAG_OPEN_REPARSE_POINT, + NULL); + + if (hFile == INVALID_HANDLE_VALUE) { + /* Either the target doesn't exist, or we don't have access to + get a handle to it. If the former, we need to return an error. + If the latter, we can use attributes_from_dir. */ + if (GetLastError() != ERROR_SHARING_VIOLATION) + return -1; + /* Could not get attributes on open file. Fall back to + reading the directory. */ + if (!attributes_from_dir_w(path, &info, &reparse_tag)) + /* Very strange. This should not fail now */ + return -1; + if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (traverse) { + /* Should traverse, but could not open reparse point handle */ + SetLastError(ERROR_SHARING_VIOLATION); + return -1; + } + } + } else { + if (!GetFileInformationByHandle(hFile, &info)) { + CloseHandle(hFile); + return -1; + } + if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (!win32_get_reparse_tag(hFile, &reparse_tag)) + return -1; + + /* Close the outer open file handle now that we're about to + reopen it with different flags. 
*/ + if (!CloseHandle(hFile)) + return -1; + + if (traverse) { + /* In order to call GetFinalPathNameByHandle we need to open + the file without the reparse handling flag set. */ + hFile2 = CreateFileW( + path, FILE_READ_ATTRIBUTES, FILE_SHARE_READ, + NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS, + NULL); + if (hFile2 == INVALID_HANDLE_VALUE) + return -1; + + if (!get_target_path(hFile2, &target_path)) + return -1; + + code = win32_xstat_impl_w(target_path, result, FALSE); + PyMem_Free(target_path); + return code; + } + } else + CloseHandle(hFile); + } + _Py_attribute_data_to_stat(&info, reparse_tag, result); + + /* Set S_IEXEC if it is an .exe, .bat, ... */ + dot = wcsrchr(path, '.'); + if (dot) { + if (_wcsicmp(dot, L".bat") == 0 || _wcsicmp(dot, L".cmd") == 0 || + _wcsicmp(dot, L".exe") == 0 || _wcsicmp(dot, L".com") == 0) + result->st_mode |= 0111; + } + return 0; +} + +static int +win32_xstat_w(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) +{ + /* Protocol violation: we explicitly clear errno, instead of + setting it to a POSIX error. Callers should use GetLastError. */ + int code = win32_xstat_impl_w(path, result, traverse); + errno = 0; + return code; +} + +static int +win32_lstat_w(const wchar_t* path, struct _Py_stat_struct *result) +{ + return win32_xstat_w(path, result, FALSE); +} + +static int +win32_stat_w(const wchar_t* path, struct _Py_stat_struct *result) +{ + return win32_xstat_w(path, result, TRUE); +} + +#endif /* MS_WINDOWS */ + +static PyTypeObject StatResultType; + +static PyObject *billion = NULL; + +static newfunc structseq_new; + +static PyObject * +statresult_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyStructSequence *result; + int i; + + result = (PyStructSequence*)structseq_new(type, args, kwds); + if (!result) + return NULL; + /* If we have been initialized from a tuple, + st_?time might be set to None. Initialize it + from the int slots. 
*/ + for (i = 7; i <= 9; i++) { + if (result->ob_item[i+3] == Py_None) { + Py_DECREF(Py_None); + Py_INCREF(result->ob_item[i]); + result->ob_item[i+3] = result->ob_item[i]; + } + } + return (PyObject*)result; +} + +/* If true, st_?time is float. */ +static int _stat_float_times = 1; + +static void +fill_time(PyObject *v, int index, time_t sec, unsigned long nsec) +{ +#if SIZEOF_TIME_T > SIZEOF_LONG + PyObject *s = PyLong_FromLongLong((PY_LONG_LONG)sec); +#else +#if PY_MAJOR_VERSION >= 3 + PyObject *s = PyLong_FromLong((long)sec); +#else + PyObject *s = PyInt_FromLong((long)sec); +#endif +#endif + PyObject *ns_fractional = PyLong_FromUnsignedLong(nsec); + PyObject *s_in_ns = NULL; + PyObject *ns_total = NULL; + PyObject *float_s = NULL; + + if (!(s && ns_fractional)) + goto exit; + + s_in_ns = PyNumber_Multiply(s, billion); + if (!s_in_ns) + goto exit; + + ns_total = PyNumber_Add(s_in_ns, ns_fractional); + if (!ns_total) + goto exit; + + if (_stat_float_times) { + float_s = PyFloat_FromDouble(sec + 1e-9*nsec); + if (!float_s) + goto exit; + } + else { + float_s = s; + Py_INCREF(float_s); + } + + PyStructSequence_SET_ITEM(v, index, s); + PyStructSequence_SET_ITEM(v, index+3, float_s); + PyStructSequence_SET_ITEM(v, index+6, ns_total); + s = NULL; + float_s = NULL; + ns_total = NULL; +exit: + Py_XDECREF(s); + Py_XDECREF(ns_fractional); + Py_XDECREF(s_in_ns); + Py_XDECREF(ns_total); + Py_XDECREF(float_s); +} + +#ifdef MS_WINDOWS +#define HAVE_STAT_NSEC 1 +#define HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES 1 +#endif + +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE +#define ST_BLKSIZE_IDX 16 +#else +#define ST_BLKSIZE_IDX 15 +#endif + +#ifdef HAVE_STRUCT_STAT_ST_BLOCKS +#define ST_BLOCKS_IDX (ST_BLKSIZE_IDX+1) +#else +#define ST_BLOCKS_IDX ST_BLKSIZE_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_RDEV +#define ST_RDEV_IDX (ST_BLOCKS_IDX+1) +#else +#define ST_RDEV_IDX ST_BLOCKS_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_FLAGS +#define ST_FLAGS_IDX (ST_RDEV_IDX+1) +#else +#define ST_FLAGS_IDX 
ST_RDEV_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_GEN +#define ST_GEN_IDX (ST_FLAGS_IDX+1) +#else +#define ST_GEN_IDX ST_FLAGS_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#define ST_BIRTHTIME_IDX (ST_GEN_IDX+1) +#else +#define ST_BIRTHTIME_IDX ST_GEN_IDX +#endif + +#ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES +#define ST_FILE_ATTRIBUTES_IDX (ST_BIRTHTIME_IDX+1) +#else +#define ST_FILE_ATTRIBUTES_IDX ST_BIRTHTIME_IDX +#endif + +#ifdef HAVE_LONG_LONG +# define _PyLong_FromDev PyLong_FromLongLong +#else +# define _PyLong_FromDev PyLong_FromLong +#endif + +#ifndef MS_WINDOWS +PyObject * +_PyLong_FromUid(uid_t uid) +{ + if (uid == (uid_t)-1) + return PyLong_FromLong(-1); + return PyLong_FromUnsignedLong(uid); +} + +PyObject * +_PyLong_FromGid(gid_t gid) +{ + if (gid == (gid_t)-1) + return PyLong_FromLong(-1); + return PyLong_FromUnsignedLong(gid); +} +#endif + +/* pack a system stat C structure into the Python stat tuple + (used by posix_stat() and posix_fstat()) */ +static PyObject* +_pystat_fromstructstat(STRUCT_STAT *st) +{ + unsigned long ansec, mnsec, cnsec; + PyObject *v = PyStructSequence_New(&StatResultType); + if (v == NULL) + return NULL; + + PyStructSequence_SET_ITEM(v, 0, PyLong_FromLong((long)st->st_mode)); +#ifdef HAVE_LARGEFILE_SUPPORT + PyStructSequence_SET_ITEM(v, 1, + PyLong_FromUnsignedLongLong(st->st_ino)); +#else + PyStructSequence_SET_ITEM(v, 1, PyLong_FromUnsignedLong((unsigned long)st->st_ino)); +#endif +#ifdef MS_WINDOWS + PyStructSequence_SET_ITEM(v, 2, PyLong_FromUnsignedLong(st->st_dev)); +#else + PyStructSequence_SET_ITEM(v, 2, _PyLong_FromDev(st->st_dev)); +#endif + PyStructSequence_SET_ITEM(v, 3, PyLong_FromLong((long)st->st_nlink)); +#if defined(MS_WINDOWS) + PyStructSequence_SET_ITEM(v, 4, PyLong_FromLong(0)); + PyStructSequence_SET_ITEM(v, 5, PyLong_FromLong(0)); +#else + PyStructSequence_SET_ITEM(v, 4, _PyLong_FromUid(st->st_uid)); + PyStructSequence_SET_ITEM(v, 5, _PyLong_FromGid(st->st_gid)); +#endif +#ifdef 
HAVE_LARGEFILE_SUPPORT + PyStructSequence_SET_ITEM(v, 6, + PyLong_FromLongLong((PY_LONG_LONG)st->st_size)); +#else + PyStructSequence_SET_ITEM(v, 6, PyLong_FromLong(st->st_size)); +#endif + +#if defined(HAVE_STAT_TV_NSEC) + ansec = st->st_atim.tv_nsec; + mnsec = st->st_mtim.tv_nsec; + cnsec = st->st_ctim.tv_nsec; +#elif defined(HAVE_STAT_TV_NSEC2) + ansec = st->st_atimespec.tv_nsec; + mnsec = st->st_mtimespec.tv_nsec; + cnsec = st->st_ctimespec.tv_nsec; +#elif defined(HAVE_STAT_NSEC) + ansec = st->st_atime_nsec; + mnsec = st->st_mtime_nsec; + cnsec = st->st_ctime_nsec; +#else + ansec = mnsec = cnsec = 0; +#endif + fill_time(v, 7, st->st_atime, ansec); + fill_time(v, 8, st->st_mtime, mnsec); + fill_time(v, 9, st->st_ctime, cnsec); + +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + PyStructSequence_SET_ITEM(v, ST_BLKSIZE_IDX, + PyLong_FromLong((long)st->st_blksize)); +#endif +#ifdef HAVE_STRUCT_STAT_ST_BLOCKS + PyStructSequence_SET_ITEM(v, ST_BLOCKS_IDX, + PyLong_FromLong((long)st->st_blocks)); +#endif +#ifdef HAVE_STRUCT_STAT_ST_RDEV + PyStructSequence_SET_ITEM(v, ST_RDEV_IDX, + PyLong_FromLong((long)st->st_rdev)); +#endif +#ifdef HAVE_STRUCT_STAT_ST_GEN + PyStructSequence_SET_ITEM(v, ST_GEN_IDX, + PyLong_FromLong((long)st->st_gen)); +#endif +#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME + { + PyObject *val; + unsigned long bsec,bnsec; + bsec = (long)st->st_birthtime; +#ifdef HAVE_STAT_TV_NSEC2 + bnsec = st->st_birthtimespec.tv_nsec; +#else + bnsec = 0; +#endif + if (_stat_float_times) { + val = PyFloat_FromDouble(bsec + 1e-9*bnsec); + } else { + val = PyLong_FromLong((long)bsec); + } + PyStructSequence_SET_ITEM(v, ST_BIRTHTIME_IDX, + val); + } +#endif +#ifdef HAVE_STRUCT_STAT_ST_FLAGS + PyStructSequence_SET_ITEM(v, ST_FLAGS_IDX, + PyLong_FromLong((long)st->st_flags)); +#endif +#ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES + PyStructSequence_SET_ITEM(v, ST_FILE_ATTRIBUTES_IDX, + PyLong_FromUnsignedLong(st->st_file_attributes)); +#endif + + if (PyErr_Occurred()) { + Py_DECREF(v); + return 
NULL; + } + + return v; +} + +char *PyStructSequence_UnnamedField = "unnamed field"; + +PyDoc_STRVAR(stat_result__doc__, +"stat_result: Result from stat, fstat, or lstat.\n\n\ +This object may be accessed either as a tuple of\n\ + (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime)\n\ +or via the attributes st_mode, st_ino, st_dev, st_nlink, st_uid, and so on.\n\ +\n\ +Posix/windows: If your platform supports st_blksize, st_blocks, st_rdev,\n\ +or st_flags, they are available as attributes only.\n\ +\n\ +See os.stat for more information."); + +static PyStructSequence_Field stat_result_fields[] = { + {"st_mode", "protection bits"}, + {"st_ino", "inode"}, + {"st_dev", "device"}, + {"st_nlink", "number of hard links"}, + {"st_uid", "user ID of owner"}, + {"st_gid", "group ID of owner"}, + {"st_size", "total size, in bytes"}, + /* The NULL is replaced with PyStructSequence_UnnamedField later. */ + {NULL, "integer time of last access"}, + {NULL, "integer time of last modification"}, + {NULL, "integer time of last change"}, + {"st_atime", "time of last access"}, + {"st_mtime", "time of last modification"}, + {"st_ctime", "time of last change"}, + {"st_atime_ns", "time of last access in nanoseconds"}, + {"st_mtime_ns", "time of last modification in nanoseconds"}, + {"st_ctime_ns", "time of last change in nanoseconds"}, +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + {"st_blksize", "blocksize for filesystem I/O"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_BLOCKS + {"st_blocks", "number of blocks allocated"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_RDEV + {"st_rdev", "device type (if inode device)"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_FLAGS + {"st_flags", "user defined flags for file"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_GEN + {"st_gen", "generation number"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME + {"st_birthtime", "time of creation"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES + {"st_file_attributes", "Windows file attribute bits"}, +#endif + {0} +}; + +static 
PyStructSequence_Desc stat_result_desc = { + "scandir.stat_result", /* name */ + stat_result__doc__, /* doc */ + stat_result_fields, + 10 +}; + + +#ifdef MS_WINDOWS +static int +win32_warn_bytes_api() +{ + return PyErr_WarnEx(PyExc_DeprecationWarning, + "The Windows bytes API has been deprecated, " + "use Unicode filenames instead", + 1); +} +#endif + +typedef struct { + const char *function_name; + const char *argument_name; + int nullable; + wchar_t *wide; + char *narrow; + int fd; + Py_ssize_t length; + PyObject *object; + PyObject *cleanup; +} path_t; + +static void +path_cleanup(path_t *path) { + if (path->cleanup) { + Py_CLEAR(path->cleanup); + } +} + +static int +path_converter(PyObject *o, void *p) { + path_t *path = (path_t *)p; + PyObject *unicode, *bytes; + Py_ssize_t length; + char *narrow; + +#define FORMAT_EXCEPTION(exc, fmt) \ + PyErr_Format(exc, "%s%s" fmt, \ + path->function_name ? path->function_name : "", \ + path->function_name ? ": " : "", \ + path->argument_name ? path->argument_name : "path") + + /* Py_CLEANUP_SUPPORTED support */ + if (o == NULL) { + path_cleanup(path); + return 1; + } + + /* ensure it's always safe to call path_cleanup() */ + path->cleanup = NULL; + + if (o == Py_None) { + if (!path->nullable) { + FORMAT_EXCEPTION(PyExc_TypeError, + "can't specify None for %s argument"); + return 0; + } + path->wide = NULL; + path->narrow = NULL; + path->length = 0; + path->object = o; + path->fd = -1; + return 1; + } + + unicode = PyUnicode_FromObject(o); + if (unicode) { +#ifdef MS_WINDOWS + wchar_t *wide; + + wide = PyUnicode_AsUnicodeAndSize(unicode, &length); + if (!wide) { + Py_DECREF(unicode); + return 0; + } + if (length > 32767) { + FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); + Py_DECREF(unicode); + return 0; + } + if (wcslen(wide) != length) { + FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character"); + Py_DECREF(unicode); + return 0; + } + + path->wide = wide; + path->narrow = NULL; + path->length = 
length; + path->object = o; + path->fd = -1; + path->cleanup = unicode; + return Py_CLEANUP_SUPPORTED; +#else +#if PY_MAJOR_VERSION >= 3 + if (!PyUnicode_FSConverter(unicode, &bytes)) + bytes = NULL; +#else + bytes = PyUnicode_AsEncodedString(unicode, FS_ENCODING, "strict"); +#endif + Py_DECREF(unicode); +#endif + } + else { + PyErr_Clear(); +#if PY_MAJOR_VERSION >= 3 + if (PyObject_CheckBuffer(o)) { + bytes = PyBytes_FromObject(o); + } +#else + if (PyString_Check(o)) { + bytes = o; + Py_INCREF(bytes); + } +#endif + else + bytes = NULL; + if (!bytes) { + PyErr_Clear(); + } + } + + if (!bytes) { + if (!PyErr_Occurred()) + FORMAT_EXCEPTION(PyExc_TypeError, "illegal type for %s parameter"); + return 0; + } + +#ifdef MS_WINDOWS + if (win32_warn_bytes_api()) { + Py_DECREF(bytes); + return 0; + } +#endif + + length = PyBytes_GET_SIZE(bytes); +#ifdef MS_WINDOWS + if (length > MAX_PATH-1) { + FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); + Py_DECREF(bytes); + return 0; + } +#endif + + narrow = PyBytes_AS_STRING(bytes); + if ((size_t)length != strlen(narrow)) { + FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); + Py_DECREF(bytes); + return 0; + } + + path->wide = NULL; + path->narrow = narrow; + path->length = length; + path->object = o; + path->fd = -1; + path->cleanup = bytes; + return Py_CLEANUP_SUPPORTED; +} + +static PyObject * +path_error(path_t *path) +{ +#ifdef MS_WINDOWS + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, path->object); +#else + return PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path->object); +#endif +} + + +/* SECTION: Main DirEntry and scandir implementation, taken from + Python 3.5's posixmodule.c */ + +PyDoc_STRVAR(posix_scandir__doc__, +"scandir(path='.') -> iterator of DirEntry objects for given path"); + +static char *follow_symlinks_keywords[] = {"follow_symlinks", NULL}; +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3 +static char *follow_symlinks_format = 
"|$p:DirEntry.stat"; +#else +static char *follow_symlinks_format = "|i:DirEntry.stat"; +#endif + +typedef struct { + PyObject_HEAD + PyObject *name; + PyObject *path; + PyObject *stat; + PyObject *lstat; +#ifdef MS_WINDOWS + struct _Py_stat_struct win32_lstat; + unsigned __int64 win32_file_index; + int got_file_index; +#if PY_MAJOR_VERSION < 3 + int name_path_bytes; +#endif +#else /* POSIX */ +#ifdef HAVE_DIRENT_D_TYPE + unsigned char d_type; +#endif + ino_t d_ino; +#endif +} DirEntry; + +static void +DirEntry_dealloc(DirEntry *entry) +{ + Py_XDECREF(entry->name); + Py_XDECREF(entry->path); + Py_XDECREF(entry->stat); + Py_XDECREF(entry->lstat); + Py_TYPE(entry)->tp_free((PyObject *)entry); +} + +/* Forward reference */ +static int +DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits); + +/* Set exception and return -1 on error, 0 for False, 1 for True */ +static int +DirEntry_is_symlink(DirEntry *self) +{ +#ifdef MS_WINDOWS + return (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK; +#elif defined(HAVE_DIRENT_D_TYPE) + /* POSIX */ + if (self->d_type != DT_UNKNOWN) + return self->d_type == DT_LNK; + else + return DirEntry_test_mode(self, 0, S_IFLNK); +#else + /* POSIX without d_type */ + return DirEntry_test_mode(self, 0, S_IFLNK); +#endif +} + +static PyObject * +DirEntry_py_is_symlink(DirEntry *self) +{ + int result; + + result = DirEntry_is_symlink(self); + if (result == -1) + return NULL; + return PyBool_FromLong(result); +} + +static PyObject * +DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) +{ + int result; + struct _Py_stat_struct st; + +#ifdef MS_WINDOWS + wchar_t *path; + + path = PyUnicode_AsUnicode(self->path); + if (!path) + return NULL; + + if (follow_symlinks) + result = win32_stat_w(path, &st); + else + result = win32_lstat_w(path, &st); + + if (result != 0) { + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, self->path); + } +#else /* POSIX */ + PyObject *bytes; + char *path; + +#if 
PY_MAJOR_VERSION >= 3 + if (!PyUnicode_FSConverter(self->path, &bytes)) + return NULL; +#else + if (PyString_Check(self->path)) { + bytes = self->path; + Py_INCREF(bytes); + } else { + bytes = PyUnicode_AsEncodedString(self->path, FS_ENCODING, "strict"); + if (!bytes) + return NULL; + } +#endif + path = PyBytes_AS_STRING(bytes); + + if (follow_symlinks) + result = STAT(path, &st); + else + result = LSTAT(path, &st); + Py_DECREF(bytes); + + if (result != 0) + return PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, self->path); +#endif + + return _pystat_fromstructstat(&st); +} + +static PyObject * +DirEntry_get_lstat(DirEntry *self) +{ + if (!self->lstat) { +#ifdef MS_WINDOWS + self->lstat = _pystat_fromstructstat(&self->win32_lstat); +#else /* POSIX */ + self->lstat = DirEntry_fetch_stat(self, 0); +#endif + } + Py_XINCREF(self->lstat); + return self->lstat; +} + +static PyObject * +DirEntry_get_stat(DirEntry *self, int follow_symlinks) +{ + if (!follow_symlinks) + return DirEntry_get_lstat(self); + + if (!self->stat) { + int result = DirEntry_is_symlink(self); + if (result == -1) + return NULL; + else if (result) + self->stat = DirEntry_fetch_stat(self, 1); + else + self->stat = DirEntry_get_lstat(self); + } + + Py_XINCREF(self->stat); + return self->stat; +} + +static PyObject * +DirEntry_stat(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, follow_symlinks_format, + follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_get_stat(self, follow_symlinks); +} + +/* Set exception and return -1 on error, 0 for False, 1 for True */ +static int +DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits) +{ + PyObject *stat = NULL; + PyObject *st_mode = NULL; + long mode; + int result; +#if defined(MS_WINDOWS) || defined(HAVE_DIRENT_D_TYPE) + int is_symlink; + int need_stat; +#endif +#ifdef MS_WINDOWS + unsigned long dir_bits; +#endif + 
_Py_IDENTIFIER(st_mode); + +#ifdef MS_WINDOWS + is_symlink = (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK; + need_stat = follow_symlinks && is_symlink; +#elif defined(HAVE_DIRENT_D_TYPE) + is_symlink = self->d_type == DT_LNK; + need_stat = self->d_type == DT_UNKNOWN || (follow_symlinks && is_symlink); +#endif + +#if defined(MS_WINDOWS) || defined(HAVE_DIRENT_D_TYPE) + if (need_stat) { +#endif + stat = DirEntry_get_stat(self, follow_symlinks); + if (!stat) { + if (PyErr_ExceptionMatches(PyExc_FileNotFoundError)) { + /* If file doesn't exist (anymore), then return False + (i.e., say it's not a file/directory) */ + PyErr_Clear(); + return 0; + } + goto error; + } + st_mode = _PyObject_GetAttrId(stat, &PyId_st_mode); + if (!st_mode) + goto error; + + mode = PyLong_AsLong(st_mode); + if (mode == -1 && PyErr_Occurred()) + goto error; + Py_CLEAR(st_mode); + Py_CLEAR(stat); + result = (mode & S_IFMT) == mode_bits; +#if defined(MS_WINDOWS) || defined(HAVE_DIRENT_D_TYPE) + } + else if (is_symlink) { + assert(mode_bits != S_IFLNK); + result = 0; + } + else { + assert(mode_bits == S_IFDIR || mode_bits == S_IFREG); +#ifdef MS_WINDOWS + dir_bits = self->win32_lstat.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY; + if (mode_bits == S_IFDIR) + result = dir_bits != 0; + else + result = dir_bits == 0; +#else /* POSIX */ + if (mode_bits == S_IFDIR) + result = self->d_type == DT_DIR; + else + result = self->d_type == DT_REG; +#endif + } +#endif + + return result; + +error: + Py_XDECREF(st_mode); + Py_XDECREF(stat); + return -1; +} + +static PyObject * +DirEntry_py_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits) +{ + int result; + + result = DirEntry_test_mode(self, follow_symlinks, mode_bits); + if (result == -1) + return NULL; + return PyBool_FromLong(result); +} + +static PyObject * +DirEntry_is_dir(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, follow_symlinks_format, + 
follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_py_test_mode(self, follow_symlinks, S_IFDIR); +} + +static PyObject * +DirEntry_is_file(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, follow_symlinks_format, + follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_py_test_mode(self, follow_symlinks, S_IFREG); +} + +static PyObject * +DirEntry_inode(DirEntry *self) +{ +#ifdef MS_WINDOWS + if (!self->got_file_index) { + wchar_t *path; + struct _Py_stat_struct stat; + + path = PyUnicode_AsUnicode(self->path); + if (!path) + return NULL; + + if (win32_lstat_w(path, &stat) != 0) { + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, self->path); + } + + self->win32_file_index = stat.st_ino; + self->got_file_index = 1; + } + return PyLong_FromUnsignedLongLong(self->win32_file_index); +#else /* POSIX */ +#ifdef HAVE_LARGEFILE_SUPPORT + return PyLong_FromUnsignedLongLong(self->d_ino); +#else + return PyLong_FromUnsignedLong((unsigned long)self->d_ino); +#endif +#endif +} + +#if PY_MAJOR_VERSION < 3 && defined(MS_WINDOWS) + +PyObject *DirEntry_name_getter(DirEntry *self, void *closure) { + if (self->name_path_bytes) { + return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(self->name), + PyUnicode_GetSize(self->name), "strict"); + } else { + Py_INCREF(self->name); + return self->name; + } +} + +PyObject *DirEntry_path_getter(DirEntry *self, void *closure) { + if (self->name_path_bytes) { + return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(self->path), + PyUnicode_GetSize(self->path), "strict"); + } else { + Py_INCREF(self->path); + return self->path; + } +} + +static PyGetSetDef DirEntry_getset[] = { + {"name", (getter)DirEntry_name_getter, NULL, + "the entry's base filename, relative to scandir() \"path\" argument", NULL}, + {"path", (getter)DirEntry_path_getter, NULL, + "the entry's full path name; equivalent to 
os.path.join(scandir_path, entry.name)", NULL}, + {NULL} +}; + +#else + +static PyMemberDef DirEntry_members[] = { + {"name", T_OBJECT_EX, offsetof(DirEntry, name), READONLY, + "the entry's base filename, relative to scandir() \"path\" argument"}, + {"path", T_OBJECT_EX, offsetof(DirEntry, path), READONLY, + "the entry's full path name; equivalent to os.path.join(scandir_path, entry.name)"}, + {NULL} +}; + +#endif + +static PyObject * +DirEntry_repr(DirEntry *self) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("<DirEntry %R>", self->name); +#elif defined(MS_WINDOWS) + PyObject *name; + PyObject *name_repr; + PyObject *entry_repr; + + name = DirEntry_name_getter(self, NULL); + if (!name) + return NULL; + name_repr = PyObject_Repr(name); + Py_DECREF(name); + if (!name_repr) + return NULL; + entry_repr = PyString_FromFormat("<DirEntry %s>", PyString_AsString(name_repr)); + Py_DECREF(name_repr); + return entry_repr; +#else + PyObject *name_repr; + PyObject *entry_repr; + + name_repr = PyObject_Repr(self->name); + if (!name_repr) + return NULL; + entry_repr = PyString_FromFormat("<DirEntry %s>", PyString_AsString(name_repr)); + Py_DECREF(name_repr); + return entry_repr; +#endif +} + +static PyMethodDef DirEntry_methods[] = { + {"is_dir", (PyCFunction)DirEntry_is_dir, METH_VARARGS | METH_KEYWORDS, + "return True if the entry is a directory; cached per entry" + }, + {"is_file", (PyCFunction)DirEntry_is_file, METH_VARARGS | METH_KEYWORDS, + "return True if the entry is a file; cached per entry" + }, + {"is_symlink", (PyCFunction)DirEntry_py_is_symlink, METH_NOARGS, + "return True if the entry is a symbolic link; cached per entry" + }, + {"stat", (PyCFunction)DirEntry_stat, METH_VARARGS | METH_KEYWORDS, + "return stat_result object for the entry; cached per entry" + }, + {"inode", (PyCFunction)DirEntry_inode, METH_NOARGS, + "return inode of the entry; cached per entry", + }, + {NULL} +}; + +static PyTypeObject DirEntryType = { + PyVarObject_HEAD_INIT(NULL, 0) 
+ MODNAME ".DirEntry", /* tp_name */ + sizeof(DirEntry), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)DirEntry_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)DirEntry_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + DirEntry_methods, /* tp_methods */ +#if PY_MAJOR_VERSION < 3 && defined(MS_WINDOWS) + NULL, /* tp_members */ + DirEntry_getset, /* tp_getset */ +#else + DirEntry_members, /* tp_members */ + NULL, /* tp_getset */ +#endif +}; + +#ifdef MS_WINDOWS + +static wchar_t * +join_path_filenameW(wchar_t *path_wide, wchar_t* filename) +{ + Py_ssize_t path_len; + Py_ssize_t size; + wchar_t *result; + wchar_t ch; + + if (!path_wide) { /* Default arg: "." 
*/ + path_wide = L"."; + path_len = 1; + } + else { + path_len = wcslen(path_wide); + } + + /* The +1's are for the path separator and the NUL */ + size = path_len + 1 + wcslen(filename) + 1; + result = PyMem_New(wchar_t, size); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + wcscpy(result, path_wide); + if (path_len > 0) { + ch = result[path_len - 1]; + if (ch != SEP && ch != ALTSEP && ch != L':') + result[path_len++] = SEP; + wcscpy(result + path_len, filename); + } + return result; +} + +static PyObject * +DirEntry_from_find_data(path_t *path, WIN32_FIND_DATAW *dataW) +{ + DirEntry *entry; + BY_HANDLE_FILE_INFORMATION file_info; + ULONG reparse_tag; + wchar_t *joined_path; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) + return NULL; + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + entry->got_file_index = 0; +#if PY_MAJOR_VERSION < 3 + entry->name_path_bytes = path->object && PyBytes_Check(path->object); +#endif + + entry->name = PyUnicode_FromWideChar(dataW->cFileName, wcslen(dataW->cFileName)); + if (!entry->name) + goto error; + + joined_path = join_path_filenameW(path->wide, dataW->cFileName); + if (!joined_path) + goto error; + + entry->path = PyUnicode_FromWideChar(joined_path, wcslen(joined_path)); + PyMem_Free(joined_path); + if (!entry->path) + goto error; + + find_data_to_file_info_w(dataW, &file_info, &reparse_tag); + _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat); + + return (PyObject *)entry; + +error: + Py_DECREF(entry); + return NULL; +} + +#else /* POSIX */ + +static char * +join_path_filename(char *path_narrow, char* filename, Py_ssize_t filename_len) +{ + Py_ssize_t path_len; + Py_ssize_t size; + char *result; + + if (!path_narrow) { /* Default arg: "." 
*/ + path_narrow = "."; + path_len = 1; + } + else { + path_len = strlen(path_narrow); + } + + if (filename_len == -1) + filename_len = strlen(filename); + + /* The +1's are for the path separator and the NUL */ + size = path_len + 1 + filename_len + 1; + result = PyMem_New(char, size); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + strcpy(result, path_narrow); + if (path_len > 0 && result[path_len - 1] != '/') + result[path_len++] = '/'; + strcpy(result + path_len, filename); + return result; +} + +static PyObject * +DirEntry_from_posix_info(path_t *path, char *name, Py_ssize_t name_len, + ino_t d_ino +#ifdef HAVE_DIRENT_D_TYPE + , unsigned char d_type +#endif + ) +{ + DirEntry *entry; + char *joined_path; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) + return NULL; + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + + joined_path = join_path_filename(path->narrow, name, name_len); + if (!joined_path) + goto error; + + if (!path->narrow || !PyBytes_Check(path->object)) { +#if PY_MAJOR_VERSION >= 3 + entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); + entry->path = PyUnicode_DecodeFSDefault(joined_path); +#else + entry->name = PyUnicode_Decode(name, name_len, + FS_ENCODING, "strict"); + entry->path = PyUnicode_Decode(joined_path, strlen(joined_path), + FS_ENCODING, "strict"); +#endif + } + else { + entry->name = PyBytes_FromStringAndSize(name, name_len); + entry->path = PyBytes_FromString(joined_path); + } + PyMem_Free(joined_path); + if (!entry->name || !entry->path) + goto error; + +#ifdef HAVE_DIRENT_D_TYPE + entry->d_type = d_type; +#endif + entry->d_ino = d_ino; + + return (PyObject *)entry; + +error: + Py_XDECREF(entry); + return NULL; +} + +#endif + + +typedef struct { + PyObject_HEAD + path_t path; +#ifdef MS_WINDOWS + HANDLE handle; + WIN32_FIND_DATAW file_data; + int first_time; +#else /* POSIX */ + DIR *dirp; +#endif +} ScandirIterator; + +#ifdef MS_WINDOWS + +static void 
+ScandirIterator_close(ScandirIterator *iterator) +{ + if (iterator->handle == INVALID_HANDLE_VALUE) + return; + + Py_BEGIN_ALLOW_THREADS + FindClose(iterator->handle); + Py_END_ALLOW_THREADS + iterator->handle = INVALID_HANDLE_VALUE; +} + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + WIN32_FIND_DATAW *file_data = &iterator->file_data; + BOOL success; + + /* Happens if the iterator is iterated twice */ + if (iterator->handle == INVALID_HANDLE_VALUE) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + if (!iterator->first_time) { + Py_BEGIN_ALLOW_THREADS + success = FindNextFileW(iterator->handle, file_data); + Py_END_ALLOW_THREADS + if (!success) { + if (GetLastError() != ERROR_NO_MORE_FILES) + return path_error(&iterator->path); + /* No more files found in directory, stop iterating */ + break; + } + } + iterator->first_time = 0; + + /* Skip over . and .. */ + if (wcscmp(file_data->cFileName, L".") != 0 && + wcscmp(file_data->cFileName, L"..") != 0) + return DirEntry_from_find_data(&iterator->path, file_data); + + /* Loop till we get a non-dot directory or finish iterating */ + } + + ScandirIterator_close(iterator); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; +} + +#else /* POSIX */ + +static void +ScandirIterator_close(ScandirIterator *iterator) +{ + if (!iterator->dirp) + return; + + Py_BEGIN_ALLOW_THREADS + closedir(iterator->dirp); + Py_END_ALLOW_THREADS + iterator->dirp = NULL; + return; +} + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + struct dirent *direntp; + Py_ssize_t name_len; + int is_dot; + + /* Happens if the iterator is iterated twice */ + if (!iterator->dirp) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + direntp = readdir(iterator->dirp); + Py_END_ALLOW_THREADS + + if (!direntp) { + if (errno != 0) + return path_error(&iterator->path); + /* No more files found in directory, stop 
iterating */ + break; + } + + /* Skip over . and .. */ + name_len = NAMLEN(direntp); + is_dot = direntp->d_name[0] == '.' && + (name_len == 1 || (direntp->d_name[1] == '.' && name_len == 2)); + if (!is_dot) { + return DirEntry_from_posix_info(&iterator->path, direntp->d_name, + name_len, direntp->d_ino +#ifdef HAVE_DIRENT_D_TYPE + , direntp->d_type +#endif + ); + } + + /* Loop till we get a non-dot directory or finish iterating */ + } + + ScandirIterator_close(iterator); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; +} + +#endif + +static void +ScandirIterator_dealloc(ScandirIterator *iterator) +{ + ScandirIterator_close(iterator); + Py_XDECREF(iterator->path.object); + path_cleanup(&iterator->path); + Py_TYPE(iterator)->tp_free((PyObject *)iterator); +} + +static PyTypeObject ScandirIteratorType = { + PyVarObject_HEAD_INIT(NULL, 0) + MODNAME ".ScandirIterator", /* tp_name */ + sizeof(ScandirIterator), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)ScandirIterator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)ScandirIterator_iternext, /* tp_iternext */ +}; + +static PyObject * +posix_scandir(PyObject *self, PyObject *args, PyObject *kwargs) +{ + ScandirIterator *iterator; + static char *keywords[] = {"path", NULL}; +#ifdef MS_WINDOWS + wchar_t *path_strW; +#else + char *path; +#endif + + iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); + if (!iterator) + return NULL; + memset(&iterator->path, 0, sizeof(path_t)); + 
iterator->path.function_name = "scandir"; + iterator->path.nullable = 1; + +#ifdef MS_WINDOWS + iterator->handle = INVALID_HANDLE_VALUE; +#else + iterator->dirp = NULL; +#endif + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O&:scandir", keywords, + path_converter, &iterator->path)) + goto error; + + /* path_converter doesn't keep path.object around, so do it + manually for the lifetime of the iterator here (the refcount + is decremented in ScandirIterator_dealloc) + */ + Py_XINCREF(iterator->path.object); + +#ifdef MS_WINDOWS + if (iterator->path.narrow) { + PyErr_SetString(PyExc_TypeError, + "os.scandir() doesn't support bytes path on Windows, use Unicode instead"); + goto error; + } + iterator->first_time = 1; + + path_strW = join_path_filenameW(iterator->path.wide, L"*.*"); + if (!path_strW) + goto error; + + Py_BEGIN_ALLOW_THREADS + iterator->handle = FindFirstFileW(path_strW, &iterator->file_data); + Py_END_ALLOW_THREADS + + PyMem_Free(path_strW); + + if (iterator->handle == INVALID_HANDLE_VALUE) { + path_error(&iterator->path); + goto error; + } +#else /* POSIX */ + if (iterator->path.narrow) + path = iterator->path.narrow; + else + path = "."; + + errno = 0; + Py_BEGIN_ALLOW_THREADS + iterator->dirp = opendir(path); + Py_END_ALLOW_THREADS + + if (!iterator->dirp) { + path_error(&iterator->path); + goto error; + } +#endif + + return (PyObject *)iterator; + +error: + Py_DECREF(iterator); + return NULL; +} + + +/* SECTION: Module and method definitions and initialization code */ + +static PyMethodDef scandir_methods[] = { + {"scandir", (PyCFunction)posix_scandir, + METH_VARARGS | METH_KEYWORDS, + posix_scandir__doc__}, + {NULL, NULL}, +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_scandir", + NULL, + 0, + scandir_methods, + NULL, + NULL, + NULL, + NULL, +}; +#endif + +#if PY_MAJOR_VERSION >= 3 +PyObject * +PyInit__scandir(void) +{ + PyObject *module = PyModule_Create(&moduledef); +#else +void 
+init_scandir(void) +{ + PyObject *module = Py_InitModule("_scandir", scandir_methods); +#endif + if (module == NULL) { + INIT_ERROR; + } + + billion = PyLong_FromLong(1000000000); + if (!billion) + INIT_ERROR; + + stat_result_desc.fields[7].name = PyStructSequence_UnnamedField; + stat_result_desc.fields[8].name = PyStructSequence_UnnamedField; + stat_result_desc.fields[9].name = PyStructSequence_UnnamedField; + PyStructSequence_InitType(&StatResultType, &stat_result_desc); + structseq_new = StatResultType.tp_new; + StatResultType.tp_new = statresult_new; + + if (PyType_Ready(&ScandirIteratorType) < 0) + INIT_ERROR; + if (PyType_Ready(&DirEntryType) < 0) + INIT_ERROR; + + PyModule_AddObject(module, "DirEntry", (PyObject *)&DirEntryType); + +#if PY_MAJOR_VERSION >= 3 + return module; +#endif +} diff --git a/third_party/python/scandir/benchmark.py b/third_party/python/scandir/benchmark.py new file mode 100644 index 0000000000..89a4b9d891 --- /dev/null +++ b/third_party/python/scandir/benchmark.py @@ -0,0 +1,192 @@ +"""Simple benchmark to compare the speed of scandir.walk() with os.walk().""" + +import optparse +import os +import stat +import sys +import timeit + +import warnings +with warnings.catch_warnings(record=True): + import scandir + +DEPTH = 4 +NUM_DIRS = 5 +NUM_FILES = 50 + + +def os_walk_pre_35(top, topdown=True, onerror=None, followlinks=False): + """Pre Python 3.5 implementation of os.walk() that doesn't use scandir.""" + islink, join, isdir = os.path.islink, os.path.join, os.path.isdir + + try: + names = os.listdir(top) + except OSError as err: + if onerror is not None: + onerror(err) + return + + dirs, nondirs = [], [] + for name in names: + if isdir(join(top, name)): + dirs.append(name) + else: + nondirs.append(name) + + if topdown: + yield top, dirs, nondirs + for name in dirs: + new_path = join(top, name) + if followlinks or not islink(new_path): + for x in os_walk_pre_35(new_path, topdown, onerror, followlinks): + yield x + if not topdown: + yield 
def create_tree(path, depth=DEPTH):
    """Build the benchmark fixture rooted at ``path``.

    Creates the directory ``path`` itself, fills it with NUM_FILES small
    files, and -- while ``depth`` is greater than 1 -- adds NUM_DIRS
    sub-directories, each populated the same way one level shallower.
    """
    os.mkdir(path)

    file_names = ('file{0:03}.txt'.format(index) for index in range(NUM_FILES))
    for file_name in file_names:
        with open(os.path.join(path, file_name), 'wb') as handle:
            handle.write(b'foo')

    # Only non-leaf levels get sub-directories
    if depth > 1:
        for index in range(NUM_DIRS):
            child = os.path.join(path, 'dir{0:03}'.format(index))
            create_tree(child, depth - 1)
'NOT EQUAL!' + print('os.walk size {0}, scandir.walk size {1} -- {2}'.format( + sizes['os_walk'], sizes['scandir_walk'], equality)) + + print('os.walk took {0:.3f}s, scandir.walk took {1:.3f}s -- {2:.1f}x as fast'.format( + os_walk_time, scandir_walk_time, os_walk_time / scandir_walk_time)) + + +if __name__ == '__main__': + usage = """Usage: benchmark.py [-h] [tree_dir] + +Create a large directory tree named "benchtree" (relative to this script) and +benchmark os.walk() versus scandir.walk(). If tree_dir is specified, benchmark +using it instead of creating a tree.""" + parser = optparse.OptionParser(usage=usage) + parser.add_option('-s', '--size', action='store_true', + help='get size of directory tree while walking') + parser.add_option('-c', '--scandir', type='choice', choices=['best', 'generic', 'c', 'python', 'os'], default='best', + help='version of scandir() to use, default "%default"') + options, args = parser.parse_args() + + if args: + tree_dir = args[0] + else: + tree_dir = os.path.join(os.path.dirname(__file__), 'benchtree') + if not os.path.exists(tree_dir): + print('Creating tree at {0}: depth={1}, num_dirs={2}, num_files={3}'.format( + tree_dir, DEPTH, NUM_DIRS, NUM_FILES)) + create_tree(tree_dir) + + if options.scandir == 'generic': + scandir.scandir = scandir.scandir_generic + elif options.scandir == 'c': + if scandir.scandir_c is None: + print("ERROR: Compiled C version of scandir not found!") + sys.exit(1) + scandir.scandir = scandir.scandir_c + elif options.scandir == 'python': + if scandir.scandir_python is None: + print("ERROR: Python version of scandir not found!") + sys.exit(1) + scandir.scandir = scandir.scandir_python + elif options.scandir == 'os': + if not hasattr(os, 'scandir'): + print("ERROR: Python 3.5's os.scandir() not found!") + sys.exit(1) + scandir.scandir = os.scandir + elif hasattr(os, 'scandir'): + scandir.scandir = os.scandir + + if scandir.scandir == getattr(os, 'scandir', None): + print("Using Python 3.5's builtin 
os.scandir()") + elif scandir.scandir == scandir.scandir_c: + print('Using fast C version of scandir') + elif scandir.scandir == scandir.scandir_python: + print('Using slower ctypes version of scandir') + elif scandir.scandir == scandir.scandir_generic: + print('Using very slow generic version of scandir') + else: + print('ERROR: Unsure which version of scandir we are using!') + sys.exit(1) + + if hasattr(os, 'scandir'): + os.walk = os_walk_pre_35 + print('Comparing against pre-Python 3.5 version of os.walk()') + else: + print('Comparing against builtin version of os.walk()') + + benchmark(tree_dir, get_size=options.size) diff --git a/third_party/python/scandir/osdefs.h b/third_party/python/scandir/osdefs.h new file mode 100644 index 0000000000..d678ca3b4d --- /dev/null +++ b/third_party/python/scandir/osdefs.h @@ -0,0 +1,48 @@ +// from CPython +#ifndef Py_OSDEFS_H +#define Py_OSDEFS_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Operating system dependencies */ + +#ifdef MS_WINDOWS +#define SEP L'\\' +#define ALTSEP L'/' +#define MAXPATHLEN 256 +#define DELIM L';' +#endif + +/* Filename separator */ +#ifndef SEP +#define SEP L'/' +#endif + +/* Max pathname length */ +#ifdef __hpux +#include <sys/param.h> +#include <limits.h> +#ifndef PATH_MAX +#define PATH_MAX MAXPATHLEN +#endif +#endif + +#ifndef MAXPATHLEN +#if defined(PATH_MAX) && PATH_MAX > 1024 +#define MAXPATHLEN PATH_MAX +#else +#define MAXPATHLEN 1024 +#endif +#endif + +/* Search path entry delimiter */ +#ifndef DELIM +#define DELIM L':' +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OSDEFS_H */ diff --git a/third_party/python/scandir/scandir.py b/third_party/python/scandir/scandir.py new file mode 100644 index 0000000000..aac7208e8e --- /dev/null +++ b/third_party/python/scandir/scandir.py @@ -0,0 +1,693 @@ +"""scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib + +scandir() is a generator version of os.listdir() that returns an +iterator over files in a 
class GenericDirEntry(object):
    """Pure-Python directory entry used by the listdir()-based fallback.

    Mirrors the os.DirEntry interface: ``name`` and ``path`` attributes plus
    stat(), is_dir(), is_file(), is_symlink() and inode().  Nothing is known
    about the entry up front, so every query goes through a (cached) stat()
    or lstat() call on the full path.
    """

    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self._scandir_path = scandir_path
        self.name = name
        # Lazily filled caches
        self._stat = None
        self._lstat = None
        self._path = None

    @property
    def path(self):
        """Directory passed to scandir() joined with the entry name."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def __fspath__(self):
        """Return the entry's path (os.PathLike protocol, PEP 519).

        Lets the entry be passed straight to open() and the os.path
        functions on Python 3.6+, as os.DirEntry allows.
        """
        return self.path

    def stat(self, follow_symlinks=True):
        """Return the cached stat() result, or lstat() when
        follow_symlinks is false.  Raises OSError if the call fails.
        """
        if follow_symlinks:
            if self._stat is None:
                self._stat = stat(self.path)
            return self._stat
        else:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

    # The code duplication below is intentional: this is for slightly
    # better performance on systems that fall back to GenericDirEntry.
    # It avoids an additional attribute lookup and method call, which
    # are relatively slow on CPython.
    def is_dir(self, follow_symlinks=True):
        """True if the entry is a directory; False if it no longer exists."""
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """True if the entry is a regular file; False if it no longer exists."""
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """True if the entry itself is a symbolic link."""
        try:
            st = self.stat(follow_symlinks=False)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return st_ino from lstat() of the entry."""
        st = self.stat(follow_symlinks=False)
        return st.st_ino

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__
def find_data_to_stat(data):
    """Convert Win32 FIND_DATA struct to stat_result.

    ``data`` is the WIN32_FIND_DATAW structure filled in by
    FindFirstFile/FindNextFile.  Returns a Win32StatResult whose st_mode is
    synthesised from the file attributes and whose st_*_ns fields are exact
    integers derived from the FILETIME tick counts.
    """
    def filetime_to_ns(filetime):
        # FILETIME counts 100 ns ticks since 1601-01-01.  Staying in integer
        # arithmetic keeps full precision; going through the float number of
        # seconds would not, because a double cannot represent current
        # timestamps to the nanosecond.
        ticks = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
        return (ticks - SECONDS_BETWEEN_EPOCHS * 10000000) * 100

    # First convert Win32 dwFileAttributes to st_mode
    attributes = data.dwFileAttributes
    st_mode = 0
    if attributes & FILE_ATTRIBUTE_DIRECTORY:
        st_mode |= S_IFDIR | 0o111
    else:
        st_mode |= S_IFREG
    if attributes & FILE_ATTRIBUTE_READONLY:
        st_mode |= 0o444
    else:
        st_mode |= 0o666
    if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
            data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
        # Replace the file-type bits with "symlink", keep the permissions
        st_mode ^= st_mode & 0o170000
        st_mode |= S_IFLNK

    st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
    st_atime = filetime_to_time(data.ftLastAccessTime)
    st_mtime = filetime_to_time(data.ftLastWriteTime)
    st_ctime = filetime_to_time(data.ftCreationTime)

    # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
    # st_nlink, st_uid, st_gid
    return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
                           st_atime, st_mtime, st_ctime,
                           filetime_to_ns(data.ftLastAccessTime),
                           filetime_to_ns(data.ftLastWriteTime),
                           filetime_to_ns(data.ftCreationTime),
                           attributes)
find_data_to_stat(self._find_data) + return self._lstat + + def is_dir(self, follow_symlinks=True): + is_symlink = self.is_symlink() + if follow_symlinks and is_symlink: + try: + return self.stat().st_mode & 0o170000 == S_IFDIR + except OSError as e: + if e.errno != ENOENT: + raise + return False + elif is_symlink: + return False + else: + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_DIRECTORY != 0) + + def is_file(self, follow_symlinks=True): + is_symlink = self.is_symlink() + if follow_symlinks and is_symlink: + try: + return self.stat().st_mode & 0o170000 == S_IFREG + except OSError as e: + if e.errno != ENOENT: + raise + return False + elif is_symlink: + return False + else: + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_DIRECTORY == 0) + + def is_symlink(self): + return (self._find_data.dwFileAttributes & + FILE_ATTRIBUTE_REPARSE_POINT != 0 and + self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK) + + def inode(self): + if self._inode is None: + self._inode = lstat(self.path).st_ino + return self._inode + + def __str__(self): + return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def win_error(error, filename): + exc = WindowsError(error, ctypes.FormatError(error)) + exc.filename = filename + return exc + + def _scandir_python(path=unicode('.')): + """Like os.listdir(), but yield DirEntry objects instead of returning + a list of names. 
+ """ + # Call FindFirstFile and handle errors + if isinstance(path, bytes): + is_bytes = True + filename = join(path.decode('mbcs', 'strict'), '*.*') + else: + is_bytes = False + filename = join(path, '*.*') + data = wintypes.WIN32_FIND_DATAW() + data_p = ctypes.byref(data) + handle = FindFirstFile(filename, data_p) + if handle == INVALID_HANDLE_VALUE: + error = ctypes.GetLastError() + if error == ERROR_FILE_NOT_FOUND: + # No files, don't yield anything + return + raise win_error(error, path) + + # Call FindNextFile in a loop, stopping when no more files + try: + while True: + # Skip '.' and '..' (current and parent directory), but + # otherwise yield (filename, stat_result) tuple + name = data.cFileName + if name not in ('.', '..'): + if is_bytes: + name = name.encode('mbcs', 'replace') + yield Win32DirEntryPython(path, name, data) + + data = wintypes.WIN32_FIND_DATAW() + data_p = ctypes.byref(data) + success = FindNextFile(handle, data_p) + if not success: + error = ctypes.GetLastError() + if error == ERROR_NO_MORE_FILES: + break + raise win_error(error, path) + finally: + if not FindClose(handle): + raise win_error(ctypes.GetLastError(), path) + + if IS_PY3: + def scandir_python(path=unicode('.')): + if isinstance(path, bytes): + raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead") + return _scandir_python(path) + scandir_python.__doc__ = _scandir_python.__doc__ + else: + scandir_python = _scandir_python + + if _scandir is not None: + scandir_c = _scandir.scandir + DirEntry_c = _scandir.DirEntry + + if _scandir is not None: + scandir = scandir_c + DirEntry = DirEntry_c + elif ctypes is not None: + scandir = scandir_python + DirEntry = Win32DirEntryPython + else: + scandir = scandir_generic + DirEntry = GenericDirEntry + + +# Linux, OS X, and BSD implementation +elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform: + have_dirent_d_type = (sys.platform != 'sunos5') + + if ctypes is not None and 
have_dirent_d_type: + import ctypes.util + + DIR_p = ctypes.c_void_p + + # Rather annoying how the dirent struct is slightly different on each + # platform. The only fields we care about are d_name and d_type. + class Dirent(ctypes.Structure): + if sys.platform.startswith('linux'): + _fields_ = ( + ('d_ino', ctypes.c_ulong), + ('d_off', ctypes.c_long), + ('d_reclen', ctypes.c_ushort), + ('d_type', ctypes.c_byte), + ('d_name', ctypes.c_char * 256), + ) + elif 'openbsd' in sys.platform: + _fields_ = ( + ('d_ino', ctypes.c_uint64), + ('d_off', ctypes.c_uint64), + ('d_reclen', ctypes.c_uint16), + ('d_type', ctypes.c_uint8), + ('d_namlen', ctypes.c_uint8), + ('__d_padding', ctypes.c_uint8 * 4), + ('d_name', ctypes.c_char * 256), + ) + else: + _fields_ = ( + ('d_ino', ctypes.c_uint32), # must be uint32, not ulong + ('d_reclen', ctypes.c_ushort), + ('d_type', ctypes.c_byte), + ('d_namlen', ctypes.c_byte), + ('d_name', ctypes.c_char * 256), + ) + + DT_UNKNOWN = 0 + DT_DIR = 4 + DT_REG = 8 + DT_LNK = 10 + + Dirent_p = ctypes.POINTER(Dirent) + Dirent_pp = ctypes.POINTER(Dirent_p) + + libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) + opendir = libc.opendir + opendir.argtypes = [ctypes.c_char_p] + opendir.restype = DIR_p + + readdir_r = libc.readdir_r + readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp] + readdir_r.restype = ctypes.c_int + + closedir = libc.closedir + closedir.argtypes = [DIR_p] + closedir.restype = ctypes.c_int + + file_system_encoding = sys.getfilesystemencoding() + + class PosixDirEntry(object): + __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode') + + def __init__(self, scandir_path, name, d_type, inode): + self._scandir_path = scandir_path + self.name = name + self._d_type = d_type + self._inode = inode + self._stat = None + self._lstat = None + self._path = None + + @property + def path(self): + if self._path is None: + self._path = join(self._scandir_path, self.name) + return self._path + + def 
def scandir_python(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.

    ``path`` may be bytes or text; entry names are yielded in the same type.
    Raises OSError (with ``filename`` set to ``path``) if the directory
    cannot be opened, read, or closed.
    """
    if isinstance(path, bytes):
        opendir_path = path
        is_bytes = True
    else:
        opendir_path = path.encode(file_system_encoding)
        is_bytes = False
    dir_p = opendir(opendir_path)
    if not dir_p:
        raise posix_error(path)
    try:
        result = Dirent_p()
        while True:
            entry = Dirent()
            error = readdir_r(dir_p, entry, result)
            if error:
                # readdir_r() reports failure through its return value and
                # is not required to set errno, so build the exception from
                # the returned error number instead of ctypes.get_errno().
                exc = OSError(error, strerror(error))
                exc.filename = path
                raise exc
            if not result:
                # NULL result pointer: end of directory
                break
            name = entry.d_name
            if name not in (b'.', b'..'):
                if not is_bytes:
                    name = name.decode(file_system_encoding)
                yield PosixDirEntry(path, name, entry.d_type, entry.d_ino)
    finally:
        if closedir(dir_p):
            raise posix_error(path)
+ try: + scandir_it = scandir(top) + except OSError as error: + if onerror is not None: + onerror(error) + return + + while True: + try: + try: + entry = next(scandir_it) + except StopIteration: + break + except OSError as error: + if onerror is not None: + onerror(error) + return + + try: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider that the entry is not + # a directory, same behaviour than os.path.isdir(). + is_dir = False + + if is_dir: + dirs.append(entry.name) + else: + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: recurse into sub-directory, but exclude symlinks to + # directories if followlinks is False + if followlinks: + walk_into = True + else: + try: + is_symlink = entry.is_symlink() + except OSError: + # If is_symlink() raises an OSError, consider that the + # entry is not a symbolic link, same behaviour than + # os.path.islink(). + is_symlink = False + walk_into = not is_symlink + + if walk_into: + for entry in walk(entry.path, topdown, onerror, followlinks): + yield entry + + # Yield before recursion if going top down + if topdown: + yield top, dirs, nondirs + + # Recurse into sub-directories + for name in dirs: + new_path = join(top, name) + # Issue #23605: os.path.islink() is used instead of caching + # entry.is_symlink() result during the loop on os.scandir() because + # the caller can replace the directory entry during the "yield" + # above. 
"""Run "python setup.py install" to install scandir."""

try:
    from setuptools import setup, Extension
    from setuptools.command.build_ext import build_ext as base_build_ext
except ImportError:
    import warnings
    import sys
    val = sys.exc_info()[1]

    warnings.warn("import of setuptools failed %r" % val)
    from distutils.core import setup, Extension
    from distutils.command.build_ext import build_ext as base_build_ext

import os
import re
import sys
import logging

# Directory containing this script; every file below is located relative to
# it so the build also works when started from another working directory.
HERE = os.path.dirname(__file__)

# Get version without importing scandir because that will lock the
# .pyd file (if scandir is already installed) so it can't be
# overwritten during the install process
with open(os.path.join(HERE, 'scandir.py')) as f:
    for line in f:
        match = re.match(r"__version__.*'([0-9.]+)'", line)
        if match:
            version = match.group(1)
            break
    else:
        raise Exception("Couldn't find version in scandir.py")

with open(os.path.join(HERE, 'README.rst')) as f:
    long_description = f.read()


class BuildExt(base_build_ext):
    """build_ext that treats a failed C build as non-fatal."""

    # The extension is optional: if the C module cannot be built there is
    # a ctypes fallback and a slow pure-Python fallback.

    def build_extension(self, ext):
        try:
            base_build_ext.build_extension(self, ext)
        except Exception:
            # Log the exception instance (not just its type) so the reason
            # for the failed build is visible to the user.
            exception = sys.exc_info()[1]
            logging.warning("building the %s failed with %s", ext.name, exception)

extension = Extension('_scandir', ['_scandir.c'], optional=True)


setup(
    name='scandir',
    version=version,
    author='Ben Hoyt',
    author_email='benhoyt@gmail.com',
    url='https://github.com/benhoyt/scandir',
    license='New BSD License',
    description='scandir, a better directory iterator and faster os.walk()',
    long_description=long_description,
    py_modules=['scandir'],
    ext_modules=[extension],
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Operating System :: OS Independent',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python',
        'Topic :: System :: Filesystems',
        'Topic :: System :: Operating System',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: Implementation :: CPython',
    ], cmdclass={'build_ext': BuildExt},
)
def create_file(path, contents='1234'):
    """Write ``contents`` to a text file at ``path``.

    The file is opened in 'w' mode, so an existing file is truncated.
    """
    with open(path, 'w') as out:
        out.write(contents)
'subdir', 'file1.txt')) + create_file(join(TEST_PATH, 'subdir', 'unicod\u018F.txt')) + + create_file(join(TEST_PATH, 'subdir', 'unidir\u018F', 'file1.txt')) + + os.mkdir(join(TEST_PATH, 'linkdir')) + + +def setup_symlinks(): + join = os.path.join + + os.mkdir(join(TEST_PATH, 'linkdir', 'linksubdir')) + create_file(join(TEST_PATH, 'linkdir', 'file1.txt')) + + os.symlink(os.path.abspath(join(TEST_PATH, 'linkdir', 'file1.txt')), + join(TEST_PATH, 'linkdir', 'link_to_file')) + + dir_name = os.path.abspath(join(TEST_PATH, 'linkdir', 'linksubdir')) + dir_link = join(TEST_PATH, 'linkdir', 'link_to_dir') + if sys.version_info >= (3, 3): + # "target_is_directory" was only added in Python 3.3 + os.symlink(dir_name, dir_link, target_is_directory=True) + else: + os.symlink(dir_name, dir_link) + + +def teardown(): + try: + shutil.rmtree(TEST_PATH) + except OSError: + # why does the above fail sometimes? + time.sleep(0.1) + shutil.rmtree(TEST_PATH) + + +class TestMixin(object): + def setUp(self): + if not os.path.exists(TEST_PATH): + setup_main() + if symlinks_supported and not os.path.exists( + os.path.join(TEST_PATH, 'linkdir', 'linksubdir')): + setup_symlinks() + + if not hasattr(unittest.TestCase, 'skipTest'): + def skipTest(self, reason): + sys.stdout.write('skipped {0!r} '.format(reason)) + + def test_basic(self): + entries = sorted(self.scandir_func(TEST_PATH), key=lambda e: e.name) + self.assertEqual([(e.name, e.is_dir()) for e in entries], + [('file1.txt', False), ('file2.txt', False), + ('linkdir', True), ('subdir', True)]) + self.assertEqual([e.path for e in entries], + [os.path.join(TEST_PATH, e.name) for e in entries]) + + def test_dir_entry(self): + entries = dict((e.name, e) for e in self.scandir_func(TEST_PATH)) + e = entries['file1.txt'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [False, True, False]) + e = entries['file2.txt'] + self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [False, True, False]) + e = entries['subdir'] + 
self.assertEqual([e.is_dir(), e.is_file(), e.is_symlink()], [True, False, False]) + + self.assertEqual(entries['file1.txt'].stat().st_size, 4) + self.assertEqual(entries['file2.txt'].stat().st_size, 8) + + def test_stat(self): + entries = list(self.scandir_func(TEST_PATH)) + for entry in entries: + os_stat = os.stat(os.path.join(TEST_PATH, entry.name)) + scandir_stat = entry.stat() + self.assertEqual(os_stat.st_mode, scandir_stat.st_mode) + # TODO: be nice to figure out why these aren't identical on Windows and on PyPy + # * Windows: they seem to be a few microseconds to tens of seconds out + # * PyPy: for some reason os_stat's times are nanosecond, scandir's are not + self.assertAlmostEqual(os_stat.st_mtime, scandir_stat.st_mtime, delta=1) + self.assertAlmostEqual(os_stat.st_ctime, scandir_stat.st_ctime, delta=1) + if entry.is_file(): + self.assertEqual(os_stat.st_size, scandir_stat.st_size) + + def test_returns_iter(self): + it = self.scandir_func(TEST_PATH) + entry = next(it) + assert hasattr(entry, 'name') + + def check_file_attributes(self, result): + self.assertTrue(hasattr(result, 'st_file_attributes')) + self.assertTrue(isinstance(result.st_file_attributes, int_types)) + self.assertTrue(0 <= result.st_file_attributes <= 0xFFFFFFFF) + + def test_file_attributes(self): + if sys.platform != 'win32' or not self.has_file_attributes: + # st_file_attributes is Win32 specific (but can't use + # unittest.skipUnless on Python 2.6) + return self.skipTest('st_file_attributes not supported') + + entries = dict((e.name, e) for e in self.scandir_func(TEST_PATH)) + + # test st_file_attributes on a file (FILE_ATTRIBUTE_DIRECTORY not set) + result = entries['file1.txt'].stat() + self.check_file_attributes(result) + self.assertEqual(result.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY, 0) + + # test st_file_attributes on a directory (FILE_ATTRIBUTE_DIRECTORY set) + result = entries['subdir'].stat() + self.check_file_attributes(result) + 
self.assertEqual(result.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY, + FILE_ATTRIBUTE_DIRECTORY) + + def test_path(self): + entries = sorted(self.scandir_func(TEST_PATH), key=lambda e: e.name) + self.assertEqual([os.path.basename(e.name) for e in entries], + ['file1.txt', 'file2.txt', 'linkdir', 'subdir']) + self.assertEqual([os.path.normpath(os.path.join(TEST_PATH, e.name)) for e in entries], + [os.path.normpath(e.path) for e in entries]) + + def test_symlink(self): + if not symlinks_supported: + return self.skipTest('symbolic links not supported') + + entries = sorted(self.scandir_func(os.path.join(TEST_PATH, 'linkdir')), + key=lambda e: e.name) + + self.assertEqual([(e.name, e.is_symlink()) for e in entries], + [('file1.txt', False), + ('link_to_dir', True), + ('link_to_file', True), + ('linksubdir', False)]) + + self.assertEqual([(e.name, e.is_file(), e.is_file(follow_symlinks=False)) + for e in entries], + [('file1.txt', True, True), + ('link_to_dir', False, False), + ('link_to_file', True, False), + ('linksubdir', False, False)]) + + self.assertEqual([(e.name, e.is_dir(), e.is_dir(follow_symlinks=False)) + for e in entries], + [('file1.txt', False, False), + ('link_to_dir', True, False), + ('link_to_file', False, False), + ('linksubdir', True, True)]) + + def test_bytes(self): + # Check that unicode filenames are returned correctly as bytes in output + path = os.path.join(TEST_PATH, 'subdir').encode(sys.getfilesystemencoding(), 'replace') + self.assertTrue(isinstance(path, bytes)) + + # Python 3.6 on Windows fixes the bytes filename thing by using UTF-8 + if IS_PY3 and sys.platform == 'win32': + if not (sys.version_info >= (3, 6) and self.scandir_func == os.scandir): + self.assertRaises(TypeError, self.scandir_func, path) + return + + entries = [e for e in self.scandir_func(path) if e.name.startswith(b'unicod')] + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, bytes)) + 
self.assertTrue(isinstance(entry.path, bytes)) + + # b'unicod?.txt' on Windows, b'unicod\xc6\x8f.txt' (UTF-8) or similar on POSIX + entry_name = 'unicod\u018f.txt'.encode(sys.getfilesystemencoding(), 'replace') + self.assertEqual(entry.name, entry_name) + self.assertEqual(entry.path, os.path.join(path, entry_name)) + + def test_unicode(self): + # Check that unicode filenames are returned correctly as (unicode) str in output + path = os.path.join(TEST_PATH, 'subdir') + if not IS_PY3: + path = path.decode(sys.getfilesystemencoding(), 'replace') + self.assertTrue(isinstance(path, str)) + entries = [e for e in self.scandir_func(path) if e.name.startswith('unicod')] + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, str)) + self.assertTrue(isinstance(entry.path, str)) + + entry_name = 'unicod\u018f.txt' + self.assertEqual(entry.name, entry_name) + self.assertEqual(entry.path, os.path.join(path, 'unicod\u018f.txt')) + + # Check that it handles unicode input properly + path = os.path.join(TEST_PATH, 'subdir', 'unidir\u018f') + self.assertTrue(isinstance(path, str)) + entries = list(self.scandir_func(path)) + self.assertEqual(len(entries), 1) + entry = entries[0] + + self.assertTrue(isinstance(entry.name, str)) + self.assertTrue(isinstance(entry.path, str)) + self.assertEqual(entry.name, 'file1.txt') + self.assertEqual(entry.path, os.path.join(path, 'file1.txt')) + + def test_walk_unicode_handling(self): + encoding = sys.getfilesystemencoding() + dirname_unicode = u'test_unicode_dir' + dirname_bytes = dirname_unicode.encode(encoding) + dirpath = os.path.join(TEST_PATH.encode(encoding), dirname_bytes) + try: + os.makedirs(dirpath) + + if sys.platform != 'win32': + # test bytes + self.assertTrue(isinstance(dirpath, bytes)) + for (path, dirs, files) in scandir.walk(dirpath): + self.assertTrue(isinstance(path, bytes)) + + # test unicode + text_type = str if IS_PY3 else unicode + dirpath_unicode = text_type(dirpath, encoding) + 
self.assertTrue(isinstance(dirpath_unicode, text_type)) + for (path, dirs, files) in scandir.walk(dirpath_unicode): + self.assertTrue(isinstance(path, text_type)) + finally: + shutil.rmtree(dirpath) + +if has_scandir: + class TestScandirGeneric(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_generic + self.has_file_attributes = False + TestMixin.setUp(self) + + + if getattr(scandir, 'scandir_python', None): + class TestScandirPython(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_python + self.has_file_attributes = True + TestMixin.setUp(self) + + + if getattr(scandir, 'scandir_c', None): + class TestScandirC(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = scandir.scandir_c + self.has_file_attributes = True + TestMixin.setUp(self) + + + class TestScandirDirEntry(unittest.TestCase): + def setUp(self): + if not os.path.exists(TEST_PATH): + setup_main() + + def test_iter_returns_dir_entry(self): + it = scandir.scandir(TEST_PATH) + entry = next(it) + assert isinstance(entry, scandir.DirEntry) + + +if hasattr(os, 'scandir'): + class TestScandirOS(TestMixin, unittest.TestCase): + def setUp(self): + self.scandir_func = os.scandir + self.has_file_attributes = True + TestMixin.setUp(self) diff --git a/third_party/python/scandir/test/test_walk.py b/third_party/python/scandir/test/test_walk.py new file mode 100644 index 0000000000..7995f3adba --- /dev/null +++ b/third_party/python/scandir/test/test_walk.py @@ -0,0 +1,213 @@ +"""Tests for scandir.walk(), copied from CPython's tests for os.walk().""" + +import os +import shutil +import sys + +if sys.version_info[:2] < (2, 7): + import unittest2 as unittest +else: + import unittest + +import scandir + +walk_func = scandir.walk + + +class TestWalk(unittest.TestCase): + testfn = os.path.join(os.path.dirname(__file__), 'temp') + + def test_traversal(self): + # Build: + # TESTFN/ + # TEST1/ a file kid and two directory kids + # 
tmp1 + # SUB1/ a file kid and a directory kid + # tmp2 + # SUB11/ no kids + # SUB2/ a file kid and a dirsymlink kid + # tmp3 + # link/ a symlink to TEST2 + # TEST2/ + # tmp4 a lone file + walk_path = os.path.join(self.testfn, "TEST1") + sub1_path = os.path.join(walk_path, "SUB1") + sub11_path = os.path.join(sub1_path, "SUB11") + sub2_path = os.path.join(walk_path, "SUB2") + tmp1_path = os.path.join(walk_path, "tmp1") + tmp2_path = os.path.join(sub1_path, "tmp2") + tmp3_path = os.path.join(sub2_path, "tmp3") + link_path = os.path.join(sub2_path, "link") + t2_path = os.path.join(self.testfn, "TEST2") + tmp4_path = os.path.join(self.testfn, "TEST2", "tmp4") + + # Create stuff. + os.makedirs(sub11_path) + os.makedirs(sub2_path) + os.makedirs(t2_path) + for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path: + f = open(path, "w") + f.write("I'm " + path + " and proud of it. Blame test_os.\n") + f.close() + has_symlink = hasattr(os, "symlink") + if has_symlink: + try: + if sys.platform == 'win32' and sys.version_info >= (3, 2): + # "target_is_directory" was only added in Python 3.2 (on Windows) + os.symlink(os.path.abspath(t2_path), link_path, target_is_directory=True) + else: + os.symlink(os.path.abspath(t2_path), link_path) + sub2_tree = (sub2_path, ["link"], ["tmp3"]) + except NotImplementedError: + sub2_tree = (sub2_path, [], ["tmp3"]) + else: + sub2_tree = (sub2_path, [], ["tmp3"]) + + # Walk top-down. + all = list(walk_func(walk_path)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: TESTFN, SUB1, SUB11, SUB2 + # flipped: TESTFN, SUB2, SUB1, SUB11 + flipped = all[0][1][0] != "SUB1" + all[0][1].sort() + self.assertEqual(all[0], (walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[1 + flipped], (sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 + flipped], (sub11_path, [], [])) + self.assertEqual(all[3 - 2 * flipped], sub2_tree) + + # Prune the search. 
+ all = [] + for root, dirs, files in walk_func(walk_path): + all.append((root, dirs, files)) + # Don't descend into SUB1. + if 'SUB1' in dirs: + # Note that this also mutates the dirs we appended to all! + dirs.remove('SUB1') + self.assertEqual(len(all), 2) + self.assertEqual(all[0], (walk_path, ["SUB2"], ["tmp1"])) + self.assertEqual(all[1], sub2_tree) + + # Walk bottom-up. + all = list(walk_func(walk_path, topdown=False)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: SUB11, SUB1, SUB2, TESTFN + # flipped: SUB2, SUB11, SUB1, TESTFN + flipped = all[3][1][0] != "SUB1" + all[3][1].sort() + self.assertEqual(all[3], (walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[flipped], (sub11_path, [], [])) + self.assertEqual(all[flipped + 1], (sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 - 2 * flipped], sub2_tree) + + if has_symlink: + # Walk, following symlinks. + for root, dirs, files in walk_func(walk_path, followlinks=True): + if root == link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with followlinks=True") + + # Test creating a directory and adding it to dirnames + sub3_path = os.path.join(walk_path, "SUB3") + all = [] + for root, dirs, files in walk_func(walk_path): + all.append((root, dirs, files)) + if 'SUB1' in dirs: + os.makedirs(sub3_path) + dirs.append('SUB3') + all.sort() + self.assertEqual(os.path.split(all[-1][0])[1], 'SUB3') + + def tearDown(self): + # Tear everything down. This is a decent use for bottom-up on + # Windows, which doesn't have a recursive delete command. The + # (not so) subtlety is that rmdir will fail unless the dir's + # kids are removed first, so bottom up is essential. 
+ for root, dirs, files in os.walk(self.testfn, topdown=False): + for name in files: + os.remove(os.path.join(root, name)) + for name in dirs: + dirname = os.path.join(root, name) + if not os.path.islink(dirname): + os.rmdir(dirname) + else: + os.remove(dirname) + os.rmdir(self.testfn) + + +class TestWalkSymlink(unittest.TestCase): + temp_dir = os.path.join(os.path.dirname(__file__), 'temp') + + def setUp(self): + os.mkdir(self.temp_dir) + self.dir_name = os.path.join(self.temp_dir, 'dir') + os.mkdir(self.dir_name) + open(os.path.join(self.dir_name, 'subfile'), 'w').close() + self.file_name = os.path.join(self.temp_dir, 'file') + open(self.file_name, 'w').close() + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_symlink_to_file(self): + if not hasattr(os, 'symlink'): + return + + try: + os.symlink(self.file_name, os.path.join(self.temp_dir, + 'link_to_file')) + except NotImplementedError: + # Windows versions before Vista don't support symbolic links + return + + output = sorted(walk_func(self.temp_dir)) + dirs = sorted(output[0][1]) + files = sorted(output[0][2]) + self.assertEqual(dirs, ['dir']) + self.assertEqual(files, ['file', 'link_to_file']) + + self.assertEqual(len(output), 2) + self.assertEqual(output[1][1], []) + self.assertEqual(output[1][2], ['subfile']) + + def test_symlink_to_directory(self): + if not hasattr(os, 'symlink'): + return + + link_name = os.path.join(self.temp_dir, 'link_to_dir') + try: + if sys.platform == 'win32' and sys.version_info >= (3, 2): + # "target_is_directory" was only added in Python 3.2 (on Windows) + os.symlink(self.dir_name, link_name, target_is_directory=True) + else: + os.symlink(self.dir_name, link_name) + except NotImplementedError: + # Windows versions before Vista don't support symbolic links + return + + output = sorted(walk_func(self.temp_dir)) + dirs = sorted(output[0][1]) + files = sorted(output[0][2]) + self.assertEqual(dirs, ['dir', 'link_to_dir']) + self.assertEqual(files, ['file']) + + 
self.assertEqual(len(output), 2) + self.assertEqual(output[1][1], []) + self.assertEqual(output[1][2], ['subfile']) + + output = sorted(walk_func(self.temp_dir, followlinks=True)) + dirs = sorted(output[0][1]) + files = sorted(output[0][2]) + self.assertEqual(dirs, ['dir', 'link_to_dir']) + self.assertEqual(files, ['file']) + + self.assertEqual(len(output), 3) + self.assertEqual(output[1][1], []) + self.assertEqual(output[1][2], ['subfile']) + self.assertEqual(os.path.basename(output[2][0]), 'link_to_dir') + self.assertEqual(output[2][1], []) + self.assertEqual(output[2][2], ['subfile']) diff --git a/third_party/python/scandir/winreparse.h b/third_party/python/scandir/winreparse.h new file mode 100644 index 0000000000..66f7775dd2 --- /dev/null +++ b/third_party/python/scandir/winreparse.h @@ -0,0 +1,53 @@ +#ifndef Py_WINREPARSE_H +#define Py_WINREPARSE_H + +#ifdef MS_WINDOWS +#include <Windows.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* The following structure was copied from + http://msdn.microsoft.com/en-us/library/ff552012.aspx as the required + include doesn't seem to be present in the Windows SDK (at least as included + with Visual Studio Express). 
*/ +typedef struct _REPARSE_DATA_BUFFER { + ULONG ReparseTag; + USHORT ReparseDataLength; + USHORT Reserved; + union { + struct { + USHORT SubstituteNameOffset; + USHORT SubstituteNameLength; + USHORT PrintNameOffset; + USHORT PrintNameLength; + ULONG Flags; + WCHAR PathBuffer[1]; + } SymbolicLinkReparseBuffer; + + struct { + USHORT SubstituteNameOffset; + USHORT SubstituteNameLength; + USHORT PrintNameOffset; + USHORT PrintNameLength; + WCHAR PathBuffer[1]; + } MountPointReparseBuffer; + + struct { + UCHAR DataBuffer[1]; + } GenericReparseBuffer; + }; +} REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER; + +#define REPARSE_DATA_BUFFER_HEADER_SIZE FIELD_OFFSET(REPARSE_DATA_BUFFER,\ + GenericReparseBuffer) +#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE ( 16 * 1024 ) + +#ifdef __cplusplus +} +#endif + +#endif /* MS_WINDOWS */ + +#endif /* !Py_WINREPARSE_H */ |