Diffstat (limited to 'third_party/python/diskcache')
12 files changed, 5944 insertions, 0 deletions
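This change vendors diskcache 4.1.0 wholesale; the file-by-file diff follows. For orientation, here is a minimal sketch of the cache API the vendored package documents (the directory paths are illustrative, not anything this tree mandates)::

    import diskcache

    # Cache keeps key/value pairs in a SQLite database plus value files
    # under the given directory, creating it on demand.
    cache = diskcache.Cache('/tmp/example-diskcache')
    cache.set('greeting', 'hello', expire=60)   # optional expiry, in seconds
    assert cache.get('greeting') == 'hello'

    # FanoutCache shards keys across several Cache instances to reduce
    # writer contention on the SQLite database.
    fanout = diskcache.FanoutCache('/tmp/example-fanout', shards=4)
    fanout['answer'] = 42

    cache.close()
    fanout.close()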
diff --git a/third_party/python/diskcache/diskcache-4.1.0.dist-info/LICENSE b/third_party/python/diskcache/diskcache-4.1.0.dist-info/LICENSE new file mode 100644 index 0000000000..3259b989fd --- /dev/null +++ b/third_party/python/diskcache/diskcache-4.1.0.dist-info/LICENSE @@ -0,0 +1,12 @@ +Copyright 2016-2019 Grant Jenks + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. diff --git a/third_party/python/diskcache/diskcache-4.1.0.dist-info/METADATA b/third_party/python/diskcache/diskcache-4.1.0.dist-info/METADATA new file mode 100644 index 0000000000..dff6db382f --- /dev/null +++ b/third_party/python/diskcache/diskcache-4.1.0.dist-info/METADATA @@ -0,0 +1,430 @@ +Metadata-Version: 2.1 +Name: diskcache +Version: 4.1.0 +Summary: Disk Cache -- Disk and file backed persistent cache. +Home-page: http://www.grantjenks.com/docs/diskcache/ +Author: Grant Jenks +Author-email: contact@grantjenks.com +License: Apache 2.0 +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Natural Language :: English +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy + +DiskCache: Disk Backed Cache +============================ + +`DiskCache`_ is an Apache2 licensed disk and file backed cache library, written +in pure-Python, and compatible with Django. + +The cloud-based computing of 2019 puts a premium on memory. Gigabytes of empty +space is left on disks as processes vie for memory. Among these processes is +Memcached (and sometimes Redis) which is used as a cache. Wouldn't it be nice +to leverage empty disk space for caching? + +Django is Python's most popular web framework and ships with several caching +backends. Unfortunately the file-based cache in Django is essentially +broken. The culling method is random and large caches repeatedly scan a cache +directory which slows linearly with growth. Can you really allow it to take +sixty milliseconds to store a key in a cache with a thousand items? + +In Python, we can do better. And we can do it in pure-Python! 
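The IPython session below, taken from the upstream README, compares a memcached client against DiskCache; it assumes a memcached server already running on 127.0.0.1 and the ``pylibmc`` client installed. A rough plain-Python equivalent of the DiskCache half, using only the standard library's ``timeit`` (absolute numbers vary with machine and disk), might be::

    import timeit
    import diskcache as dc

    cache = dc.Cache('tmp')          # same relative directory as the session below
    cache[b'key'] = b'value'

    # Average cost of a warm read, comparable to the %timeit line below.
    per_call = timeit.timeit(lambda: cache[b'key'], number=10000) / 10000
    print('%.1f microseconds per get' % (per_call * 1e6))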
+ +:: + + In [1]: import pylibmc + In [2]: client = pylibmc.Client(['127.0.0.1'], binary=True) + In [3]: client[b'key'] = b'value' + In [4]: %timeit client[b'key'] + + 10000 loops, best of 3: 25.4 µs per loop + + In [5]: import diskcache as dc + In [6]: cache = dc.Cache('tmp') + In [7]: cache[b'key'] = b'value' + In [8]: %timeit cache[b'key'] + + 100000 loops, best of 3: 11.8 µs per loop + +**Note:** Micro-benchmarks have their place but are not a substitute for real +measurements. DiskCache offers cache benchmarks to defend its performance +claims. Micro-optimizations are avoided but your mileage may vary. + +DiskCache efficiently makes gigabytes of storage space available for +caching. By leveraging rock-solid database libraries and memory-mapped files, +cache performance can match and exceed industry-standard solutions. There's no +need for a C compiler or running another process. Performance is a feature and +testing has 100% coverage with unit tests and hours of stress. + +Testimonials +------------ + +`Daren Hasenkamp`_, Founder -- + + "It's a useful, simple API, just like I love about Redis. It has reduced + the amount of queries hitting my Elasticsearch cluster by over 25% for a + website that gets over a million users/day (100+ hits/second)." + +`Mathias Petermann`_, Senior Linux System Engineer -- + + "I implemented it into a wrapper for our Ansible lookup modules and we were + able to speed up some Ansible runs by almost 3 times. DiskCache is saving + us a ton of time." + +Does your company or website use `DiskCache`_? Send us a `message +<contact@grantjenks.com>`_ and let us know. + +.. _`Daren Hasenkamp`: https://www.linkedin.com/in/daren-hasenkamp-93006438/ +.. _`Mathias Petermann`: https://www.linkedin.com/in/mathias-petermann-a8aa273b/ + +Features +-------- + +- Pure-Python +- Fully Documented +- Benchmark comparisons (alternatives, Django cache backends) +- 100% test coverage +- Hours of stress testing +- Performance matters +- Django compatible API +- Thread-safe and process-safe +- Supports multiple eviction policies (LRU and LFU included) +- Keys support "tag" metadata and eviction +- Developed on Python 3.7 +- Tested on CPython 2.7, 3.4, 3.5, 3.6, 3.7 and PyPy +- Tested on Linux, Mac OS X, and Windows +- Tested using Travis CI and AppVeyor CI + +.. image:: https://api.travis-ci.org/grantjenks/python-diskcache.svg?branch=master + :target: http://www.grantjenks.com/docs/diskcache/ + +.. image:: https://ci.appveyor.com/api/projects/status/github/grantjenks/python-diskcache?branch=master&svg=true + :target: http://www.grantjenks.com/docs/diskcache/ + +Quickstart +---------- + +Installing `DiskCache`_ is simple with `pip <http://www.pip-installer.org/>`_:: + + $ pip install diskcache + +You can access documentation in the interpreter with Python's built-in help +function:: + + >>> import diskcache + >>> help(diskcache) + +The core of `DiskCache`_ is three data types intended for caching. `Cache`_ +objects manage a SQLite database and filesystem directory to store key and +value pairs. `FanoutCache`_ provides a sharding layer to utilize multiple +caches and `DjangoCache`_ integrates that with `Django`_:: + + >>> from diskcache import Cache, FanoutCache, DjangoCache + >>> help(Cache) + >>> help(FanoutCache) + >>> help(DjangoCache) + +Built atop the caching data types, are `Deque`_ and `Index`_ which work as a +cross-process, persistent replacements for Python's ``collections.deque`` and +``dict``. 
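A hedged sketch of that replacement behavior, with illustrative scratch directories (entries survive process restarts because they live on disk)::

    >>> from diskcache import Deque, Index
    >>> dq = Deque(directory='/tmp/demo-deque')
    >>> dq.append('a'); dq.appendleft('b')
    >>> list(dq)
    ['b', 'a']
    >>> index = Index('/tmp/demo-index')
    >>> index['key'] = 'value'
    >>> index['key']
    'value'

Unlike their in-memory counterparts, `Deque`_ and `Index`_ can be shared by several processes at once.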
These implement the sequence and mapping container base classes:: + + >>> from diskcache import Deque, Index + >>> help(Deque) + >>> help(Index) + +Finally, a number of `recipes`_ for cross-process synchronization are provided +using an underlying cache. Features like memoization with cache stampede +prevention, cross-process locking, and cross-process throttling are available:: + + >>> from diskcache import memoize_stampede, Lock, throttle + >>> help(memoize_stampede) + >>> help(Lock) + >>> help(throttle) + +Python's docstrings are a quick way to get started but not intended as a +replacement for the `DiskCache Tutorial`_ and `DiskCache API Reference`_. + +.. _`Cache`: http://www.grantjenks.com/docs/diskcache/tutorial.html#cache +.. _`FanoutCache`: http://www.grantjenks.com/docs/diskcache/tutorial.html#fanoutcache +.. _`DjangoCache`: http://www.grantjenks.com/docs/diskcache/tutorial.html#djangocache +.. _`Django`: https://www.djangoproject.com/ +.. _`Deque`: http://www.grantjenks.com/docs/diskcache/tutorial.html#deque +.. _`Index`: http://www.grantjenks.com/docs/diskcache/tutorial.html#index +.. _`recipes`: http://www.grantjenks.com/docs/diskcache/tutorial.html#recipes + +User Guide +---------- + +For those wanting more details, this part of the documentation describes +tutorial, benchmarks, API, and development. + +* `DiskCache Tutorial`_ +* `DiskCache Cache Benchmarks`_ +* `DiskCache DjangoCache Benchmarks`_ +* `Case Study: Web Crawler`_ +* `Case Study: Landing Page Caching`_ +* `Talk: All Things Cached - SF Python 2017 Meetup`_ +* `DiskCache API Reference`_ +* `DiskCache Development`_ + +.. _`DiskCache Tutorial`: http://www.grantjenks.com/docs/diskcache/tutorial.html +.. _`DiskCache Cache Benchmarks`: http://www.grantjenks.com/docs/diskcache/cache-benchmarks.html +.. _`DiskCache DjangoCache Benchmarks`: http://www.grantjenks.com/docs/diskcache/djangocache-benchmarks.html +.. _`Talk: All Things Cached - SF Python 2017 Meetup`: http://www.grantjenks.com/docs/diskcache/sf-python-2017-meetup-talk.html +.. _`Case Study: Web Crawler`: http://www.grantjenks.com/docs/diskcache/case-study-web-crawler.html +.. _`Case Study: Landing Page Caching`: http://www.grantjenks.com/docs/diskcache/case-study-landing-page-caching.html +.. _`DiskCache API Reference`: http://www.grantjenks.com/docs/diskcache/api.html +.. _`DiskCache Development`: http://www.grantjenks.com/docs/diskcache/development.html + +Comparisons +----------- + +Comparisons to popular projects related to `DiskCache`_. + +Key-Value Stores +................ + +`DiskCache`_ is mostly a simple key-value store. Feature comparisons with four +other projects are shown in the tables below. + +* `dbm`_ is part of Python's standard library and implements a generic + interface to variants of the DBM database — dbm.gnu or dbm.ndbm. If none of + these modules is installed, the slow-but-simple dbm.dumb is used. +* `shelve`_ is part of Python's standard library and implements a “shelf” as a + persistent, dictionary-like object. The difference with “dbm” databases is + that the values can be anything that the pickle module can handle. +* `sqlitedict`_ is a lightweight wrapper around Python's sqlite3 database with + a simple, Pythonic dict-like interface and support for multi-thread + access. Keys are arbitrary strings, values arbitrary pickle-able objects. +* `pickleDB`_ is a lightweight and simple key-value store. It is built upon + Python's simplejson module and was inspired by Redis. It is licensed with the + BSD three-caluse license. + +.. 
_`dbm`: https://docs.python.org/3/library/dbm.html +.. _`shelve`: https://docs.python.org/3/library/shelve.html +.. _`sqlitedict`: https://github.com/RaRe-Technologies/sqlitedict +.. _`pickleDB`: https://pythonhosted.org/pickleDB/ + +**Features** + +================ ============= ========= ========= ============ ============ +Feature diskcache dbm shelve sqlitedict pickleDB +================ ============= ========= ========= ============ ============ +Atomic? Always Maybe Maybe Maybe No +Persistent? Yes Yes Yes Yes Yes +Thread-safe? Yes No No Yes No +Process-safe? Yes No No Maybe No +Backend? SQLite DBM DBM SQLite File +Serialization? Customizable None Pickle Customizable JSON +Data Types? Mapping/Deque Mapping Mapping Mapping Mapping +Ordering? Insert/Sorted None None None None +Eviction? LRU/LFU/more None None None None +Vacuum? Automatic Maybe Maybe Manual Automatic +Transactions? Yes No No Maybe No +Multiprocessing? Yes No No No No +Forkable? Yes No No No No +Metadata? Yes No No No No +================ ============= ========= ========= ============ ============ + +**Quality** + +================ ============= ========= ========= ============ ============ +Project diskcache dbm shelve sqlitedict pickleDB +================ ============= ========= ========= ============ ============ +Tests? Yes Yes Yes Yes Yes +Coverage? Yes Yes Yes Yes No +Stress? Yes No No No No +CI Tests? Linux/Windows Yes Yes Linux No +Python? 2/3/PyPy All All 2/3 2/3 +License? Apache2 Python Python Apache2 3-Clause BSD +Docs? Extensive Summary Summary Readme Summary +Benchmarks? Yes No No No No +Sources? GitHub GitHub GitHub GitHub GitHub +Pure-Python? Yes Yes Yes Yes Yes +Server? No No No No No +Integrations? Django None None None None +================ ============= ========= ========= ============ ============ + +**Timings** + +These are rough measurements. See `DiskCache Cache Benchmarks`_ for more +rigorous data. + +================ ============= ========= ========= ============ ============ +Project diskcache dbm shelve sqlitedict pickleDB +================ ============= ========= ========= ============ ============ +get 25 µs 36 µs 41 µs 513 µs 92 µs +set 198 µs 900 µs 928 µs 697 µs 1,020 µs +delete 248 µs 740 µs 702 µs 1,717 µs 1,020 µs +================ ============= ========= ========= ============ ============ + +Caching Libraries +................. + +* `joblib.Memory`_ provides caching functions and works by explicitly saving + the inputs and outputs to files. It is designed to work with non-hashable and + potentially large input and output data types such as numpy arrays. +* `klepto`_ extends Python’s `lru_cache` to utilize different keymaps and + alternate caching algorithms, such as `lfu_cache` and `mru_cache`. Klepto + uses a simple dictionary-sytle interface for all caches and archives. + +.. _`klepto`: https://pypi.org/project/klepto/ +.. _`joblib.Memory`: https://joblib.readthedocs.io/en/latest/memory.html + +Data Structures +............... + +* `dict`_ is a mapping object that maps hashable keys to arbitrary + values. Mappings are mutable objects. There is currently only one standard + Python mapping type, the dictionary. +* `pandas`_ is a Python package providing fast, flexible, and expressive data + structures designed to make working with “relational” or “labeled” data both + easy and intuitive. +* `Sorted Containers`_ is an Apache2 licensed sorted collections library, + written in pure-Python, and fast as C-extensions. 
Sorted Containers + implements sorted list, sorted dictionary, and sorted set data types. + +.. _`dict`: https://docs.python.org/3/library/stdtypes.html#typesmapping +.. _`pandas`: https://pandas.pydata.org/ +.. _`Sorted Containers`: http://www.grantjenks.com/docs/sortedcontainers/ + +Pure-Python Databases +..................... + +* `ZODB`_ supports an isomorphic interface for database operations which means + there's little impact on your code to make objects persistent and there's no + database mapper that partially hides the datbase. +* `CodernityDB`_ is an open source, pure-Python, multi-platform, schema-less, + NoSQL database and includes an HTTP server version, and a Python client + library that aims to be 100% compatible with the embedded version. +* `TinyDB`_ is a tiny, document oriented database optimized for your + happiness. If you need a simple database with a clean API that just works + without lots of configuration, TinyDB might be the right choice for you. + +.. _`ZODB`: http://www.zodb.org/ +.. _`CodernityDB`: https://pypi.org/project/CodernityDB/ +.. _`TinyDB`: https://tinydb.readthedocs.io/ + +Object Relational Mappings (ORM) +................................ + +* `Django ORM`_ provides models that are the single, definitive source of + information about data and contains the essential fields and behaviors of the + stored data. Generally, each model maps to a single SQL database table. +* `SQLAlchemy`_ is the Python SQL toolkit and Object Relational Mapper that + gives application developers the full power and flexibility of SQL. It + provides a full suite of well known enterprise-level persistence patterns. +* `Peewee`_ is a simple and small ORM. It has few (but expressive) concepts, + making it easy to learn and intuitive to use. Peewee supports Sqlite, MySQL, + and PostgreSQL with tons of extensions. +* `SQLObject`_ is a popular Object Relational Manager for providing an object + interface to your database, with tables as classes, rows as instances, and + columns as attributes. +* `Pony ORM`_ is a Python ORM with beautiful query syntax. Use Python syntax + for interacting with the database. Pony translates such queries into SQL and + executes them in the database in the most efficient way. + +.. _`Django ORM`: https://docs.djangoproject.com/en/dev/topics/db/ +.. _`SQLAlchemy`: https://www.sqlalchemy.org/ +.. _`Peewee`: http://docs.peewee-orm.com/ +.. _`dataset`: https://dataset.readthedocs.io/ +.. _`SQLObject`: http://sqlobject.org/ +.. _`Pony ORM`: https://ponyorm.com/ + +SQL Databases +............. + +* `SQLite`_ is part of Python's standard library and provides a lightweight + disk-based database that doesn’t require a separate server process and allows + accessing the database using a nonstandard variant of the SQL query language. +* `MySQL`_ is one of the world’s most popular open source databases and has + become a leading database choice for web-based applications. MySQL includes a + standardized database driver for Python platforms and development. +* `PostgreSQL`_ is a powerful, open source object-relational database system + with over 30 years of active development. Psycopg is the most popular + PostgreSQL adapter for the Python programming language. +* `Oracle DB`_ is a relational database management system (RDBMS) from the + Oracle Corporation. Originally developed in 1977, Oracle DB is one of the + most trusted and widely used enterprise relational database engines. 
+* `Microsoft SQL Server`_ is a relational database management system developed + by Microsoft. As a database server, it stores and retrieves data as requested + by other software applications. + +.. _`SQLite`: https://docs.python.org/3/library/sqlite3.html +.. _`MySQL`: https://dev.mysql.com/downloads/connector/python/ +.. _`PostgreSQL`: http://initd.org/psycopg/ +.. _`Oracle DB`: https://pypi.org/project/cx_Oracle/ +.. _`Microsoft SQL Server`: https://pypi.org/project/pyodbc/ + +Other Databases +............... + +* `Memcached`_ is free and open source, high-performance, distributed memory + object caching system, generic in nature, but intended for use in speeding up + dynamic web applications by alleviating database load. +* `Redis`_ is an open source, in-memory data structure store, used as a + database, cache and message broker. It supports data structures such as + strings, hashes, lists, sets, sorted sets with range queries, and more. +* `MongoDB`_ is a cross-platform document-oriented database program. Classified + as a NoSQL database program, MongoDB uses JSON-like documents with + schema. PyMongo is the recommended way to work with MongoDB from Python. +* `LMDB`_ is a lightning-fast, memory-mapped database. With memory-mapped + files, it has the read performance of a pure in-memory database while + retaining the persistence of standard disk-based databases. +* `BerkeleyDB`_ is a software library intended to provide a high-performance + embedded database for key/value data. Berkeley DB is a programmatic toolkit + that provides built-in database support for desktop and server applications. +* `LevelDB`_ is a fast key-value storage library written at Google that + provides an ordered mapping from string keys to string values. Data is stored + sorted by key and users can provide a custom comparison function. + +.. _`Memcached`: https://pypi.org/project/python-memcached/ +.. _`MongoDB`: https://api.mongodb.com/python/current/ +.. _`Redis`: https://redis.io/clients#python +.. _`LMDB`: https://lmdb.readthedocs.io/ +.. _`BerkeleyDB`: https://pypi.org/project/bsddb3/ +.. _`LevelDB`: https://plyvel.readthedocs.io/ + +Reference +--------- + +* `DiskCache Documentation`_ +* `DiskCache at PyPI`_ +* `DiskCache at GitHub`_ +* `DiskCache Issue Tracker`_ + +.. _`DiskCache Documentation`: http://www.grantjenks.com/docs/diskcache/ +.. _`DiskCache at PyPI`: https://pypi.python.org/pypi/diskcache/ +.. _`DiskCache at GitHub`: https://github.com/grantjenks/python-diskcache/ +.. _`DiskCache Issue Tracker`: https://github.com/grantjenks/python-diskcache/issues/ + +License +------- + +Copyright 2016-2019 Grant Jenks + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. + +.. 
_`DiskCache`: http://www.grantjenks.com/docs/diskcache/ + + diff --git a/third_party/python/diskcache/diskcache-4.1.0.dist-info/RECORD b/third_party/python/diskcache/diskcache-4.1.0.dist-info/RECORD new file mode 100644 index 0000000000..d4bca0ad45 --- /dev/null +++ b/third_party/python/diskcache/diskcache-4.1.0.dist-info/RECORD @@ -0,0 +1,12 @@ +diskcache/__init__.py,sha256=6RtBwXsSbdiZ-H44I7nLwF2-1VyVMHq1cJ5ynfnQt-E,1234 +diskcache/cli.py,sha256=JzkI2KtJJ0VRfBAq69lTkItoLwg4vOrTezczetSCfaY,40 +diskcache/core.py,sha256=nZRqL-VuJZZw1Ll5ADBzGivpIN_vXTDC8KSOyQ_XvjI,82426 +diskcache/djangocache.py,sha256=vAWT1FdmvHoHas244yoOblc6GhvozgLuFyjASMFPaK0,15488 +diskcache/fanout.py,sha256=Ha5C8BpClAHKEi6cJvJ5HvmAKlNwfiMpjb_az_hIJE0,21271 +diskcache/persistent.py,sha256=hgsS9-LymHsBeuNx0fBPOsiobvpJmGOIxT1T67BQUYw,37450 +diskcache/recipes.py,sha256=VQty-6AVoXcc6hfp1QOFvQZSf8W5AbQBFe1N3QlyILk,13849 +diskcache-4.1.0.dist-info/LICENSE,sha256=KBQYvOJPaViOo1FzqVpqPSGqW0jDZG6KiE8kLKMzNkw,559 +diskcache-4.1.0.dist-info/METADATA,sha256=wWGlNFCEiyWQ6R5zq3m3RFQbxELo6oJyrpryNir-yFo,19886 +diskcache-4.1.0.dist-info/WHEEL,sha256=h_aVn5OB2IERUjMbi2pucmR_zzWJtk303YXvhh60NJ8,110 +diskcache-4.1.0.dist-info/top_level.txt,sha256=A5fqg_AHgOQc_0o1NZ-Uo5Bsb7CV3fR99J-p1-F4yuA,10 +diskcache-4.1.0.dist-info/RECORD,, diff --git a/third_party/python/diskcache/diskcache-4.1.0.dist-info/WHEEL b/third_party/python/diskcache/diskcache-4.1.0.dist-info/WHEEL new file mode 100644 index 0000000000..78e6f69d1d --- /dev/null +++ b/third_party/python/diskcache/diskcache-4.1.0.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.33.4) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/third_party/python/diskcache/diskcache-4.1.0.dist-info/top_level.txt b/third_party/python/diskcache/diskcache-4.1.0.dist-info/top_level.txt new file mode 100644 index 0000000000..91667d46b1 --- /dev/null +++ b/third_party/python/diskcache/diskcache-4.1.0.dist-info/top_level.txt @@ -0,0 +1 @@ +diskcache diff --git a/third_party/python/diskcache/diskcache/__init__.py b/third_party/python/diskcache/diskcache/__init__.py new file mode 100644 index 0000000000..192524e6ce --- /dev/null +++ b/third_party/python/diskcache/diskcache/__init__.py @@ -0,0 +1,51 @@ +""" +DiskCache API Reference +======================= + +The :doc:`tutorial` provides a helpful walkthrough of most methods. + +""" + +from .core import Cache, Disk, EmptyDirWarning, JSONDisk, UnknownFileWarning, Timeout +from .core import DEFAULT_SETTINGS, ENOVAL, EVICTION_POLICY, UNKNOWN +from .fanout import FanoutCache +from .persistent import Deque, Index +from .recipes import Averager, BoundedSemaphore, Lock, RLock +from .recipes import barrier, memoize_stampede, throttle + +__all__ = [ + 'Averager', + 'BoundedSemaphore', + 'Cache', + 'DEFAULT_SETTINGS', + 'Deque', + 'Disk', + 'ENOVAL', + 'EVICTION_POLICY', + 'EmptyDirWarning', + 'FanoutCache', + 'Index', + 'JSONDisk', + 'Lock', + 'RLock', + 'Timeout', + 'UNKNOWN', + 'UnknownFileWarning', + 'barrier', + 'memoize_stampede', + 'throttle', +] + +try: + from .djangocache import DjangoCache # pylint: disable=wrong-import-position + __all__.append('DjangoCache') +except Exception: # pylint: disable=broad-except + # Django not installed or not setup so ignore. 
+ pass + +__title__ = 'diskcache' +__version__ = '4.1.0' +__build__ = 0x040100 +__author__ = 'Grant Jenks' +__license__ = 'Apache 2.0' +__copyright__ = 'Copyright 2016-2018 Grant Jenks' diff --git a/third_party/python/diskcache/diskcache/cli.py b/third_party/python/diskcache/diskcache/cli.py new file mode 100644 index 0000000000..44bffebfcc --- /dev/null +++ b/third_party/python/diskcache/diskcache/cli.py @@ -0,0 +1 @@ +"Command line interface to disk cache." diff --git a/third_party/python/diskcache/diskcache/core.py b/third_party/python/diskcache/diskcache/core.py new file mode 100644 index 0000000000..0c8fd2c745 --- /dev/null +++ b/third_party/python/diskcache/diskcache/core.py @@ -0,0 +1,2481 @@ +"""Core disk and file backed cache API. + +""" + +import codecs +import contextlib as cl +import errno +import functools as ft +import io +import json +import os +import os.path as op +import pickletools +import sqlite3 +import struct +import sys +import tempfile +import threading +import time +import warnings +import zlib + +############################################################################ +# BEGIN Python 2/3 Shims +############################################################################ + +if sys.hexversion < 0x03000000: + import cPickle as pickle # pylint: disable=import-error + # ISSUE #25 Fix for http://bugs.python.org/issue10211 + from cStringIO import StringIO as BytesIO # pylint: disable=import-error + from thread import get_ident # pylint: disable=import-error,no-name-in-module + TextType = unicode # pylint: disable=invalid-name,undefined-variable + BytesType = str + INT_TYPES = int, long # pylint: disable=undefined-variable + range = xrange # pylint: disable=redefined-builtin,invalid-name,undefined-variable + io_open = io.open # pylint: disable=invalid-name +else: + import pickle + from io import BytesIO # pylint: disable=ungrouped-imports + from threading import get_ident + TextType = str + BytesType = bytes + INT_TYPES = (int,) + io_open = open # pylint: disable=invalid-name + +def full_name(func): + "Return full name of `func` by adding the module and function name." + try: + # The __qualname__ attribute is only available in Python 3.3 and later. + # GrantJ 2019-03-29 Remove after support for Python 2 is dropped. + name = func.__qualname__ + except AttributeError: + name = func.__name__ + return func.__module__ + '.' + name + +############################################################################ +# END Python 2/3 Shims +############################################################################ + +try: + WindowsError +except NameError: + class WindowsError(Exception): + "Windows error place-holder on platforms without support." + +class Constant(tuple): + "Pretty display of immutable constant." 
+ def __new__(cls, name): + return tuple.__new__(cls, (name,)) + + def __repr__(self): + return '%s' % self[0] + +DBNAME = 'cache.db' +ENOVAL = Constant('ENOVAL') +UNKNOWN = Constant('UNKNOWN') + +MODE_NONE = 0 +MODE_RAW = 1 +MODE_BINARY = 2 +MODE_TEXT = 3 +MODE_PICKLE = 4 + +DEFAULT_SETTINGS = { + u'statistics': 0, # False + u'tag_index': 0, # False + u'eviction_policy': u'least-recently-stored', + u'size_limit': 2 ** 30, # 1gb + u'cull_limit': 10, + u'sqlite_auto_vacuum': 1, # FULL + u'sqlite_cache_size': 2 ** 13, # 8,192 pages + u'sqlite_journal_mode': u'wal', + u'sqlite_mmap_size': 2 ** 26, # 64mb + u'sqlite_synchronous': 1, # NORMAL + u'disk_min_file_size': 2 ** 15, # 32kb + u'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, +} + +METADATA = { + u'count': 0, + u'size': 0, + u'hits': 0, + u'misses': 0, +} + +EVICTION_POLICY = { + 'none': { + 'init': None, + 'get': None, + 'cull': None, + }, + 'least-recently-stored': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_store_time ON' + ' Cache (store_time)' + ), + 'get': None, + 'cull': 'SELECT {fields} FROM Cache ORDER BY store_time LIMIT ?', + }, + 'least-recently-used': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_access_time ON' + ' Cache (access_time)' + ), + 'get': 'access_time = {now}', + 'cull': 'SELECT {fields} FROM Cache ORDER BY access_time LIMIT ?', + }, + 'least-frequently-used': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_access_count ON' + ' Cache (access_count)' + ), + 'get': 'access_count = access_count + 1', + 'cull': 'SELECT {fields} FROM Cache ORDER BY access_count LIMIT ?', + }, +} + + +class Disk(object): + "Cache key and value serialization for SQLite database and files." + def __init__(self, directory, min_file_size=0, pickle_protocol=0): + """Initialize disk instance. + + :param str directory: directory path + :param int min_file_size: minimum size for file use + :param int pickle_protocol: pickle protocol for serialization + + """ + self._directory = directory + self.min_file_size = min_file_size + self.pickle_protocol = pickle_protocol + + + def hash(self, key): + """Compute portable hash for `key`. + + :param key: key to hash + :return: hash value + + """ + mask = 0xFFFFFFFF + disk_key, _ = self.put(key) + type_disk_key = type(disk_key) + + if type_disk_key is sqlite3.Binary: + return zlib.adler32(disk_key) & mask + elif type_disk_key is TextType: + return zlib.adler32(disk_key.encode('utf-8')) & mask # pylint: disable=no-member + elif type_disk_key in INT_TYPES: + return disk_key % mask + else: + assert type_disk_key is float + return zlib.adler32(struct.pack('!d', disk_key)) & mask + + + def put(self, key): + """Convert `key` to fields key and raw for Cache table. + + :param key: key to convert + :return: (database key, raw boolean) pair + + """ + # pylint: disable=bad-continuation,unidiomatic-typecheck + type_key = type(key) + + if type_key is BytesType: + return sqlite3.Binary(key), True + elif ((type_key is TextType) + or (type_key in INT_TYPES + and -9223372036854775808 <= key <= 9223372036854775807) + or (type_key is float)): + return key, True + else: + data = pickle.dumps(key, protocol=self.pickle_protocol) + result = pickletools.optimize(data) + return sqlite3.Binary(result), False + + + def get(self, key, raw): + """Convert fields `key` and `raw` from Cache table to key. 
+ + :param key: database key to convert + :param bool raw: flag indicating raw database storage + :return: corresponding Python key + + """ + # pylint: disable=no-self-use,unidiomatic-typecheck + if raw: + return BytesType(key) if type(key) is sqlite3.Binary else key + else: + return pickle.load(BytesIO(key)) + + + def store(self, value, read, key=UNKNOWN): + """Convert `value` to fields size, mode, filename, and value for Cache + table. + + :param value: value to convert + :param bool read: True when value is file-like object + :param key: key for item (default UNKNOWN) + :return: (size, mode, filename, value) tuple for Cache table + + """ + # pylint: disable=unidiomatic-typecheck + type_value = type(value) + min_file_size = self.min_file_size + + if ((type_value is TextType and len(value) < min_file_size) + or (type_value in INT_TYPES + and -9223372036854775808 <= value <= 9223372036854775807) + or (type_value is float)): + return 0, MODE_RAW, None, value + elif type_value is BytesType: + if len(value) < min_file_size: + return 0, MODE_RAW, None, sqlite3.Binary(value) + else: + filename, full_path = self.filename(key, value) + + with open(full_path, 'wb') as writer: + writer.write(value) + + return len(value), MODE_BINARY, filename, None + elif type_value is TextType: + filename, full_path = self.filename(key, value) + + with io_open(full_path, 'w', encoding='UTF-8') as writer: + writer.write(value) + + size = op.getsize(full_path) + return size, MODE_TEXT, filename, None + elif read: + size = 0 + reader = ft.partial(value.read, 2 ** 22) + filename, full_path = self.filename(key, value) + + with open(full_path, 'wb') as writer: + for chunk in iter(reader, b''): + size += len(chunk) + writer.write(chunk) + + return size, MODE_BINARY, filename, None + else: + result = pickle.dumps(value, protocol=self.pickle_protocol) + + if len(result) < min_file_size: + return 0, MODE_PICKLE, None, sqlite3.Binary(result) + else: + filename, full_path = self.filename(key, value) + + with open(full_path, 'wb') as writer: + writer.write(result) + + return len(result), MODE_PICKLE, filename, None + + + def fetch(self, mode, filename, value, read): + """Convert fields `mode`, `filename`, and `value` from Cache table to + value. + + :param int mode: value mode raw, binary, text, or pickle + :param str filename: filename of corresponding value + :param value: database value + :param bool read: when True, return an open file handle + :return: corresponding Python value + + """ + # pylint: disable=no-self-use,unidiomatic-typecheck + if mode == MODE_RAW: + return BytesType(value) if type(value) is sqlite3.Binary else value + elif mode == MODE_BINARY: + if read: + return open(op.join(self._directory, filename), 'rb') + else: + with open(op.join(self._directory, filename), 'rb') as reader: + return reader.read() + elif mode == MODE_TEXT: + full_path = op.join(self._directory, filename) + with io_open(full_path, 'r', encoding='UTF-8') as reader: + return reader.read() + elif mode == MODE_PICKLE: + if value is None: + with open(op.join(self._directory, filename), 'rb') as reader: + return pickle.load(reader) + else: + return pickle.load(BytesIO(value)) + + + def filename(self, key=UNKNOWN, value=UNKNOWN): + """Return filename and full-path tuple for file storage. + + Filename will be a randomly generated 28 character hexadecimal string + with ".val" suffixed. Two levels of sub-directories will be used to + reduce the size of directories. On older filesystems, lookups in + directories with many files may be slow. 
+ + The default implementation ignores the `key` and `value` parameters. + + In some scenarios, for example :meth:`Cache.push + <diskcache.Cache.push>`, the `key` or `value` may not be known when the + item is stored in the cache. + + :param key: key for item (default UNKNOWN) + :param value: value for item (default UNKNOWN) + + """ + # pylint: disable=unused-argument + hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8') + sub_dir = op.join(hex_name[:2], hex_name[2:4]) + name = hex_name[4:] + '.val' + directory = op.join(self._directory, sub_dir) + + try: + os.makedirs(directory) + except OSError as error: + if error.errno != errno.EEXIST: + raise + + filename = op.join(sub_dir, name) + full_path = op.join(self._directory, filename) + return filename, full_path + + + def remove(self, filename): + """Remove a file given by `filename`. + + This method is cross-thread and cross-process safe. If an "error no + entry" occurs, it is suppressed. + + :param str filename: relative path to file + + """ + full_path = op.join(self._directory, filename) + + try: + os.remove(full_path) + except WindowsError: + pass + except OSError as error: + if error.errno != errno.ENOENT: + # ENOENT may occur if two caches attempt to delete the same + # file at the same time. + raise + + +class JSONDisk(Disk): + "Cache key and value using JSON serialization with zlib compression." + def __init__(self, directory, compress_level=1, **kwargs): + """Initialize JSON disk instance. + + Keys and values are compressed using the zlib library. The + `compress_level` is an integer from 0 to 9 controlling the level of + compression; 1 is fastest and produces the least compression, 9 is + slowest and produces the most compression, and 0 is no compression. + + :param str directory: directory path + :param int compress_level: zlib compression level (default 1) + :param kwargs: super class arguments + + """ + self.compress_level = compress_level + super(JSONDisk, self).__init__(directory, **kwargs) + + + def put(self, key): + json_bytes = json.dumps(key).encode('utf-8') + data = zlib.compress(json_bytes, self.compress_level) + return super(JSONDisk, self).put(data) + + + def get(self, key, raw): + data = super(JSONDisk, self).get(key, raw) + return json.loads(zlib.decompress(data).decode('utf-8')) + + + def store(self, value, read, key=UNKNOWN): + if not read: + json_bytes = json.dumps(value).encode('utf-8') + value = zlib.compress(json_bytes, self.compress_level) + return super(JSONDisk, self).store(value, read, key=key) + + + def fetch(self, mode, filename, value, read): + data = super(JSONDisk, self).fetch(mode, filename, value, read) + if not read: + data = json.loads(zlib.decompress(data).decode('utf-8')) + return data + + +class Timeout(Exception): + "Database timeout expired." + + +class UnknownFileWarning(UserWarning): + "Warning used by Cache.check for unknown files." + + +class EmptyDirWarning(UserWarning): + "Warning used by Cache.check for empty directories." + + +def args_to_key(base, args, kwargs, typed): + """Create cache key out of function arguments. 
+ + :param tuple base: base of key + :param tuple args: function arguments + :param dict kwargs: function keyword arguments + :param bool typed: include types in cache key + :return: cache key tuple + + """ + key = base + args + + if kwargs: + key += (ENOVAL,) + sorted_items = sorted(kwargs.items()) + + for item in sorted_items: + key += item + + if typed: + key += tuple(type(arg) for arg in args) + + if kwargs: + key += tuple(type(value) for _, value in sorted_items) + + return key + + +class Cache(object): + "Disk and file backed cache." + # pylint: disable=bad-continuation + def __init__(self, directory=None, timeout=60, disk=Disk, **settings): + """Initialize cache instance. + + :param str directory: cache directory + :param float timeout: SQLite connection timeout + :param disk: Disk type or subclass for serialization + :param settings: any of DEFAULT_SETTINGS + + """ + try: + assert issubclass(disk, Disk) + except (TypeError, AssertionError): + raise ValueError('disk must subclass diskcache.Disk') + + if directory is None: + directory = tempfile.mkdtemp(prefix='diskcache-') + directory = op.expanduser(directory) + directory = op.expandvars(directory) + + self._directory = directory + self._timeout = 0 # Manually handle retries during initialization. + self._local = threading.local() + self._txn_id = None + + if not op.isdir(directory): + try: + os.makedirs(directory, 0o755) + except OSError as error: + if error.errno != errno.EEXIST: + raise EnvironmentError( + error.errno, + 'Cache directory "%s" does not exist' + ' and could not be created' % self._directory + ) + + sql = self._sql_retry + + # Setup Settings table. + + try: + current_settings = dict(sql( + 'SELECT key, value FROM Settings' + ).fetchall()) + except sqlite3.OperationalError: + current_settings = {} + + sets = DEFAULT_SETTINGS.copy() + sets.update(current_settings) + sets.update(settings) + + for key in METADATA: + sets.pop(key, None) + + # Chance to set pragmas before any tables are created. + + for key, value in sorted(sets.items()): + if key.startswith('sqlite_'): + self.reset(key, value, update=False) + + sql('CREATE TABLE IF NOT EXISTS Settings (' + ' key TEXT NOT NULL UNIQUE,' + ' value)' + ) + + # Setup Disk object (must happen after settings initialized). + + kwargs = { + key[5:]: value for key, value in sets.items() + if key.startswith('disk_') + } + self._disk = disk(directory, **kwargs) + + # Set cached attributes: updates settings and sets pragmas. + + for key, value in sets.items(): + query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key, value) + + for key, value in METADATA.items(): + query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key) + + (self._page_size,), = sql('PRAGMA page_size').fetchall() + + # Setup Cache table. + + sql('CREATE TABLE IF NOT EXISTS Cache (' + ' rowid INTEGER PRIMARY KEY,' + ' key BLOB,' + ' raw INTEGER,' + ' store_time REAL,' + ' expire_time REAL,' + ' access_time REAL,' + ' access_count INTEGER DEFAULT 0,' + ' tag BLOB,' + ' size INTEGER DEFAULT 0,' + ' mode INTEGER DEFAULT 0,' + ' filename TEXT,' + ' value BLOB)' + ) + + sql('CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' + ' Cache(key, raw)' + ) + + sql('CREATE INDEX IF NOT EXISTS Cache_expire_time ON' + ' Cache (expire_time)' + ) + + query = EVICTION_POLICY[self.eviction_policy]['init'] + + if query is not None: + sql(query) + + # Use triggers to keep Metadata updated. 
+ + sql('CREATE TRIGGER IF NOT EXISTS Settings_count_insert' + ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value + 1' + ' WHERE key = "count"; END' + ) + + sql('CREATE TRIGGER IF NOT EXISTS Settings_count_delete' + ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value - 1' + ' WHERE key = "count"; END' + ) + + sql('CREATE TRIGGER IF NOT EXISTS Settings_size_insert' + ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value + NEW.size' + ' WHERE key = "size"; END' + ) + + sql('CREATE TRIGGER IF NOT EXISTS Settings_size_update' + ' AFTER UPDATE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings' + ' SET value = value + NEW.size - OLD.size' + ' WHERE key = "size"; END' + ) + + sql('CREATE TRIGGER IF NOT EXISTS Settings_size_delete' + ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value - OLD.size' + ' WHERE key = "size"; END' + ) + + # Create tag index if requested. + + if self.tag_index: # pylint: disable=no-member + self.create_tag_index() + else: + self.drop_tag_index() + + # Close and re-open database connection with given timeout. + + self.close() + self._timeout = timeout + self._sql # pylint: disable=pointless-statement + + + @property + def directory(self): + """Cache directory.""" + return self._directory + + + @property + def timeout(self): + """SQLite connection timeout value in seconds.""" + return self._timeout + + + @property + def disk(self): + """Disk used for serialization.""" + return self._disk + + + @property + def _con(self): + # Check process ID to support process forking. If the process + # ID changes, close the connection and update the process ID. + + local_pid = getattr(self._local, 'pid', None) + pid = os.getpid() + + if local_pid != pid: + self.close() + self._local.pid = pid + + con = getattr(self._local, 'con', None) + + if con is None: + con = self._local.con = sqlite3.connect( + op.join(self._directory, DBNAME), + timeout=self._timeout, + isolation_level=None, + ) + + # Some SQLite pragmas work on a per-connection basis so + # query the Settings table and reset the pragmas. The + # Settings table may not exist so catch and ignore the + # OperationalError that may occur. + + try: + select = 'SELECT key, value FROM Settings' + settings = con.execute(select).fetchall() + except sqlite3.OperationalError: + pass + else: + for key, value in settings: + if key.startswith('sqlite_'): + self.reset(key, value, update=False) + + return con + + + @property + def _sql(self): + return self._con.execute + + + @property + def _sql_retry(self): + sql = self._sql + + # 2018-11-01 GrantJ - Some SQLite builds/versions handle + # the SQLITE_BUSY return value and connection parameter + # "timeout" differently. For a more reliable duration, + # manually retry the statement for 60 seconds. Only used + # by statements which modify the database and do not use + # a transaction (like those in ``__init__`` or ``reset``). + # See Issue #85 for and tests/issue_85.py for more details. + + def _execute_with_retry(statement, *args, **kwargs): + start = time.time() + while True: + try: + return sql(statement, *args, **kwargs) + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + + return _execute_with_retry + + + @cl.contextmanager + def transact(self, retry=False): + """Context manager to perform a transaction by locking the cache. 
+ + While the cache is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + + Transactions may be nested and may not be shared between threads. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + >>> cache = Cache() + >>> with cache.transact(): # Atomically increment two keys. + ... _ = cache.incr('total', 123.4) + ... _ = cache.incr('count', 1) + >>> with cache.transact(): # Atomically calculate average. + ... average = cache['total'] / cache['count'] + >>> average + 123.4 + + :param bool retry: retry if database timeout occurs (default False) + :return: context manager for use in `with` statement + :raises Timeout: if database timeout occurs + + """ + with self._transact(retry=retry): + yield + + + @cl.contextmanager + def _transact(self, retry=False, filename=None): + sql = self._sql + filenames = [] + _disk_remove = self._disk.remove + tid = get_ident() + txn_id = self._txn_id + + if tid == txn_id: + begin = False + else: + while True: + try: + sql('BEGIN IMMEDIATE') + begin = True + self._txn_id = tid + break + except sqlite3.OperationalError: + if retry: + continue + if filename is not None: + _disk_remove(filename) + raise Timeout + + try: + yield sql, filenames.append + except BaseException: + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('ROLLBACK') + raise + else: + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('COMMIT') + for name in filenames: + if name is not None: + _disk_remove(name) + + + def set(self, key, value, expire=None, read=False, tag=None, retry=False): + """Set `key` and `value` item in cache. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param value: value for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was set + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read, key=key) + columns = (expire_time, tag, size, mode, filename, db_value) + + # The order of SELECT, UPDATE, and INSERT is important below. + # + # Typical cache usage pattern is: + # + # value = cache.get(key) + # if value is None: + # value = expensive_calculation() + # cache.set(key, value) + # + # Cache.get does not evict expired keys to avoid writes during lookups. + # Commonly used/expired keys will therefore remain in the cache making + # an UPDATE the preferred path. + # + # The alternative is to assume the key is not present by first trying + # to INSERT and then handling the IntegrityError that occurs from + # violating the UNIQUE constraint. This optimistic approach was + # rejected based on the common cache usage pattern. + # + # INSERT OR REPLACE aka UPSERT is not used because the old filename may + # need cleanup. 
+ + with self._transact(retry, filename) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + (rowid, old_filename), = rows + cleanup(old_filename) + self._row_update(rowid, now, columns) + else: + self._row_insert(db_key, raw, now, columns) + + self._cull(now, sql, cleanup) + + return True + + + def __setitem__(self, key, value): + """Set corresponding `value` for `key` in cache. + + :param key: key for item + :param value: value for item + :return: corresponding value + :raises KeyError: if key is not found + + """ + self.set(key, value, retry=True) + + + def _row_update(self, rowid, now, columns): + sql = self._sql + expire_time, tag, size, mode, filename, value = columns + sql('UPDATE Cache SET' + ' store_time = ?,' + ' expire_time = ?,' + ' access_time = ?,' + ' access_count = ?,' + ' tag = ?,' + ' size = ?,' + ' mode = ?,' + ' filename = ?,' + ' value = ?' + ' WHERE rowid = ?', ( + now, # store_time + expire_time, + now, # access_time + 0, # access_count + tag, + size, + mode, + filename, + value, + rowid, + ), + ) + + + def _row_insert(self, key, raw, now, columns): + sql = self._sql + expire_time, tag, size, mode, filename, value = columns + sql('INSERT INTO Cache(' + ' key, raw, store_time, expire_time, access_time,' + ' access_count, tag, size, mode, filename, value' + ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', ( + key, + raw, + now, # store_time + expire_time, + now, # access_time + 0, # access_count + tag, + size, + mode, + filename, + value, + ), + ) + + + def _cull(self, now, sql, cleanup, limit=None): + cull_limit = self.cull_limit if limit is None else limit + + if cull_limit == 0: + return + + # Evict expired keys. + + select_expired_template = ( + 'SELECT %s FROM Cache' + ' WHERE expire_time IS NOT NULL AND expire_time < ?' + ' ORDER BY expire_time LIMIT ?' + ) + + select_expired = select_expired_template % 'filename' + rows = sql(select_expired, (now, cull_limit)).fetchall() + + if rows: + delete_expired = ( + 'DELETE FROM Cache WHERE rowid IN (%s)' + % (select_expired_template % 'rowid') + ) + sql(delete_expired, (now, cull_limit)) + + for filename, in rows: + cleanup(filename) + + cull_limit -= len(rows) + + if cull_limit == 0: + return + + # Evict keys by policy. + + select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] + + if select_policy is None or self.volume() < self.size_limit: + return + + select_filename = select_policy.format(fields='filename', now=now) + rows = sql(select_filename, (cull_limit,)).fetchall() + + if rows: + delete = ( + 'DELETE FROM Cache WHERE rowid IN (%s)' + % (select_policy.format(fields='rowid', now=now)) + ) + sql(delete, (cull_limit,)) + + for filename, in rows: + cleanup(filename) + + + def touch(self, key, expire=None, retry=False): + """Touch `key` in cache and update `expire` time. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool retry: retry if database timeout occurs (default False) + :return: True if key was touched + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + + with self._transact(retry) as (sql, _): + rows = sql( + 'SELECT rowid, expire_time FROM Cache' + ' WHERE key = ? 
AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + (rowid, old_expire_time), = rows + + if old_expire_time is None or old_expire_time > now: + sql('UPDATE Cache SET expire_time = ? WHERE rowid = ?', + (expire_time, rowid), + ) + return True + + return False + + + def add(self, key, value, expire=None, read=False, tag=None, retry=False): + """Add `key` and `value` item to cache. + + Similar to `set`, but only add to cache if key not present. + + Operation is atomic. Only one concurrent add operation for a given key + will succeed. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param value: value for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was added + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read, key=key) + columns = (expire_time, tag, size, mode, filename, db_value) + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename, expire_time FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + (rowid, old_filename, old_expire_time), = rows + + if old_expire_time is None or old_expire_time > now: + cleanup(filename) + return False + + cleanup(old_filename) + self._row_update(rowid, now, columns) + else: + self._row_insert(db_key, raw, now, columns) + + self._cull(now, sql, cleanup) + + return True + + + def incr(self, key, delta=1, default=0, retry=False): + """Increment value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent increment operations will be + counted individually. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param int delta: amount to increment (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item + :raises KeyError: if key is not found and default is None + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid, expire_time, filename, value FROM Cache' + ' WHERE key = ? AND raw = ?' 
+ ) + + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (db_key, raw)).fetchall() + + if not rows: + if default is None: + raise KeyError(key) + + value = default + delta + columns = (None, None) + self._disk.store(value, False, key=key) + self._row_insert(db_key, raw, now, columns) + self._cull(now, sql, cleanup) + return value + + (rowid, expire_time, filename, value), = rows + + if expire_time is not None and expire_time < now: + if default is None: + raise KeyError(key) + + value = default + delta + columns = (None, None) + self._disk.store(value, False, key=key) + self._row_update(rowid, now, columns) + self._cull(now, sql, cleanup) + cleanup(filename) + return value + + value += delta + + columns = 'store_time = ?, value = ?' + update_column = EVICTION_POLICY[self.eviction_policy]['get'] + + if update_column is not None: + columns += ', ' + update_column.format(now=now) + + update = 'UPDATE Cache SET %s WHERE rowid = ?' % columns + sql(update, (now, value, rowid)) + + return value + + + def decr(self, key, delta=1, default=0, retry=False): + """Decrement value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent decrement operations will be + counted individually. + + Unlike Memcached, negative values are supported. Value may be + decremented below zero. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param int delta: amount to decrement (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item + :raises KeyError: if key is not found and default is None + :raises Timeout: if database timeout occurs + + """ + return self.incr(key, -delta, default, retry) + + + def get(self, key, default=None, read=False, expire_time=False, tag=False, + retry=False): + """Retrieve value from cache. If `key` is missing, return `default`. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param default: value to return if key is missing (default None) + :param bool read: if True, return file handle to value + (default False) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item or default if key not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + update_column = EVICTION_POLICY[self.eviction_policy]['get'] + select = ( + 'SELECT rowid, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + if expire_time and tag: + default = (default, None, None) + elif expire_time or tag: + default = (default, None) + + if not self.statistics and update_column is None: + # Fast path, no transaction necessary. 
+ + rows = self._sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + return default + + (rowid, db_expire_time, db_tag, mode, filename, db_value), = rows + + try: + value = self._disk.fetch(mode, filename, db_value, read) + except IOError: + # Key was deleted before we could retrieve result. + return default + + else: # Slow path, transaction required. + cache_hit = ( + 'UPDATE Settings SET value = value + 1 WHERE key = "hits"' + ) + cache_miss = ( + 'UPDATE Settings SET value = value + 1 WHERE key = "misses"' + ) + + with self._transact(retry) as (sql, _): + rows = sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + if self.statistics: + sql(cache_miss) + return default + + (rowid, db_expire_time, db_tag, + mode, filename, db_value), = rows + + try: + value = self._disk.fetch(mode, filename, db_value, read) + except IOError as error: + if error.errno == errno.ENOENT: + # Key was deleted before we could retrieve result. + if self.statistics: + sql(cache_miss) + return default + else: + raise + + if self.statistics: + sql(cache_hit) + + now = time.time() + update = 'UPDATE Cache SET %s WHERE rowid = ?' + + if update_column is not None: + sql(update % update_column.format(now=now), (rowid,)) + + if expire_time and tag: + return (value, db_expire_time, db_tag) + elif expire_time: + return (value, db_expire_time) + elif tag: + return (value, db_tag) + else: + return value + + + def __getitem__(self, key): + """Return corresponding value for `key` from cache. + + :param key: key matching item + :return: corresponding value + :raises KeyError: if key is not found + + """ + value = self.get(key, default=ENOVAL, retry=True) + if value is ENOVAL: + raise KeyError(key) + return value + + + def read(self, key, retry=False): + """Return file handle value corresponding to `key` from cache. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) + :return: file open for reading in binary mode + :raises KeyError: if key is not found + :raises Timeout: if database timeout occurs + + """ + handle = self.get(key, default=ENOVAL, read=True, retry=retry) + if handle is ENOVAL: + raise KeyError(key) + return handle + + + def __contains__(self, key): + """Return `True` if `key` matching item is found in cache. + + :param key: key matching item + :return: True if key matching item + + """ + sql = self._sql + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid FROM Cache' + ' WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + rows = sql(select, (db_key, raw, time.time())).fetchall() + + return bool(rows) + + + def pop(self, key, default=None, expire_time=False, tag=False, retry=False): + """Remove corresponding item for `key` from cache and return value. + + If `key` is missing, return `default`. + + Operation is atomic. Concurrent operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
+ + :param key: key for item + :param default: value to return if key is missing (default None) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item or default if key not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + with self._transact(retry) as (sql, _): + rows = sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + return default + + (rowid, db_expire_time, db_tag, mode, filename, db_value), = rows + + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + try: + value = self._disk.fetch(mode, filename, db_value, False) + except IOError as error: + if error.errno == errno.ENOENT: + # Key was deleted before we could retrieve result. + return default + else: + raise + finally: + if filename is not None: + self._disk.remove(filename) + + if expire_time and tag: + return value, db_expire_time, db_tag + elif expire_time: + return value, db_expire_time + elif tag: + return value, db_tag + else: + return value + + + def __delitem__(self, key, retry=True): + """Delete corresponding item for `key` from cache. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default `True`). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default True) + :raises KeyError: if key is not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + + with self._transact(retry) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename FROM Cache' + ' WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)', + (db_key, raw, time.time()), + ).fetchall() + + if not rows: + raise KeyError(key) + + (rowid, filename), = rows + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(filename) + + return True + + + def delete(self, key, retry=False): + """Delete corresponding item for `key` from cache. + + Missing keys are ignored. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was deleted + :raises Timeout: if database timeout occurs + + """ + try: + return self.__delitem__(key, retry=retry) + except KeyError: + return False + + + def push(self, value, prefix=None, side='back', expire=None, read=False, + tag=None, retry=False): + """Push `value` onto `side` of queue identified by `prefix` in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + Defaults to pushing value on back of queue. Set side to 'front' to push + value on front of queue. Side must be one of 'back' or 'front'. + + Operation is atomic. Concurrent operations will be serialized. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull`. 
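
# Illustrative sketch (editor's addition, not part of the vendored
# diskcache source): pop() removes the item and returns its value, and
# the expire_time/tag flags expand the result into a tuple, mirroring
# the implementation above.
from diskcache import Cache

cache = Cache()
cache.set('alpha', 1, expire=60, tag='demo')

value, expire_time, tag = cache.pop('alpha', expire_time=True, tag=True)
assert (value, tag) == (1, 'demo')

assert cache.pop('alpha', default='missing') == 'missing'  # key is gone
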
+ + >>> cache = Cache() + >>> print(cache.push('first value')) + 500000000000000 + >>> cache.get(500000000000000) + 'first value' + >>> print(cache.push('second value')) + 500000000000001 + >>> print(cache.push('third value', side='front')) + 499999999999999 + >>> cache.push(1234, prefix='userids') + 'userids-500000000000000' + + :param value: value for item + :param str prefix: key prefix (default None, key is integer) + :param str side: either 'back' or 'front' (default 'back') + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: key for item in cache + :raises Timeout: if database timeout occurs + + """ + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + now = time.time() + raw = True + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read) + columns = (expire_time, tag, size, mode, filename, db_value) + order = {'back': 'DESC', 'front': 'ASC'} + select = ( + 'SELECT key FROM Cache' + ' WHERE ? < key AND key < ? AND raw = ?' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql(select, (min_key, max_key, raw)).fetchall() + + if rows: + (key,), = rows + + if prefix is not None: + num = int(key[(key.rfind('-') + 1):]) + else: + num = key + + if side == 'back': + num += 1 + else: + assert side == 'front' + num -= 1 + else: + num = 500000000000000 + + if prefix is not None: + db_key = '{0}-{1:015d}'.format(prefix, num) + else: + db_key = num + + self._row_insert(db_key, raw, now, columns) + self._cull(now, sql, cleanup) + + return db_key + + + def pull(self, prefix=None, default=(None, None), side='front', + expire_time=False, tag=False, retry=False): + """Pull key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to pulling key and value item pairs from front of queue. Set + side to 'back' to pull from back of queue. Side must be one of 'front' + or 'back'. + + Operation is atomic. Concurrent operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.push` and `Cache.get`. + + >>> cache = Cache() + >>> cache.pull() + (None, None) + >>> for letter in 'abc': + ... 
print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.pull() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> _, value = cache.pull(side='back') + >>> value + 'c' + >>> cache.push(1234, 'userids') + 'userids-500000000000000' + >>> _, value = cache.pull('userids') + >>> value + 1234 + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.peek + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + (rowid, key, db_expire, db_tag, mode, name, + db_value), = rows + + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + if db_expire is not None and db_expire < time.time(): + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError as error: + if error.errno == errno.ENOENT: + # Key was deleted before we could retrieve result. + continue + else: + raise + finally: + if name is not None: + self._disk.remove(name) + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + + def peek(self, prefix=None, default=(None, None), side='front', + expire_time=False, tag=False, retry=False): + """Peek at key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to peeking at key and value item pairs from front of queue. + Set side to 'back' to pull from back of queue. Side must be one of + 'front' or 'back'. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull` and `Cache.push`. + + >>> cache = Cache() + >>> for letter in 'abc': + ... 
print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.peek() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> key, value = cache.peek(side='back') + >>> print(key) + 500000000000002 + >>> value + 'c' + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.pull + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + (rowid, key, db_expire, db_tag, mode, name, + db_value), = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError as error: + if error.errno == errno.ENOENT: + # Key was deleted before we could retrieve result. + continue + else: + raise + finally: + if name is not None: + self._disk.remove(name) + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + + def peekitem(self, last=True, expire_time=False, tag=False, retry=False): + """Peek at key and value item pair in cache based on iteration order. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + >>> cache = Cache() + >>> for num, letter in enumerate('abc'): + ... 
cache[letter] = num + >>> cache.peekitem() + ('c', 2) + >>> cache.peekitem(last=False) + ('a', 0) + + :param bool last: last item in iteration order (default True) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair + :raises KeyError: if cache is empty + :raises Timeout: if database timeout occurs + + """ + order = ('ASC', 'DESC') + select = ( + 'SELECT rowid, key, raw, expire_time, tag, mode, filename, value' + ' FROM Cache ORDER BY rowid %s LIMIT 1' + ) % order[last] + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select).fetchall() + + if not rows: + raise KeyError('dictionary is empty') + + (rowid, db_key, raw, db_expire, db_tag, mode, name, + db_value), = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + key = self._disk.get(db_key, raw) + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError as error: + if error.errno == errno.ENOENT: + # Key was deleted before we could retrieve result. + continue + else: + raise + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + + def memoize(self, name=None, typed=False, expire=None, tag=None): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + When expire is set to zero, function results will not be set in the + cache. Cache lookups still occur, however. Read + :doc:`case-study-landing-page-caching` for example usage. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. + + The original underlying function is accessible through the __wrapped__ + attribute. This is useful for introspection, for bypassing the cache, + or for rewrapping the function with a different cache. + + >>> from diskcache import Cache + >>> cache = Cache() + >>> @cache.memoize(expire=1, tag='fib') + ... def fibonacci(number): + ... if number == 0: + ... return 0 + ... elif number == 1: + ... return 1 + ... else: + ... return fibonacci(number - 1) + fibonacci(number - 2) + >>> print(fibonacci(100)) + 354224848179261915075 + + An additional `__cache_key__` attribute can be used to generate the + cache key used for the given arguments. + + >>> key = fibonacci.__cache_key__(100) + >>> print(cache[key]) + 354224848179261915075 + + Remember to call memoize when decorating a callable. If you forget, + then a TypeError will occur. Note the lack of parenthenses after + memoize below: + + >>> @cache.memoize + ... def test(): + ... pass + Traceback (most recent call last): + ... 
+ TypeError: name cannot be callable + + :param cache: cache to store callable arguments and return values + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param float expire: seconds until arguments expire + (default None, no expiry) + :param str tag: text to associate with arguments (default None) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in DjangoCache.memoize + if callable(name): + raise TypeError('name cannot be callable') + + def decorator(func): + "Decorator created by memoize() for callable `func`." + base = (full_name(func),) if name is None else (name,) + + @ft.wraps(func) + def wrapper(*args, **kwargs): + "Wrapper for callable to cache arguments and return values." + key = wrapper.__cache_key__(*args, **kwargs) + result = self.get(key, default=ENOVAL, retry=True) + + if result is ENOVAL: + result = func(*args, **kwargs) + if expire is None or expire > 0: + self.set(key, result, expire, tag=tag, retry=True) + + return result + + def __cache_key__(*args, **kwargs): + "Make key for cache given function arguments." + return args_to_key(base, args, kwargs, typed) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator + + + def check(self, fix=False, retry=False): + """Check database and file system consistency. + + Intended for use in testing and post-mortem error analysis. + + While checking the Cache table for consistency, a writer lock is held + on the database. The lock blocks other cache clients from writing to + the database. For caches with many file references, the lock may be + held for a long time. For example, local benchmarking shows that a + cache with 1,000 file references takes ~60ms to check. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool fix: correct inconsistencies + :param bool retry: retry if database timeout occurs (default False) + :return: list of warnings + :raises Timeout: if database timeout occurs + + """ + # pylint: disable=access-member-before-definition,W0201 + with warnings.catch_warnings(record=True) as warns: + sql = self._sql + + # Check integrity of database. + + rows = sql('PRAGMA integrity_check').fetchall() + + if len(rows) != 1 or rows[0][0] != u'ok': + for message, in rows: + warnings.warn(message) + + if fix: + sql('VACUUM') + + with self._transact(retry) as (sql, _): + + # Check Cache.filename against file system. + + filenames = set() + select = ( + 'SELECT rowid, size, filename FROM Cache' + ' WHERE filename IS NOT NULL' + ) + + rows = sql(select).fetchall() + + for rowid, size, filename in rows: + full_path = op.join(self._directory, filename) + filenames.add(full_path) + + if op.exists(full_path): + real_size = op.getsize(full_path) + + if size != real_size: + message = 'wrong file size: %s, %d != %d' + args = full_path, real_size, size + warnings.warn(message % args) + + if fix: + sql('UPDATE Cache SET size = ?' + ' WHERE rowid = ?', + (real_size, rowid), + ) + + continue + + warnings.warn('file not found: %s' % full_path) + + if fix: + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + # Check file system against Cache.filename. 
+ + for dirpath, _, files in os.walk(self._directory): + paths = [op.join(dirpath, filename) for filename in files] + error = set(paths) - filenames + + for full_path in error: + if DBNAME in full_path: + continue + + message = 'unknown file: %s' % full_path + warnings.warn(message, UnknownFileWarning) + + if fix: + os.remove(full_path) + + # Check for empty directories. + + for dirpath, dirs, files in os.walk(self._directory): + if not (dirs or files): + message = 'empty directory: %s' % dirpath + warnings.warn(message, EmptyDirWarning) + + if fix: + os.rmdir(dirpath) + + # Check Settings.count against count of Cache rows. + + self.reset('count') + (count,), = sql('SELECT COUNT(key) FROM Cache').fetchall() + + if self.count != count: + message = 'Settings.count != COUNT(Cache.key); %d != %d' + warnings.warn(message % (self.count, count)) + + if fix: + sql('UPDATE Settings SET value = ? WHERE key = ?', + (count, 'count'), + ) + + # Check Settings.size against sum of Cache.size column. + + self.reset('size') + select_size = 'SELECT COALESCE(SUM(size), 0) FROM Cache' + (size,), = sql(select_size).fetchall() + + if self.size != size: + message = 'Settings.size != SUM(Cache.size); %d != %d' + warnings.warn(message % (self.size, size)) + + if fix: + sql('UPDATE Settings SET value = ? WHERE key =?', + (size, 'size'), + ) + + return warns + + + def create_tag_index(self): + """Create tag index on cache database. + + It is better to initialize cache with `tag_index=True` than use this. + + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql('CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid)') + self.reset('tag_index', 1) + + + def drop_tag_index(self): + """Drop tag index on cache database. + + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql('DROP INDEX IF EXISTS Cache_tag_rowid') + self.reset('tag_index', 0) + + + def evict(self, tag, retry=False): + """Remove items with matching `tag` from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param str tag: tag identifying items + :param bool retry: retry if database timeout occurs (default False) + :return: count of rows removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, filename FROM Cache' + ' WHERE tag = ? AND rowid > ?' + ' ORDER BY rowid LIMIT ?' + ) + args = [tag, 0, 100] + return self._select_delete(select, args, arg_index=1, retry=retry) + + + def expire(self, now=None, retry=False): + """Remove expired items from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
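
# Illustrative sketch (editor's addition, not part of the vendored
# diskcache source): evict() removes items that share a tag, while
# expire() removes only items whose expire_time has passed.
import time
from diskcache import Cache

cache = Cache()
for num in range(3):
    cache.set('key-%d' % num, num, tag='batch')
cache.set('transient', 'value', expire=0.001)

assert cache.evict('batch') == 3  # the three tagged items are removed
time.sleep(0.01)
assert cache.expire() == 1        # the expired item is removed
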
+ + :param float now: current time (default None, ``time.time()`` used) + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, expire_time, filename FROM Cache' + ' WHERE ? < expire_time AND expire_time < ?' + ' ORDER BY expire_time LIMIT ?' + ) + args = [0, now or time.time(), 100] + return self._select_delete(select, args, row_index=1, retry=retry) + + + def cull(self, retry=False): + """Cull items from cache until volume is less than size limit. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + :raises Timeout: if database timeout occurs + + """ + now = time.time() + + # Remove expired items. + + count = self.expire(now) + + # Remove items by policy. + + select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] + + if select_policy is None: + return + + select_filename = select_policy.format(fields='filename', now=now) + + try: + while self.volume() > self.size_limit: + with self._transact(retry) as (sql, cleanup): + rows = sql(select_filename, (10,)).fetchall() + + if not rows: + break + + count += len(rows) + delete = ( + 'DELETE FROM Cache WHERE rowid IN (%s)' + % select_policy.format(fields='rowid', now=now) + ) + sql(delete, (10,)) + + for filename, in rows: + cleanup(filename) + except Timeout: + raise Timeout(count) + + return count + + + def clear(self, retry=False): + """Remove all items from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of rows removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, filename FROM Cache' + ' WHERE rowid > ?' + ' ORDER BY rowid LIMIT ?' + ) + args = [0, 100] + return self._select_delete(select, args, retry=retry) + + + def _select_delete(self, select, args, row_index=0, arg_index=0, + retry=False): + count = 0 + delete = 'DELETE FROM Cache WHERE rowid IN (%s)' + + try: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, args).fetchall() + + if not rows: + break + + count += len(rows) + sql(delete % ','.join(str(row[0]) for row in rows)) + + for row in rows: + args[arg_index] = row[row_index] + cleanup(row[-1]) + + except Timeout: + raise Timeout(count) + + return count + + + def iterkeys(self, reverse=False): + """Iterate Cache keys in database sort order. + + >>> cache = Cache() + >>> for key in [4, 1, 3, 0, 2]: + ... 
cache[key] = key + >>> list(cache.iterkeys()) + [0, 1, 2, 3, 4] + >>> list(cache.iterkeys(reverse=True)) + [4, 3, 2, 1, 0] + + :param bool reverse: reverse sort order (default False) + :return: iterator of Cache keys + + """ + sql = self._sql + limit = 100 + _disk_get = self._disk.get + + if reverse: + select = ( + 'SELECT key, raw FROM Cache' + ' ORDER BY key DESC, raw DESC LIMIT 1' + ) + iterate = ( + 'SELECT key, raw FROM Cache' + ' WHERE key = ? AND raw < ? OR key < ?' + ' ORDER BY key DESC, raw DESC LIMIT ?' + ) + else: + select = ( + 'SELECT key, raw FROM Cache' + ' ORDER BY key ASC, raw ASC LIMIT 1' + ) + iterate = ( + 'SELECT key, raw FROM Cache' + ' WHERE key = ? AND raw > ? OR key > ?' + ' ORDER BY key ASC, raw ASC LIMIT ?' + ) + + row = sql(select).fetchall() + + if row: + (key, raw), = row + else: + return + + yield _disk_get(key, raw) + + while True: + rows = sql(iterate, (key, raw, key, limit)).fetchall() + + if not rows: + break + + for key, raw in rows: + yield _disk_get(key, raw) + + + def _iter(self, ascending=True): + sql = self._sql + rows = sql('SELECT MAX(rowid) FROM Cache').fetchall() + (max_rowid,), = rows + yield # Signal ready. + + if max_rowid is None: + return + + bound = max_rowid + 1 + limit = 100 + _disk_get = self._disk.get + rowid = 0 if ascending else bound + select = ( + 'SELECT rowid, key, raw FROM Cache' + ' WHERE ? < rowid AND rowid < ?' + ' ORDER BY rowid %s LIMIT ?' + ) % ('ASC' if ascending else 'DESC') + + while True: + if ascending: + args = (rowid, bound, limit) + else: + args = (0, rowid, limit) + + rows = sql(select, args).fetchall() + + if not rows: + break + + for rowid, key, raw in rows: + yield _disk_get(key, raw) + + + def __iter__(self): + "Iterate keys in cache including expired items." + iterator = self._iter() + next(iterator) + return iterator + + + def __reversed__(self): + "Reverse iterate keys in cache including expired items." + iterator = self._iter(ascending=False) + next(iterator) + return iterator + + + def stats(self, enable=True, reset=False): + """Return cache statistics hits and misses. + + :param bool enable: enable collecting statistics (default True) + :param bool reset: reset hits and misses to 0 (default False) + :return: (hits, misses) + + """ + # pylint: disable=E0203,W0201 + result = (self.reset('hits'), self.reset('misses')) + + if reset: + self.reset('hits', 0) + self.reset('misses', 0) + + self.reset('statistics', enable) + + return result + + + def volume(self): + """Return estimated total size of cache on disk. + + :return: size in bytes + + """ + (page_count,), = self._sql('PRAGMA page_count').fetchall() + total_size = self._page_size * page_count + self.reset('size') + return total_size + + + def close(self): + """Close database connection. + + """ + con = getattr(self._local, 'con', None) + + if con is None: + return + + con.close() + + try: + delattr(self._local, 'con') + except AttributeError: + pass + + + def __enter__(self): + # Create connection in thread. + connection = self._con # pylint: disable=unused-variable + return self + + + def __exit__(self, *exception): + self.close() + + + def __len__(self): + "Count of items in cache including expired items." + return self.reset('count') + + + def __getstate__(self): + return (self.directory, self.timeout, type(self.disk)) + + + def __setstate__(self, state): + self.__init__(*state) + + + def reset(self, key, value=ENOVAL, update=True): + """Reset `key` and `value` item from Settings table. + + Use `reset` to update the value of Cache settings correctly. 
Cache + settings are stored in the Settings table of the SQLite database. If + `update` is ``False`` then no attempt is made to update the database. + + If `value` is not given, it is reloaded from the Settings + table. Otherwise, the Settings table is updated. + + Settings with the ``disk_`` prefix correspond to Disk + attributes. Updating the value will change the unprefixed attribute on + the associated Disk instance. + + Settings with the ``sqlite_`` prefix correspond to SQLite + pragmas. Updating the value will execute the corresponding PRAGMA + statement. + + SQLite PRAGMA statements may be executed before the Settings table + exists in the database by setting `update` to ``False``. + + :param str key: Settings key for item + :param value: value for item (optional) + :param bool update: update database Settings table (default True) + :return: updated value for item + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql_retry = self._sql_retry + + if value is ENOVAL: + select = 'SELECT value FROM Settings WHERE key = ?' + (value,), = sql_retry(select, (key,)).fetchall() + setattr(self, key, value) + return value + + if update: + statement = 'UPDATE Settings SET value = ? WHERE key = ?' + sql_retry(statement, (value, key)) + + if key.startswith('sqlite_'): + pragma = key[7:] + + # 2016-02-17 GrantJ - PRAGMA and isolation_level=None + # don't always play nicely together. Retry setting the + # PRAGMA. I think some PRAGMA statements expect to + # immediately take an EXCLUSIVE lock on the database. I + # can't find any documentation for this but without the + # retry, stress will intermittently fail with multiple + # processes. + + # 2018-11-05 GrantJ - Avoid setting pragma values that + # are already set. Pragma settings like auto_vacuum and + # journal_mode can take a long time or may not work after + # tables have been created. + + start = time.time() + while True: + try: + try: + (old_value,), = sql('PRAGMA %s' % (pragma)).fetchall() + update = old_value != value + except ValueError: + update = True + if update: + sql('PRAGMA %s = %s' % (pragma, value)).fetchall() + break + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + elif key.startswith('disk_'): + attr = key[5:] + setattr(self._disk, attr, value) + + setattr(self, key, value) + return value diff --git a/third_party/python/diskcache/diskcache/djangocache.py b/third_party/python/diskcache/diskcache/djangocache.py new file mode 100644 index 0000000000..997b852406 --- /dev/null +++ b/third_party/python/diskcache/diskcache/djangocache.py @@ -0,0 +1,433 @@ +"Django-compatible disk and file backed cache." + +from functools import wraps +from django.core.cache.backends.base import BaseCache + +try: + from django.core.cache.backends.base import DEFAULT_TIMEOUT +except ImportError: + # For older versions of Django simply use 300 seconds. + DEFAULT_TIMEOUT = 300 + +from .core import ENOVAL, args_to_key, full_name +from .fanout import FanoutCache + + +class DjangoCache(BaseCache): + "Django-compatible disk and file backed cache." + def __init__(self, directory, params): + """Initialize DjangoCache instance. 
+ + :param str directory: cache directory + :param dict params: cache parameters + + """ + super(DjangoCache, self).__init__(params) + shards = params.get('SHARDS', 8) + timeout = params.get('DATABASE_TIMEOUT', 0.010) + options = params.get('OPTIONS', {}) + self._cache = FanoutCache(directory, shards, timeout, **options) + + + @property + def directory(self): + """Cache directory.""" + return self._cache.directory + + + def cache(self, name): + """Return Cache with given `name` in subdirectory. + + :param str name: subdirectory name for Cache + :return: Cache with given name + + """ + return self._cache.cache(name) + + + def deque(self, name): + """Return Deque with given `name` in subdirectory. + + :param str name: subdirectory name for Deque + :return: Deque with given name + + """ + return self._cache.deque(name) + + + def index(self, name): + """Return Index with given `name` in subdirectory. + + :param str name: subdirectory name for Index + :return: Index with given name + + """ + return self._cache.index(name) + + + def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, + read=False, tag=None, retry=True): + """Set a value in the cache if the key does not already exist. If + timeout is given, that timeout will be used for the key; otherwise the + default cache timeout will be used. + + Return True if the value was stored, False otherwise. + + :param key: key for item + :param value: value for item + :param float timeout: seconds until the item expires + (default 300 seconds) + :param int version: key version number (default None, cache parameter) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default True) + :return: True if item was added + + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + timeout = self.get_backend_timeout(timeout=timeout) + return self._cache.add(key, value, timeout, read, tag, retry) + + + def get(self, key, default=None, version=None, read=False, + expire_time=False, tag=False, retry=False): + """Fetch a given key from the cache. If the key does not exist, return + default, which itself defaults to None. + + :param key: key for item + :param default: return value if key is missing (default None) + :param int version: key version number (default None, cache parameter) + :param bool read: if True, return file handle to value + (default False) + :param float expire_time: if True, return expire_time in tuple + (default False) + :param tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item if key is found else default + + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + return self._cache.get(key, default, read, expire_time, tag, retry) + + + def read(self, key, version=None): + """Return file handle corresponding to `key` from Cache. + + :param key: Python key to retrieve + :param int version: key version number (default None, cache parameter) + :return: file open for reading in binary mode + :raises KeyError: if key is not found + + """ + key = self.make_key(key, version=version) + return self._cache.read(key) + + + def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None, + read=False, tag=None, retry=True): + """Set a value in the cache. If timeout is given, that timeout will be + used for the key; otherwise the default cache timeout will be used. 
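
# Illustrative sketch (editor's addition, not part of the vendored
# diskcache source): a typical Django settings entry for this backend.
# LOCATION becomes the cache directory, while SHARDS, DATABASE_TIMEOUT
# and OPTIONS are read by __init__ above; the values shown are
# hypothetical.
CACHES = {
    'default': {
        'BACKEND': 'diskcache.DjangoCache',
        'LOCATION': '/var/tmp/django-cache',
        'TIMEOUT': 300,
        'SHARDS': 8,
        'DATABASE_TIMEOUT': 0.010,
        'OPTIONS': {'size_limit': 2 ** 30},  # one gigabyte
    },
}
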
+ + :param key: key for item + :param value: value for item + :param float timeout: seconds until the item expires + (default 300 seconds) + :param int version: key version number (default None, cache parameter) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default True) + :return: True if item was set + + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + timeout = self.get_backend_timeout(timeout=timeout) + return self._cache.set(key, value, timeout, read, tag, retry) + + + def touch(self, key, timeout=DEFAULT_TIMEOUT, version=None, retry=True): + """Touch a key in the cache. If timeout is given, that timeout will be + used for the key; otherwise the default cache timeout will be used. + + :param key: key for item + :param float timeout: seconds until the item expires + (default 300 seconds) + :param int version: key version number (default None, cache parameter) + :param bool retry: retry if database timeout occurs (default True) + :return: True if key was touched + + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + timeout = self.get_backend_timeout(timeout=timeout) + return self._cache.touch(key, timeout, retry) + + + def pop(self, key, default=None, version=None, expire_time=False, + tag=False, retry=True): + """Remove corresponding item for `key` from cache and return value. + + If `key` is missing, return `default`. + + Operation is atomic. Concurrent operations will be serialized. + + :param key: key for item + :param default: return value if key is missing (default None) + :param int version: key version number (default None, cache parameter) + :param float expire_time: if True, return expire_time in tuple + (default False) + :param tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default True) + :return: value for item if key is found else default + + """ + key = self.make_key(key, version=version) + return self._cache.pop(key, default, expire_time, tag, retry) + + + def delete(self, key, version=None, retry=True): + """Delete a key from the cache, failing silently. + + :param key: key for item + :param int version: key version number (default None, cache parameter) + :param bool retry: retry if database timeout occurs (default True) + :return: True if item was deleted + + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + self._cache.delete(key, retry) + + + def incr(self, key, delta=1, version=None, default=None, retry=True): + """Increment value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent increment operations will be + counted individually. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. 
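
# Illustrative sketch (editor's addition, not part of the vendored
# diskcache source): incr() above, and decr() defined below, convert
# the underlying KeyError into ValueError to match Django's cache API,
# and values may go negative. Assumes DjangoCache is the configured
# default backend.
from django.core.cache import cache

cache.set('hits', 0)
assert cache.incr('hits') == 1
assert cache.decr('hits', delta=2) == -1  # decrement below zero is fine

try:
    cache.incr('missing')  # default is None, so the key must exist
except ValueError:
    pass
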
+ + :param key: key for item + :param int delta: amount to increment (default 1) + :param int version: key version number (default None, cache parameter) + :param int default: value if key is missing (default None) + :param bool retry: retry if database timeout occurs (default True) + :return: new value for item on success else None + :raises ValueError: if key is not found and default is None + + """ + # pylint: disable=arguments-differ + key = self.make_key(key, version=version) + try: + return self._cache.incr(key, delta, default, retry) + except KeyError: + raise ValueError("Key '%s' not found" % key) + + + def decr(self, key, delta=1, version=None, default=None, retry=True): + """Decrement value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent decrement operations will be + counted individually. + + Unlike Memcached, negative values are supported. Value may be + decremented below zero. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + :param key: key for item + :param int delta: amount to decrement (default 1) + :param int version: key version number (default None, cache parameter) + :param int default: value if key is missing (default None) + :param bool retry: retry if database timeout occurs (default True) + :return: new value for item on success else None + :raises ValueError: if key is not found and default is None + + """ + # pylint: disable=arguments-differ + return self.incr(key, -delta, version, default, retry) + + + def has_key(self, key, version=None): + """Returns True if the key is in the cache and has not expired. + + :param key: key for item + :param int version: key version number (default None, cache parameter) + :return: True if key is found + + """ + key = self.make_key(key, version=version) + return key in self._cache + + + def expire(self): + """Remove expired items from cache. + + :return: count of items removed + + """ + return self._cache.expire() + + + def stats(self, enable=True, reset=False): + """Return cache statistics hits and misses. + + :param bool enable: enable collecting statistics (default True) + :param bool reset: reset hits and misses to 0 (default False) + :return: (hits, misses) + + """ + return self._cache.stats(enable=enable, reset=reset) + + + def create_tag_index(self): + """Create tag index on cache database. + + Better to initialize cache with `tag_index=True` than use this. + + :raises Timeout: if database timeout occurs + + """ + self._cache.create_tag_index() + + + def drop_tag_index(self): + """Drop tag index on cache database. + + :raises Timeout: if database timeout occurs + + """ + self._cache.drop_tag_index() + + + def evict(self, tag): + """Remove items with matching `tag` from cache. + + :param str tag: tag identifying items + :return: count of items removed + + """ + return self._cache.evict(tag) + + + def cull(self): + """Cull items from cache until volume is less than size limit. + + :return: count of items removed + + """ + return self._cache.cull() + + + def clear(self): + "Remove *all* values from the cache at once." + return self._cache.clear() + + + def close(self, **kwargs): + "Close the cache connection." + # pylint: disable=unused-argument + self._cache.close() + + + def get_backend_timeout(self, timeout=DEFAULT_TIMEOUT): + """Return seconds to expiration. 
+ + :param float timeout: seconds until the item expires + (default 300 seconds) + + """ + if timeout == DEFAULT_TIMEOUT: + timeout = self.default_timeout + elif timeout == 0: + # ticket 21147 - avoid time.time() related precision issues + timeout = -1 + return None if timeout is None else timeout + + + def memoize(self, name=None, timeout=DEFAULT_TIMEOUT, version=None, + typed=False, tag=None): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + When timeout is set to zero, function results will not be set in the + cache. Cache lookups still occur, however. Read + :doc:`case-study-landing-page-caching` for example usage. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. + + The original underlying function is accessible through the __wrapped__ + attribute. This is useful for introspection, for bypassing the cache, + or for rewrapping the function with a different cache. + + An additional `__cache_key__` attribute can be used to generate the + cache key used for the given arguments. + + Remember to call memoize when decorating a callable. If you forget, + then a TypeError will occur. + + :param str name: name given for callable (default None, automatic) + :param float timeout: seconds until the item expires + (default 300 seconds) + :param int version: key version number (default None, cache parameter) + :param bool typed: cache different types separately (default False) + :param str tag: text to associate with arguments (default None) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in Cache.memoize + if callable(name): + raise TypeError('name cannot be callable') + + def decorator(func): + "Decorator created by memoize() for callable `func`." + base = (full_name(func),) if name is None else (name,) + + @wraps(func) + def wrapper(*args, **kwargs): + "Wrapper for callable to cache arguments and return values." + key = wrapper.__cache_key__(*args, **kwargs) + result = self.get(key, ENOVAL, version, retry=True) + + if result is ENOVAL: + result = func(*args, **kwargs) + valid_timeout = ( + timeout is None + or timeout == DEFAULT_TIMEOUT + or timeout > 0 + ) + if valid_timeout: + self.set( + key, result, timeout, version, tag=tag, retry=True, + ) + + return result + + def __cache_key__(*args, **kwargs): + "Make key for cache given function arguments." + return args_to_key(base, args, kwargs, typed) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator diff --git a/third_party/python/diskcache/diskcache/fanout.py b/third_party/python/diskcache/diskcache/fanout.py new file mode 100644 index 0000000000..8a0a722ae6 --- /dev/null +++ b/third_party/python/diskcache/diskcache/fanout.py @@ -0,0 +1,677 @@ +"Fanout cache automatically shards keys and values." 
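
# Illustrative sketch (editor's addition, not part of the vendored
# diskcache source): DjangoCache.memoize, defined earlier in
# djangocache.py above, caches a callable's return values through the
# configured backend. The helper load_profile_from_database is
# hypothetical.
from django.core.cache import cache

@cache.memoize(timeout=60, tag='profiles')
def cached_profile(user_id):
    return load_profile_from_database(user_id)

profile = cached_profile(42)            # computed once, then cached
key = cached_profile.__cache_key__(42)  # key used for these arguments
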
+ +import itertools as it +import operator +import os.path as op +import sqlite3 +import sys +import tempfile +import time + +from .core import ENOVAL, DEFAULT_SETTINGS, Cache, Disk, Timeout +from .persistent import Deque, Index + +############################################################################ +# BEGIN Python 2/3 Shims +############################################################################ + +if sys.hexversion >= 0x03000000: + from functools import reduce + +############################################################################ +# END Python 2/3 Shims +############################################################################ + + +class FanoutCache(object): + "Cache that shards keys and values." + def __init__(self, directory=None, shards=8, timeout=0.010, disk=Disk, + **settings): + """Initialize cache instance. + + :param str directory: cache directory + :param int shards: number of shards to distribute writes + :param float timeout: SQLite connection timeout + :param disk: `Disk` instance for serialization + :param settings: any of `DEFAULT_SETTINGS` + + """ + if directory is None: + directory = tempfile.mkdtemp(prefix='diskcache-') + directory = op.expanduser(directory) + directory = op.expandvars(directory) + + default_size_limit = DEFAULT_SETTINGS['size_limit'] + size_limit = settings.pop('size_limit', default_size_limit) / shards + + self._count = shards + self._directory = directory + self._shards = tuple( + Cache( + directory=op.join(directory, '%03d' % num), + timeout=timeout, + disk=disk, + size_limit=size_limit, + **settings + ) + for num in range(shards) + ) + self._hash = self._shards[0].disk.hash + self._caches = {} + self._deques = {} + self._indexes = {} + + + @property + def directory(self): + """Cache directory.""" + return self._directory + + + def __getattr__(self, name): + return getattr(self._shards[0], name) + + + def set(self, key, value, expire=None, read=False, tag=None, retry=False): + """Set `key` and `value` item in cache. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param key: key for item + :param value: value for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as raw bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was set + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.set(key, value, expire, read, tag, retry) + except Timeout: + return False + + + def __setitem__(self, key, value): + """Set `key` and `value` item in cache. + + Calls :func:`FanoutCache.set` internally with `retry` set to `True`. + + :param key: key for item + :param value: value for item + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + shard[key] = value + + + def touch(self, key, expire=None, retry=False): + """Touch `key` in cache and update `expire` time. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). 
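
# Illustrative sketch (editor's addition, not part of the vendored
# diskcache source): FanoutCache routes each key to one shard by
# hashing it and splits size_limit evenly across shards, as __init__
# above shows. The directory path is hypothetical.
from diskcache import FanoutCache

cache = FanoutCache('/tmp/fanout-demo', shards=4, timeout=0.010,
                    size_limit=4 * 2 ** 20)  # roughly 1 MiB per shard

cache.set('answer', 42)
assert cache.get('answer') == 42
cache['answer'] = 43  # __setitem__ retries on database timeout
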
+ + :param key: key for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool retry: retry if database timeout occurs (default False) + :return: True if key was touched + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.touch(key, expire, retry) + except Timeout: + return False + + + def add(self, key, value, expire=None, read=False, tag=None, retry=False): + """Add `key` and `value` item to cache. + + Similar to `set`, but only add to cache if key not present. + + This operation is atomic. Only one concurrent add operation for given + key from separate threads or processes will succeed. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param key: key for item + :param value: value for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was added + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.add(key, value, expire, read, tag, retry) + except Timeout: + return False + + + def incr(self, key, delta=1, default=0, retry=False): + """Increment value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent increment operations will be + counted individually. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param key: key for item + :param int delta: amount to increment (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item on success else None + :raises KeyError: if key is not found and default is None + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.incr(key, delta, default, retry) + except Timeout: + return None + + + def decr(self, key, delta=1, default=0, retry=False): + """Decrement value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent decrement operations will be + counted individually. + + Unlike Memcached, negative values are supported. Value may be + decremented below zero. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). 
+ + :param key: key for item + :param int delta: amount to decrement (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item on success else None + :raises KeyError: if key is not found and default is None + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.decr(key, delta, default, retry) + except Timeout: + return None + + + def get(self, key, default=None, read=False, expire_time=False, tag=False, + retry=False): + """Retrieve value from cache. If `key` is missing, return `default`. + + If database timeout occurs then returns `default` unless `retry` is set + to `True` (default `False`). + + :param key: key for item + :param default: return value if key is missing (default None) + :param bool read: if True, return file handle to value + (default False) + :param float expire_time: if True, return expire_time in tuple + (default False) + :param tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item if key is found else default + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.get(key, default, read, expire_time, tag, retry) + except (Timeout, sqlite3.OperationalError): + return default + + + def __getitem__(self, key): + """Return corresponding value for `key` from cache. + + Calls :func:`FanoutCache.get` internally with `retry` set to `True`. + + :param key: key for item + :return: value for item + :raises KeyError: if key is not found + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + return shard[key] + + + def read(self, key): + """Return file handle corresponding to `key` from cache. + + :param key: key for item + :return: file open for reading in binary mode + :raises KeyError: if key is not found + + """ + handle = self.get(key, default=ENOVAL, read=True, retry=True) + if handle is ENOVAL: + raise KeyError(key) + return handle + + + def __contains__(self, key): + """Return `True` if `key` matching item is found in cache. + + :param key: key for item + :return: True if key is found + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + return key in shard + + + def pop(self, key, default=None, expire_time=False, tag=False, retry=False): + """Remove corresponding item for `key` from cache and return value. + + If `key` is missing, return `default`. + + Operation is atomic. Concurrent operations will be serialized. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param key: key for item + :param default: return value if key is missing (default None) + :param float expire_time: if True, return expire_time in tuple + (default False) + :param tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item if key is found else default + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.pop(key, default, expire_time, tag, retry) + except Timeout: + return default + + + def delete(self, key, retry=False): + """Delete corresponding item for `key` from cache. + + Missing keys are ignored. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). 
+ + :param key: key for item + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was deleted + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + try: + return shard.delete(key, retry) + except Timeout: + return False + + + def __delitem__(self, key): + """Delete corresponding item for `key` from cache. + + Calls :func:`FanoutCache.delete` internally with `retry` set to `True`. + + :param key: key for item + :raises KeyError: if key is not found + + """ + index = self._hash(key) % self._count + shard = self._shards[index] + del shard[key] + + + def check(self, fix=False, retry=False): + """Check database and file system consistency. + + Intended for use in testing and post-mortem error analysis. + + While checking the cache table for consistency, a writer lock is held + on the database. The lock blocks other cache clients from writing to + the database. For caches with many file references, the lock may be + held for a long time. For example, local benchmarking shows that a + cache with 1,000 file references takes ~60ms to check. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param bool fix: correct inconsistencies + :param bool retry: retry if database timeout occurs (default False) + :return: list of warnings + :raises Timeout: if database timeout occurs + + """ + warnings = (shard.check(fix, retry) for shard in self._shards) + return reduce(operator.iadd, warnings, []) + + + def expire(self, retry=False): + """Remove expired items from cache. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + + """ + return self._remove('expire', args=(time.time(),), retry=retry) + + + def create_tag_index(self): + """Create tag index on cache database. + + Better to initialize cache with `tag_index=True` than use this. + + :raises Timeout: if database timeout occurs + + """ + for shard in self._shards: + shard.create_tag_index() + + + def drop_tag_index(self): + """Drop tag index on cache database. + + :raises Timeout: if database timeout occurs + + """ + for shard in self._shards: + shard.drop_tag_index() + + + def evict(self, tag, retry=False): + """Remove items with matching `tag` from cache. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param str tag: tag identifying items + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + + """ + return self._remove('evict', args=(tag,), retry=retry) + + + def cull(self, retry=False): + """Cull items from cache until volume is less than size limit. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + + """ + return self._remove('cull', retry=retry) + + + def clear(self, retry=False): + """Remove all items from cache. + + If database timeout occurs then fails silently unless `retry` is set to + `True` (default `False`). 
+ + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + + """ + return self._remove('clear', retry=retry) + + + def _remove(self, name, args=(), retry=False): + total = 0 + for shard in self._shards: + method = getattr(shard, name) + while True: + try: + count = method(*args, retry=retry) + total += count + except Timeout as timeout: + total += timeout.args[0] + else: + break + return total + + + def stats(self, enable=True, reset=False): + """Return cache statistics hits and misses. + + :param bool enable: enable collecting statistics (default True) + :param bool reset: reset hits and misses to 0 (default False) + :return: (hits, misses) + + """ + results = [shard.stats(enable, reset) for shard in self._shards] + total_hits = sum(hits for hits, _ in results) + total_misses = sum(misses for _, misses in results) + return total_hits, total_misses + + + def volume(self): + """Return estimated total size of cache on disk. + + :return: size in bytes + + """ + return sum(shard.volume() for shard in self._shards) + + + def close(self): + "Close database connection." + for shard in self._shards: + shard.close() + self._caches.clear() + self._deques.clear() + self._indexes.clear() + + + def __enter__(self): + return self + + + def __exit__(self, *exception): + self.close() + + + def __getstate__(self): + return (self._directory, self._count, self.timeout, type(self.disk)) + + + def __setstate__(self, state): + self.__init__(*state) + + + def __iter__(self): + "Iterate keys in cache including expired items." + iterators = (iter(shard) for shard in self._shards) + return it.chain.from_iterable(iterators) + + + def __reversed__(self): + "Reverse iterate keys in cache including expired items." + iterators = (reversed(shard) for shard in reversed(self._shards)) + return it.chain.from_iterable(iterators) + + + def __len__(self): + "Count of items in cache including expired items." + return sum(len(shard) for shard in self._shards) + + + def reset(self, key, value=ENOVAL): + """Reset `key` and `value` item from Settings table. + + If `value` is not given, it is reloaded from the Settings + table. Otherwise, the Settings table is updated. + + Settings attributes on cache objects are lazy-loaded and + read-only. Use `reset` to update the value. + + Settings with the ``sqlite_`` prefix correspond to SQLite + pragmas. Updating the value will execute the corresponding PRAGMA + statement. + + :param str key: Settings key for item + :param value: value for item (optional) + :return: updated value for item + + """ + for shard in self._shards: + while True: + try: + result = shard.reset(key, value) + except Timeout: + pass + else: + break + return result + + + def cache(self, name): + """Return Cache with given `name` in subdirectory. + + >>> fanout_cache = FanoutCache() + >>> cache = fanout_cache.cache('test') + >>> cache.set('abc', 123) + True + >>> cache.get('abc') + 123 + >>> len(cache) + 1 + >>> cache.delete('abc') + True + + :param str name: subdirectory name for Cache + :return: Cache with given name + + """ + _caches = self._caches + + try: + return _caches[name] + except KeyError: + parts = name.split('/') + directory = op.join(self._directory, 'cache', *parts) + temp = Cache(directory=directory) + _caches[name] = temp + return temp + + + def deque(self, name): + """Return Deque with given `name` in subdirectory. 
+ + >>> cache = FanoutCache() + >>> deque = cache.deque('test') + >>> deque.extend('abc') + >>> deque.popleft() + 'a' + >>> deque.pop() + 'c' + >>> len(deque) + 1 + + :param str name: subdirectory name for Deque + :return: Deque with given name + + """ + _deques = self._deques + + try: + return _deques[name] + except KeyError: + parts = name.split('/') + directory = op.join(self._directory, 'deque', *parts) + temp = Deque(directory=directory) + _deques[name] = temp + return temp + + + def index(self, name): + """Return Index with given `name` in subdirectory. + + >>> cache = FanoutCache() + >>> index = cache.index('test') + >>> index['abc'] = 123 + >>> index['def'] = 456 + >>> index['ghi'] = 789 + >>> index.popitem() + ('ghi', 789) + >>> del index['abc'] + >>> len(index) + 1 + >>> index['def'] + 456 + + :param str name: subdirectory name for Index + :return: Index with given name + + """ + _indexes = self._indexes + + try: + return _indexes[name] + except KeyError: + parts = name.split('/') + directory = op.join(self._directory, 'index', *parts) + temp = Index(directory) + _indexes[name] = temp + return temp + + +############################################################################ +# BEGIN Python 2/3 Shims +############################################################################ + +if sys.hexversion < 0x03000000: + import types + memoize_func = Cache.__dict__['memoize'] # pylint: disable=invalid-name + FanoutCache.memoize = types.MethodType(memoize_func, None, FanoutCache) +else: + FanoutCache.memoize = Cache.memoize + +############################################################################ +# END Python 2/3 Shims +############################################################################ diff --git a/third_party/python/diskcache/diskcache/persistent.py b/third_party/python/diskcache/diskcache/persistent.py new file mode 100644 index 0000000000..961f77361f --- /dev/null +++ b/third_party/python/diskcache/diskcache/persistent.py @@ -0,0 +1,1403 @@ +"""Persistent Data Types + +""" + +import operator as op +import sys + +from collections import OrderedDict +from contextlib import contextmanager +from shutil import rmtree + +from .core import BytesType, Cache, ENOVAL, TextType + +############################################################################ +# BEGIN Python 2/3 Shims +############################################################################ + +try: + from collections.abc import MutableMapping, Sequence + from collections.abc import KeysView, ValuesView, ItemsView +except ImportError: + from collections import MutableMapping, Sequence + from collections import KeysView, ValuesView, ItemsView + +if sys.hexversion < 0x03000000: + from itertools import izip as zip # pylint: disable=redefined-builtin,no-name-in-module,ungrouped-imports + range = xrange # pylint: disable=redefined-builtin,invalid-name,undefined-variable + +############################################################################ +# END Python 2/3 Shims +############################################################################ + + +def _make_compare(seq_op, doc): + "Make compare method with Sequence semantics." + def compare(self, that): + "Compare method for deque and sequence." 
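+        # Mirror built-in sequence comparison semantics: unequal lengths
+        # decide __eq__ and __ne__ up front; otherwise the comparison is
+        # decided by the first unequal pair of items, or by comparing
+        # lengths when all paired items are equal.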
+ if not isinstance(that, Sequence): + return NotImplemented + + len_self = len(self) + len_that = len(that) + + if len_self != len_that: + if seq_op is op.eq: + return False + if seq_op is op.ne: + return True + + for alpha, beta in zip(self, that): + if alpha != beta: + return seq_op(alpha, beta) + + return seq_op(len_self, len_that) + + compare.__name__ = '__{0}__'.format(seq_op.__name__) + doc_str = 'Return True if and only if deque is {0} `that`.' + compare.__doc__ = doc_str.format(doc) + + return compare + + +class Deque(Sequence): + """Persistent sequence with double-ended queue semantics. + + Double-ended queue is an ordered collection with optimized access at its + endpoints. + + Items are serialized to disk. Deque may be initialized from directory path + where items are stored. + + >>> deque = Deque() + >>> deque += range(5) + >>> list(deque) + [0, 1, 2, 3, 4] + >>> for value in range(5): + ... deque.appendleft(-value) + >>> len(deque) + 10 + >>> list(deque) + [-4, -3, -2, -1, 0, 0, 1, 2, 3, 4] + >>> deque.pop() + 4 + >>> deque.popleft() + -4 + >>> deque.reverse() + >>> list(deque) + [3, 2, 1, 0, 0, -1, -2, -3] + + """ + def __init__(self, iterable=(), directory=None): + """Initialize deque instance. + + If directory is None then temporary directory created. The directory + will *not* be automatically removed. + + :param iterable: iterable of items to append to deque + :param directory: deque directory (default None) + + """ + self._cache = Cache(directory, eviction_policy='none') + with self.transact(): + self.extend(iterable) + + + @classmethod + def fromcache(cls, cache, iterable=()): + """Initialize deque using `cache`. + + >>> cache = Cache() + >>> deque = Deque.fromcache(cache, [5, 6, 7, 8]) + >>> deque.cache is cache + True + >>> len(deque) + 4 + >>> 7 in deque + True + >>> deque.popleft() + 5 + + :param Cache cache: cache to use + :param iterable: iterable of items + :return: initialized Deque + + """ + # pylint: disable=no-member,protected-access + self = cls.__new__(cls) + self._cache = cache + self.extend(iterable) + return self + + + @property + def cache(self): + "Cache used by deque." + return self._cache + + + @property + def directory(self): + "Directory path where deque is stored." + return self._cache.directory + + + def _index(self, index, func): + len_self = len(self) + + if index >= 0: + if index >= len_self: + raise IndexError('deque index out of range') + + for key in self._cache.iterkeys(): + if index == 0: + try: + return func(key) + except KeyError: + continue + index -= 1 + else: + if index < -len_self: + raise IndexError('deque index out of range') + + index += 1 + + for key in self._cache.iterkeys(reverse=True): + if index == 0: + try: + return func(key) + except KeyError: + continue + index += 1 + + raise IndexError('deque index out of range') + + + def __getitem__(self, index): + """deque.__getitem__(index) <==> deque[index] + + Return corresponding item for `index` in deque. + + See also `Deque.peekleft` and `Deque.peek` for indexing deque at index + ``0`` or ``-1``. + + >>> deque = Deque() + >>> deque.extend('abcde') + >>> deque[1] + 'b' + >>> deque[-2] + 'd' + + :param int index: index of item + :return: corresponding item + :raises IndexError: if index out of range + + """ + return self._index(index, self._cache.__getitem__) + + + def __setitem__(self, index, value): + """deque.__setitem__(index, value) <==> deque[index] = value + + Store `value` in deque at `index`. 
+ + >>> deque = Deque() + >>> deque.extend([None] * 3) + >>> deque[0] = 'a' + >>> deque[1] = 'b' + >>> deque[-1] = 'c' + >>> ''.join(deque) + 'abc' + + :param int index: index of value + :param value: value to store + :raises IndexError: if index out of range + + """ + set_value = lambda key: self._cache.__setitem__(key, value) + self._index(index, set_value) + + + def __delitem__(self, index): + """deque.__delitem__(index) <==> del deque[index] + + Delete item in deque at `index`. + + >>> deque = Deque() + >>> deque.extend([None] * 3) + >>> del deque[0] + >>> del deque[1] + >>> del deque[-1] + >>> len(deque) + 0 + + :param int index: index of item + :raises IndexError: if index out of range + + """ + self._index(index, self._cache.__delitem__) + + + def __repr__(self): + """deque.__repr__() <==> repr(deque) + + Return string with printable representation of deque. + + """ + name = type(self).__name__ + return '{0}(directory={1!r})'.format(name, self.directory) + + + __eq__ = _make_compare(op.eq, 'equal to') + __ne__ = _make_compare(op.ne, 'not equal to') + __lt__ = _make_compare(op.lt, 'less than') + __gt__ = _make_compare(op.gt, 'greater than') + __le__ = _make_compare(op.le, 'less than or equal to') + __ge__ = _make_compare(op.ge, 'greater than or equal to') + + + def __iadd__(self, iterable): + """deque.__iadd__(iterable) <==> deque += iterable + + Extend back side of deque with items from iterable. + + :param iterable: iterable of items to append to deque + :return: deque with added items + + """ + self.extend(iterable) + return self + + + def __iter__(self): + """deque.__iter__() <==> iter(deque) + + Return iterator of deque from front to back. + + """ + _cache = self._cache + + for key in _cache.iterkeys(): + try: + yield _cache[key] + except KeyError: + pass + + + def __len__(self): + """deque.__len__() <==> len(deque) + + Return length of deque. + + """ + return len(self._cache) + + + def __reversed__(self): + """deque.__reversed__() <==> reversed(deque) + + Return iterator of deque from back to front. + + >>> deque = Deque() + >>> deque.extend('abcd') + >>> iterator = reversed(deque) + >>> next(iterator) + 'd' + >>> list(iterator) + ['c', 'b', 'a'] + + """ + _cache = self._cache + + for key in _cache.iterkeys(reverse=True): + try: + yield _cache[key] + except KeyError: + pass + + + def __getstate__(self): + return self.directory + + + def __setstate__(self, state): + self.__init__(directory=state) + + + def append(self, value): + """Add `value` to back of deque. + + >>> deque = Deque() + >>> deque.append('a') + >>> deque.append('b') + >>> deque.append('c') + >>> list(deque) + ['a', 'b', 'c'] + + :param value: value to add to back of deque + + """ + self._cache.push(value, retry=True) + + + def appendleft(self, value): + """Add `value` to front of deque. + + >>> deque = Deque() + >>> deque.appendleft('a') + >>> deque.appendleft('b') + >>> deque.appendleft('c') + >>> list(deque) + ['c', 'b', 'a'] + + :param value: value to add to front of deque + + """ + self._cache.push(value, side='front', retry=True) + + + def clear(self): + """Remove all elements from deque. + + >>> deque = Deque('abc') + >>> len(deque) + 3 + >>> deque.clear() + >>> list(deque) + [] + + """ + self._cache.clear(retry=True) + + + def count(self, value): + """Return number of occurrences of `value` in deque. 
+
+        >>> deque = Deque()
+        >>> deque += [num for num in range(1, 5) for _ in range(num)]
+        >>> deque.count(0)
+        0
+        >>> deque.count(1)
+        1
+        >>> deque.count(4)
+        4
+
+        :param value: value to count in deque
+        :return: count of items equal to value in deque
+
+        """
+        return sum(1 for item in self if value == item)
+
+
+    def extend(self, iterable):
+        """Extend back side of deque with values from `iterable`.
+
+        :param iterable: iterable of values
+
+        """
+        for value in iterable:
+            self.append(value)
+
+
+    def extendleft(self, iterable):
+        """Extend front side of deque with values from `iterable`.
+
+        >>> deque = Deque()
+        >>> deque.extendleft('abc')
+        >>> list(deque)
+        ['c', 'b', 'a']
+
+        :param iterable: iterable of values
+
+        """
+        for value in iterable:
+            self.appendleft(value)
+
+
+    def peek(self):
+        """Peek at value at back of deque.
+
+        Faster than indexing deque at -1.
+
+        If deque is empty then raise IndexError.
+
+        >>> deque = Deque()
+        >>> deque.peek()
+        Traceback (most recent call last):
+            ...
+        IndexError: peek from an empty deque
+        >>> deque += 'abc'
+        >>> deque.peek()
+        'c'
+
+        :return: value at back of deque
+        :raises IndexError: if deque is empty
+
+        """
+        default = None, ENOVAL
+        _, value = self._cache.peek(default=default, side='back', retry=True)
+        if value is ENOVAL:
+            raise IndexError('peek from an empty deque')
+        return value
+
+
+    def peekleft(self):
+        """Peek at value at front of deque.
+
+        Faster than indexing deque at 0.
+
+        If deque is empty then raise IndexError.
+
+        >>> deque = Deque()
+        >>> deque.peekleft()
+        Traceback (most recent call last):
+            ...
+        IndexError: peek from an empty deque
+        >>> deque += 'abc'
+        >>> deque.peekleft()
+        'a'
+
+        :return: value at front of deque
+        :raises IndexError: if deque is empty
+
+        """
+        default = None, ENOVAL
+        _, value = self._cache.peek(default=default, side='front', retry=True)
+        if value is ENOVAL:
+            raise IndexError('peek from an empty deque')
+        return value
+
+
+    def pop(self):
+        """Remove and return value at back of deque.
+
+        If deque is empty then raise IndexError.
+
+        >>> deque = Deque()
+        >>> deque += 'ab'
+        >>> deque.pop()
+        'b'
+        >>> deque.pop()
+        'a'
+        >>> deque.pop()
+        Traceback (most recent call last):
+            ...
+        IndexError: pop from an empty deque
+
+        :return: value at back of deque
+        :raises IndexError: if deque is empty
+
+        """
+        default = None, ENOVAL
+        _, value = self._cache.pull(default=default, side='back', retry=True)
+        if value is ENOVAL:
+            raise IndexError('pop from an empty deque')
+        return value
+
+
+    def popleft(self):
+        """Remove and return value at front of deque.
+
+        >>> deque = Deque()
+        >>> deque += 'ab'
+        >>> deque.popleft()
+        'a'
+        >>> deque.popleft()
+        'b'
+        >>> deque.popleft()
+        Traceback (most recent call last):
+            ...
+        IndexError: pop from an empty deque
+
+        :return: value at front of deque
+        :raises IndexError: if deque is empty
+
+        """
+        default = None, ENOVAL
+        _, value = self._cache.pull(default=default, retry=True)
+        if value is ENOVAL:
+            raise IndexError('pop from an empty deque')
+        return value
+
+
+    def remove(self, value):
+        """Remove first occurrence of `value` in deque.
+
+        >>> deque = Deque()
+        >>> deque += 'aab'
+        >>> deque.remove('a')
+        >>> list(deque)
+        ['a', 'b']
+        >>> deque.remove('b')
+        >>> list(deque)
+        ['a']
+        >>> deque.remove('c')
+        Traceback (most recent call last):
+            ...
+        ValueError: deque.remove(value): value not in deque
+
+        :param value: value to remove
+        :raises ValueError: if value not in deque
+
+        """
+        _cache = self._cache
+
+        for key in _cache.iterkeys():
+            try:
+                item = _cache[key]
+            except KeyError:
+                continue
+            else:
+                if value == item:
+                    try:
+                        del _cache[key]
+                    except KeyError:
+                        continue
+                    return
+
+        raise ValueError('deque.remove(value): value not in deque')
+
+
+    def reverse(self):
+        """Reverse deque in place.
+
+        >>> deque = Deque()
+        >>> deque += 'abc'
+        >>> deque.reverse()
+        >>> list(deque)
+        ['c', 'b', 'a']
+
+        """
+        # GrantJ 2019-03-22 Consider using an algorithm that swaps the values
+        # at two keys. Like self._cache.swap(key1, key2, retry=True) The swap
+        # method would exchange the values at two given keys. Then, using a
+        # forward iterator and a reverse iterator, the reverse method could
+        # avoid making copies of the values.
+        temp = Deque(iterable=reversed(self))
+        self.clear()
+        self.extend(temp)
+        directory = temp.directory
+        del temp
+        rmtree(directory)
+
+
+    def rotate(self, steps=1):
+        """Rotate deque right by `steps`.
+
+        If steps is negative then rotate left.
+
+        >>> deque = Deque()
+        >>> deque += range(5)
+        >>> deque.rotate(2)
+        >>> list(deque)
+        [3, 4, 0, 1, 2]
+        >>> deque.rotate(-1)
+        >>> list(deque)
+        [4, 0, 1, 2, 3]
+
+        :param int steps: number of steps to rotate (default 1)
+
+        """
+        if not isinstance(steps, int):
+            type_name = type(steps).__name__
+            raise TypeError('integer argument expected, got %s' % type_name)
+
+        len_self = len(self)
+
+        if not len_self:
+            return
+
+        if steps >= 0:
+            steps %= len_self
+
+            for _ in range(steps):
+                try:
+                    value = self.pop()
+                except IndexError:
+                    return
+                else:
+                    self.appendleft(value)
+        else:
+            steps *= -1
+            steps %= len_self
+
+            for _ in range(steps):
+                try:
+                    value = self.popleft()
+                except IndexError:
+                    return
+                else:
+                    self.append(value)
+
+
+    __hash__ = None
+
+
+    @contextmanager
+    def transact(self):
+        """Context manager to perform a transaction by locking the deque.
+
+        While the deque is locked, no other write operation is permitted.
+        Transactions should therefore be as short as possible. Read and write
+        operations performed in a transaction are atomic. Read operations may
+        occur concurrent to a transaction.
+
+        Transactions may be nested and may not be shared between threads.
+
+        >>> from diskcache import Deque
+        >>> deque = Deque()
+        >>> deque += range(5)
+        >>> with deque.transact():  # Atomically rotate elements.
+        ...     value = deque.pop()
+        ...     deque.appendleft(value)
+        >>> list(deque)
+        [4, 0, 1, 2, 3]
+
+        :return: context manager for use in `with` statement
+
+        """
+        with self._cache.transact(retry=True):
+            yield
+
+
+class Index(MutableMapping):
+    """Persistent mutable mapping with insertion order iteration.
+
+    Items are serialized to disk. Index may be initialized from directory path
+    where items are stored.
+
+    Hashing protocol is not used. Keys are looked up by their serialized
+    format. See ``diskcache.Disk`` for details.
+
+    >>> index = Index()
+    >>> index.update([('a', 1), ('b', 2), ('c', 3)])
+    >>> index['a']
+    1
+    >>> list(index)
+    ['a', 'b', 'c']
+    >>> len(index)
+    3
+    >>> del index['b']
+    >>> index.popitem()
+    ('c', 3)
+
+    """
+    def __init__(self, *args, **kwargs):
+        """Initialize index in directory and update items.
+
+        Optional first argument may be string specifying directory where items
+        are stored. When None or not given, a temporary directory is created.
+ + >>> index = Index({'a': 1, 'b': 2, 'c': 3}) + >>> len(index) + 3 + >>> directory = index.directory + >>> inventory = Index(directory, d=4) + >>> inventory['b'] + 2 + >>> len(inventory) + 4 + + """ + if args and isinstance(args[0], (BytesType, TextType)): + directory = args[0] + args = args[1:] + else: + if args and args[0] is None: + args = args[1:] + directory = None + self._cache = Cache(directory, eviction_policy='none') + self.update(*args, **kwargs) + + + @classmethod + def fromcache(cls, cache, *args, **kwargs): + """Initialize index using `cache` and update items. + + >>> cache = Cache() + >>> index = Index.fromcache(cache, {'a': 1, 'b': 2, 'c': 3}) + >>> index.cache is cache + True + >>> len(index) + 3 + >>> 'b' in index + True + >>> index['c'] + 3 + + :param Cache cache: cache to use + :param args: mapping or sequence of items + :param kwargs: mapping of items + :return: initialized Index + + """ + # pylint: disable=no-member,protected-access + self = cls.__new__(cls) + self._cache = cache + self.update(*args, **kwargs) + return self + + + @property + def cache(self): + "Cache used by index." + return self._cache + + + @property + def directory(self): + "Directory path where items are stored." + return self._cache.directory + + + def __getitem__(self, key): + """index.__getitem__(key) <==> index[key] + + Return corresponding value for `key` in index. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2}) + >>> index['a'] + 1 + >>> index['b'] + 2 + >>> index['c'] + Traceback (most recent call last): + ... + KeyError: 'c' + + :param key: key for item + :return: value for item in index with given key + :raises KeyError: if key is not found + + """ + return self._cache[key] + + + def __setitem__(self, key, value): + """index.__setitem__(key, value) <==> index[key] = value + + Set `key` and `value` item in index. + + >>> index = Index() + >>> index['a'] = 1 + >>> index[0] = None + >>> len(index) + 2 + + :param key: key for item + :param value: value for item + + """ + self._cache[key] = value + + + def __delitem__(self, key): + """index.__delitem__(key) <==> del index[key] + + Delete corresponding item for `key` from index. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2}) + >>> del index['a'] + >>> del index['b'] + >>> len(index) + 0 + >>> del index['c'] + Traceback (most recent call last): + ... + KeyError: 'c' + + :param key: key for item + :raises KeyError: if key is not found + + """ + del self._cache[key] + + + def setdefault(self, key, default=None): + """Set and get value for `key` in index using `default`. + + If `key` is not in index then set corresponding value to `default`. If + `key` is in index then ignore `default` and return existing value. + + >>> index = Index() + >>> index.setdefault('a', 0) + 0 + >>> index.setdefault('a', 1) + 0 + + :param key: key for item + :param default: value if key is missing (default None) + :return: value for item in index with given key + + """ + _cache = self._cache + while True: + try: + return _cache[key] + except KeyError: + _cache.add(key, default, retry=True) + + + def peekitem(self, last=True): + """Peek at key and value item pair in index based on iteration order. + + >>> index = Index() + >>> for num, letter in enumerate('xyz'): + ... 
index[letter] = num + >>> index.peekitem() + ('z', 2) + >>> index.peekitem(last=False) + ('x', 0) + + :param bool last: last item in iteration order (default True) + :return: key and value item pair + :raises KeyError: if cache is empty + + """ + return self._cache.peekitem(last, retry=True) + + + def pop(self, key, default=ENOVAL): + """Remove corresponding item for `key` from index and return value. + + If `key` is missing then return `default`. If `default` is `ENOVAL` + then raise KeyError. + + >>> index = Index({'a': 1, 'b': 2}) + >>> index.pop('a') + 1 + >>> index.pop('b') + 2 + >>> index.pop('c', default=3) + 3 + >>> index.pop('d') + Traceback (most recent call last): + ... + KeyError: 'd' + + :param key: key for item + :param default: return value if key is missing (default ENOVAL) + :return: value for item if key is found else default + :raises KeyError: if key is not found and default is ENOVAL + + """ + _cache = self._cache + value = _cache.pop(key, default=default, retry=True) + if value is ENOVAL: + raise KeyError(key) + return value + + + def popitem(self, last=True): + """Remove and return item pair. + + Item pairs are returned in last-in-first-out (LIFO) order if last is + True else first-in-first-out (FIFO) order. LIFO order imitates a stack + and FIFO order imitates a queue. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> index.popitem() + ('c', 3) + >>> index.popitem(last=False) + ('a', 1) + >>> index.popitem() + ('b', 2) + >>> index.popitem() + Traceback (most recent call last): + ... + KeyError: 'dictionary is empty' + + :param bool last: pop last item pair (default True) + :return: key and value item pair + :raises KeyError: if index is empty + + """ + # pylint: disable=arguments-differ + _cache = self._cache + + with _cache.transact(retry=True): + key, value = _cache.peekitem(last=last) + del _cache[key] + + return key, value + + + def push(self, value, prefix=None, side='back'): + """Push `value` onto `side` of queue in index identified by `prefix`. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + Defaults to pushing value on back of queue. Set side to 'front' to push + value on front of queue. Side must be one of 'back' or 'front'. + + See also `Index.pull`. + + >>> index = Index() + >>> print(index.push('apples')) + 500000000000000 + >>> print(index.push('beans')) + 500000000000001 + >>> print(index.push('cherries', side='front')) + 499999999999999 + >>> index[500000000000001] + 'beans' + >>> index.push('dates', prefix='fruit') + 'fruit-500000000000000' + + :param value: value for item + :param str prefix: key prefix (default None, key is integer) + :param str side: either 'back' or 'front' (default 'back') + :return: key for item in cache + + """ + return self._cache.push(value, prefix, side, retry=True) + + + def pull(self, prefix=None, default=(None, None), side='front'): + """Pull key and value item pair from `side` of queue in index. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to pulling key and value item pairs from front of queue. Set + side to 'back' to pull from back of queue. Side must be one of 'front' + or 'back'. + + See also `Index.push`. + + >>> index = Index() + >>> for letter in 'abc': + ... 
print(index.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = index.pull() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> _, value = index.pull(side='back') + >>> value + 'c' + >>> index.pull(prefix='fruit') + (None, None) + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :return: key and value item pair or default if queue is empty + + """ + return self._cache.pull(prefix, default, side, retry=True) + + + def clear(self): + """Remove all items from index. + + >>> index = Index({'a': 0, 'b': 1, 'c': 2}) + >>> len(index) + 3 + >>> index.clear() + >>> dict(index) + {} + + """ + self._cache.clear(retry=True) + + + def __iter__(self): + """index.__iter__() <==> iter(index) + + Return iterator of index keys in insertion order. + + """ + return iter(self._cache) + + + def __reversed__(self): + """index.__reversed__() <==> reversed(index) + + Return iterator of index keys in reversed insertion order. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> iterator = reversed(index) + >>> next(iterator) + 'c' + >>> list(iterator) + ['b', 'a'] + + """ + return reversed(self._cache) + + + def __len__(self): + """index.__len__() <==> len(index) + + Return length of index. + + """ + return len(self._cache) + + + if sys.hexversion < 0x03000000: + def keys(self): + """List of index keys. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> index.keys() + ['a', 'b', 'c'] + + :return: list of keys + + """ + return list(self._cache) + + + def values(self): + """List of index values. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> index.values() + [1, 2, 3] + + :return: list of values + + """ + return list(self.itervalues()) + + + def items(self): + """List of index items. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> index.items() + [('a', 1), ('b', 2), ('c', 3)] + + :return: list of items + + """ + return list(self.iteritems()) + + + def iterkeys(self): + """Iterator of index keys. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> list(index.iterkeys()) + ['a', 'b', 'c'] + + :return: iterator of keys + + """ + return iter(self._cache) + + + def itervalues(self): + """Iterator of index values. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> list(index.itervalues()) + [1, 2, 3] + + :return: iterator of values + + """ + _cache = self._cache + + for key in _cache: + while True: + try: + yield _cache[key] + except KeyError: + pass + break + + + def iteritems(self): + """Iterator of index items. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> list(index.iteritems()) + [('a', 1), ('b', 2), ('c', 3)] + + :return: iterator of items + + """ + _cache = self._cache + + for key in _cache: + while True: + try: + yield key, _cache[key] + except KeyError: + pass + break + + + def viewkeys(self): + """Set-like object providing a view of index keys. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> keys_view = index.viewkeys() + >>> 'b' in keys_view + True + + :return: keys view + + """ + return KeysView(self) + + + def viewvalues(self): + """Set-like object providing a view of index values. 
+ + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> values_view = index.viewvalues() + >>> 2 in values_view + True + + :return: values view + + """ + return ValuesView(self) + + + def viewitems(self): + """Set-like object providing a view of index items. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> items_view = index.viewitems() + >>> ('b', 2) in items_view + True + + :return: items view + + """ + return ItemsView(self) + + + else: + def keys(self): + """Set-like object providing a view of index keys. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> keys_view = index.keys() + >>> 'b' in keys_view + True + + :return: keys view + + """ + return KeysView(self) + + + def values(self): + """Set-like object providing a view of index values. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> values_view = index.values() + >>> 2 in values_view + True + + :return: values view + + """ + return ValuesView(self) + + + def items(self): + """Set-like object providing a view of index items. + + >>> index = Index() + >>> index.update({'a': 1, 'b': 2, 'c': 3}) + >>> items_view = index.items() + >>> ('b', 2) in items_view + True + + :return: items view + + """ + return ItemsView(self) + + + __hash__ = None + + + def __getstate__(self): + return self.directory + + + def __setstate__(self, state): + self.__init__(state) + + + def __eq__(self, other): + """index.__eq__(other) <==> index == other + + Compare equality for index and `other`. + + Comparison to another index or ordered dictionary is + order-sensitive. Comparison to all other mappings is order-insensitive. + + >>> index = Index() + >>> pairs = [('a', 1), ('b', 2), ('c', 3)] + >>> index.update(pairs) + >>> from collections import OrderedDict + >>> od = OrderedDict(pairs) + >>> index == od + True + >>> index == {'c': 3, 'b': 2, 'a': 1} + True + + :param other: other mapping in equality comparison + :return: True if index equals other + + """ + if len(self) != len(other): + return False + + if isinstance(other, (Index, OrderedDict)): + alpha = ((key, self[key]) for key in self) + beta = ((key, other[key]) for key in other) + pairs = zip(alpha, beta) + return not any(a != x or b != y for (a, b), (x, y) in pairs) + else: + return all(self[key] == other.get(key, ENOVAL) for key in self) + + + def __ne__(self, other): + """index.__ne__(other) <==> index != other + + Compare inequality for index and `other`. + + Comparison to another index or ordered dictionary is + order-sensitive. Comparison to all other mappings is order-insensitive. + + >>> index = Index() + >>> index.update([('a', 1), ('b', 2), ('c', 3)]) + >>> from collections import OrderedDict + >>> od = OrderedDict([('c', 3), ('b', 2), ('a', 1)]) + >>> index != od + True + >>> index != {'a': 1, 'b': 2} + True + + :param other: other mapping in inequality comparison + :return: True if index does not equal other + + """ + return not self == other + + + def memoize(self, name=None, typed=False): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. 
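+
+        A small sketch of the `typed` behaviour (the `numbers` index and
+        `identity` function below are illustrative only):
+
+        >>> from diskcache import Index
+        >>> numbers = Index()
+        >>> @numbers.memoize(typed=True)
+        ... def identity(value):
+        ...     return value
+        >>> identity(3) == identity(3.0)
+        True
+        >>> identity.__cache_key__(3) == identity.__cache_key__(3.0)
+        False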
+
+        The original underlying function is accessible through the __wrapped__
+        attribute. This is useful for introspection, for bypassing the cache,
+        or for rewrapping the function with a different cache.
+
+        >>> from diskcache import Index
+        >>> mapping = Index()
+        >>> @mapping.memoize()
+        ... def fibonacci(number):
+        ...     if number == 0:
+        ...         return 0
+        ...     elif number == 1:
+        ...         return 1
+        ...     else:
+        ...         return fibonacci(number - 1) + fibonacci(number - 2)
+        >>> print(fibonacci(100))
+        354224848179261915075
+
+        An additional `__cache_key__` attribute can be used to generate the
+        cache key used for the given arguments.
+
+        >>> key = fibonacci.__cache_key__(100)
+        >>> print(mapping[key])
+        354224848179261915075
+
+        Remember to call memoize when decorating a callable. If you forget,
+        then a TypeError will occur. Note the lack of parentheses after
+        memoize below:
+
+        >>> @mapping.memoize
+        ... def test():
+        ...     pass
+        Traceback (most recent call last):
+            ...
+        TypeError: name cannot be callable
+
+        :param str name: name given for callable (default None, automatic)
+        :param bool typed: cache different types separately (default False)
+        :return: callable decorator
+
+        """
+        return self._cache.memoize(name, typed)
+
+
+    @contextmanager
+    def transact(self):
+        """Context manager to perform a transaction by locking the index.
+
+        While the index is locked, no other write operation is permitted.
+        Transactions should therefore be as short as possible. Read and write
+        operations performed in a transaction are atomic. Read operations may
+        occur concurrent to a transaction.
+
+        Transactions may be nested and may not be shared between threads.
+
+        >>> from diskcache import Index
+        >>> mapping = Index()
+        >>> with mapping.transact():  # Atomically increment two keys.
+        ...     mapping['total'] = mapping.get('total', 0) + 123.4
+        ...     mapping['count'] = mapping.get('count', 0) + 1
+        >>> with mapping.transact():  # Atomically calculate average.
+        ...     average = mapping['total'] / mapping['count']
+        >>> average
+        123.4
+
+        :return: context manager for use in `with` statement
+
+        """
+        with self._cache.transact(retry=True):
+            yield
+
+
+    def __repr__(self):
+        """index.__repr__() <==> repr(index)
+
+        Return string with printable representation of index.
+
+        """
+        name = type(self).__name__
+        return '{0}({1!r})'.format(name, self.directory)
diff --git a/third_party/python/diskcache/diskcache/recipes.py b/third_party/python/diskcache/diskcache/recipes.py
new file mode 100644
index 0000000000..fb6425090a
--- /dev/null
+++ b/third_party/python/diskcache/diskcache/recipes.py
@@ -0,0 +1,437 @@
+"""Disk Cache Recipes

+"""
+
+import functools
+import math
+import os
+import random
+import sys
+import threading
+import time
+
+from .core import ENOVAL, args_to_key, full_name
+
+############################################################################
+# BEGIN Python 2/3 Shims
+############################################################################
+
+if sys.hexversion < 0x03000000:
+    from thread import get_ident  # pylint: disable=import-error
+else:
+    from threading import get_ident
+
+############################################################################
+# END Python 2/3 Shims
+############################################################################
+
+
+class Averager(object):
+    """Recipe for calculating a running average.
+
+    Sometimes known as "online statistics," the running average maintains the
+    total and count. The average can then be calculated at any time.
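+
+    For example, after adding 0.080 and 0.120 the stored pair is roughly
+    (0.2, 2) (ignoring floating-point rounding), so the average is
+    0.2 / 2 = 0.1, as the doctest below shows.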
+ + >>> import diskcache + >>> cache = diskcache.FanoutCache() + >>> ave = Averager(cache, 'latency') + >>> ave.add(0.080) + >>> ave.add(0.120) + >>> ave.get() + 0.1 + >>> ave.add(0.160) + >>> ave.pop() + 0.12 + >>> print(ave.get()) + None + + """ + def __init__(self, cache, key, expire=None, tag=None): + self._cache = cache + self._key = key + self._expire = expire + self._tag = tag + + def add(self, value): + "Add `value` to average." + with self._cache.transact(retry=True): + total, count = self._cache.get(self._key, default=(0.0, 0)) + total += value + count += 1 + self._cache.set( + self._key, (total, count), expire=self._expire, tag=self._tag, + ) + + def get(self): + "Get current average or return `None` if count equals zero." + total, count = self._cache.get(self._key, default=(0.0, 0), retry=True) + return None if count == 0 else total / count + + def pop(self): + "Return current average and delete key." + total, count = self._cache.pop(self._key, default=(0.0, 0), retry=True) + return None if count == 0 else total / count + + +class Lock(object): + """Recipe for cross-process and cross-thread lock. + + >>> import diskcache + >>> cache = diskcache.Cache() + >>> lock = Lock(cache, 'report-123') + >>> lock.acquire() + >>> lock.release() + >>> with lock: + ... pass + + """ + def __init__(self, cache, key, expire=None, tag=None): + self._cache = cache + self._key = key + self._expire = expire + self._tag = tag + + def acquire(self): + "Acquire lock using spin-lock algorithm." + while True: + added = self._cache.add( + self._key, None, expire=self._expire, tag=self._tag, retry=True, + ) + if added: + break + time.sleep(0.001) + + def release(self): + "Release lock by deleting key." + self._cache.delete(self._key, retry=True) + + def __enter__(self): + self.acquire() + + def __exit__(self, *exc_info): + self.release() + + +class RLock(object): + """Recipe for cross-process and cross-thread re-entrant lock. + + >>> import diskcache + >>> cache = diskcache.Cache() + >>> rlock = RLock(cache, 'user-123') + >>> rlock.acquire() + >>> rlock.acquire() + >>> rlock.release() + >>> with rlock: + ... pass + >>> rlock.release() + >>> rlock.release() + Traceback (most recent call last): + ... + AssertionError: cannot release un-acquired lock + + """ + def __init__(self, cache, key, expire=None, tag=None): + self._cache = cache + self._key = key + self._expire = expire + self._tag = tag + + def acquire(self): + "Acquire lock by incrementing count using spin-lock algorithm." + pid = os.getpid() + tid = get_ident() + pid_tid = '{}-{}'.format(pid, tid) + + while True: + with self._cache.transact(retry=True): + value, count = self._cache.get(self._key, default=(None, 0)) + if pid_tid == value or count == 0: + self._cache.set( + self._key, (pid_tid, count + 1), + expire=self._expire, tag=self._tag, + ) + return + time.sleep(0.001) + + def release(self): + "Release lock by decrementing count." + pid = os.getpid() + tid = get_ident() + pid_tid = '{}-{}'.format(pid, tid) + + with self._cache.transact(retry=True): + value, count = self._cache.get(self._key, default=(None, 0)) + is_owned = pid_tid == value and count > 0 + assert is_owned, 'cannot release un-acquired lock' + self._cache.set( + self._key, (value, count - 1), + expire=self._expire, tag=self._tag, + ) + + def __enter__(self): + self.acquire() + + def __exit__(self, *exc_info): + self.release() + + +class BoundedSemaphore(object): + """Recipe for cross-process and cross-thread bounded semaphore. 
+ + >>> import diskcache + >>> cache = diskcache.Cache() + >>> semaphore = BoundedSemaphore(cache, 'max-cons', value=2) + >>> semaphore.acquire() + >>> semaphore.acquire() + >>> semaphore.release() + >>> with semaphore: + ... pass + >>> semaphore.release() + >>> semaphore.release() + Traceback (most recent call last): + ... + AssertionError: cannot release un-acquired semaphore + + """ + def __init__(self, cache, key, value=1, expire=None, tag=None): + self._cache = cache + self._key = key + self._value = value + self._expire = expire + self._tag = tag + + def acquire(self): + "Acquire semaphore by decrementing value using spin-lock algorithm." + while True: + with self._cache.transact(retry=True): + value = self._cache.get(self._key, default=self._value) + if value > 0: + self._cache.set( + self._key, value - 1, + expire=self._expire, tag=self._tag, + ) + return + time.sleep(0.001) + + def release(self): + "Release semaphore by incrementing value." + with self._cache.transact(retry=True): + value = self._cache.get(self._key, default=self._value) + assert self._value > value, 'cannot release un-acquired semaphore' + value += 1 + self._cache.set( + self._key, value, expire=self._expire, tag=self._tag, + ) + + def __enter__(self): + self.acquire() + + def __exit__(self, *exc_info): + self.release() + + +def throttle(cache, count, seconds, name=None, expire=None, tag=None, + time_func=time.time, sleep_func=time.sleep): + """Decorator to throttle calls to function. + + >>> import diskcache, time + >>> cache = diskcache.Cache() + >>> count = 0 + >>> @throttle(cache, 2, 1) # 2 calls per 1 second + ... def increment(): + ... global count + ... count += 1 + >>> start = time.time() + >>> while (time.time() - start) <= 2: + ... increment() + >>> count in (6, 7) # 6 or 7 calls depending on CPU load + True + + """ + def decorator(func): + rate = count / float(seconds) + key = full_name(func) if name is None else name + now = time_func() + cache.set(key, (now, count), expire=expire, tag=tag, retry=True) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + while True: + with cache.transact(retry=True): + last, tally = cache.get(key) + now = time_func() + tally += (now - last) * rate + delay = 0 + + if tally > count: + cache.set(key, (now, count - 1), expire) + elif tally >= 1: + cache.set(key, (now, tally - 1), expire) + else: + delay = (1 - tally) / rate + + if delay: + sleep_func(delay) + else: + break + + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def barrier(cache, lock_factory, name=None, expire=None, tag=None): + """Barrier to calling decorated function. + + Supports different kinds of locks: Lock, RLock, BoundedSemaphore. + + >>> import diskcache, time + >>> cache = diskcache.Cache() + >>> @barrier(cache, Lock) + ... def work(num): + ... print('worker started') + ... time.sleep(1) + ... print('worker finished') + >>> import multiprocessing.pool + >>> pool = multiprocessing.pool.ThreadPool(2) + >>> _ = pool.map(work, range(2)) + worker started + worker finished + worker started + worker finished + >>> pool.terminate() + + """ + def decorator(func): + key = full_name(func) if name is None else name + lock = lock_factory(cache, key, expire=expire, tag=tag) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with lock: + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def memoize_stampede(cache, expire, name=None, typed=False, tag=None, beta=1): + """Memoizing cache decorator with cache stampede protection. 
+ + Cache stampedes are a type of system overload that can occur when parallel + computing systems using memoization come under heavy load. This behaviour + is sometimes also called dog-piling, cache miss storm, cache choking, or + the thundering herd problem. + + The memoization decorator implements cache stampede protection through + early recomputation. Early recomputation of function results will occur + probabilistically before expiration in a background thread of + execution. Early probabilistic recomputation is based on research by + Vattani, A.; Chierichetti, F.; Lowenstein, K. (2015), Optimal Probabilistic + Cache Stampede Prevention, VLDB, pp. 886-897, ISSN 2150-8097 + + If name is set to None (default), the callable name will be determined + automatically. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as distinct + calls with distinct results. + + The original underlying function is accessible through the `__wrapped__` + attribute. This is useful for introspection, for bypassing the cache, or + for rewrapping the function with a different cache. + + >>> from diskcache import Cache + >>> cache = Cache() + >>> @memoize_stampede(cache, expire=1) + ... def fib(number): + ... if number == 0: + ... return 0 + ... elif number == 1: + ... return 1 + ... else: + ... return fib(number - 1) + fib(number - 2) + >>> print(fib(100)) + 354224848179261915075 + + An additional `__cache_key__` attribute can be used to generate the cache + key used for the given arguments. + + >>> key = fib.__cache_key__(100) + >>> del cache[key] + + Remember to call memoize when decorating a callable. If you forget, then a + TypeError will occur. + + :param cache: cache to store callable arguments and return values + :param float expire: seconds until arguments expire + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param str tag: text to associate with arguments (default None) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in Cache.memoize + def decorator(func): + "Decorator created by memoize call for callable." + base = (full_name(func),) if name is None else (name,) + + def timer(*args, **kwargs): + "Time execution of `func` and return result and time delta." + start = time.time() + result = func(*args, **kwargs) + delta = time.time() - start + return result, delta + + @functools.wraps(func) + def wrapper(*args, **kwargs): + "Wrapper for callable to cache arguments and return values." + key = wrapper.__cache_key__(*args, **kwargs) + pair, expire_time = cache.get( + key, default=ENOVAL, expire_time=True, retry=True, + ) + + if pair is not ENOVAL: + result, delta = pair + now = time.time() + ttl = expire_time - now + + if (-delta * beta * math.log(random.random())) < ttl: + return result # Cache hit. + + # Check whether a thread has started for early recomputation. + + thread_key = key + (ENOVAL,) + thread_added = cache.add( + thread_key, None, expire=delta, retry=True, + ) + + if thread_added: + # Start thread for early recomputation. 
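+                    # The stale cached value is returned to the caller right
+                    # away (see the `return result` below) while the daemon
+                    # thread recomputes the pair and rewrites the cache entry
+                    # in the background.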
+                    def recompute():
+                        with cache:
+                            pair = timer(*args, **kwargs)
+                            cache.set(
+                                key, pair, expire=expire, tag=tag, retry=True,
+                            )
+                    thread = threading.Thread(target=recompute)
+                    thread.daemon = True
+                    thread.start()
+
+                return result
+
+            pair = timer(*args, **kwargs)
+            cache.set(key, pair, expire=expire, tag=tag, retry=True)
+            return pair[0]
+
+        def __cache_key__(*args, **kwargs):
+            "Make key for cache given function arguments."
+            return args_to_key(base, args, kwargs, typed)
+
+        wrapper.__cache_key__ = __cache_key__
+        return wrapper
+
+    return decorator