Source code for PyXWF.Cache

# File name: Cache.py
# This file is part of: PyXWF
#
# LICENSE
#
# The contents of this file are subject to the Mozilla Public License
# Version 1.1 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
# the License for the specific language governing rights and limitations
# under the License.
#
# Alternatively, the contents of this file may be used under the terms
# of the GNU General Public License (the "GPL License"), in which case
# the provisions of the GPL License are applicable instead of those above.
#
# FEEDBACK & QUESTIONS
#
# For feedback and questions about PyXWF, please e-mail one of the
# authors named in the AUTHORS file.
########################################################################
"""
Caching framework for PyXWF. This is probably not yet as mature as it will
need to be in the future, but it provides the basic means for caching
resources used by the PyXWF instance.

It may, in the future, also be used to cache complete rendered pages.
"""
from __future__ import unicode_literals

import abc, functools, time, os, operator
import logging

from PyXWF.utils import threading, _F
import PyXWF.Nodes as Nodes
import PyXWF.Errors as Errors
import PyXWF.utils as utils

# rebind the name ``logging`` to this module's logger (this shadows the
# logging module, which is not used directly below)
logging = logging.getLogger(__name__)

class Cachable(object):
    """
    Represents an object which can reside in a cache. This class defines the
    interface necessary for a cache entry to work properly.

    The following attributes are used (and reserved) by the caching framework
    on Cachables:

    *   `_cache_lastaccess` -- timestamp of the last access of the object via
        the cache. This is used as a metric to decide which entries to
        uncache when limits are reached.
    *   `_cache_master` -- the :class:`Cache` instance holding the object.
    """
    __metaclass__ = abc.ABCMeta

    def __init__(self):
        super(Cachable, self).__init__()
        self._cache_lastaccess = time.time()
        self._cache_master = None

    def touch(self):
        """
        Set the internal timestamp of last use to the current timestamp. This
        also informs the master (if known) of the changed value, which it
        uses for its uncache metrics.
        """
        self._cache_lastaccess = time.time()
        if self._cache_master is not None:
            self._cache_master._changed(self)

    def uncache(self):
        """
        Remove the object from the cache it is associated with.
        """
        if self._cache_master is not None:
            # go through the public method so that the limit lock is taken
            self._cache_master.remove(self)

    def propose_uncache(self):
        """
        Set the timestamp of last use to a point in the past, so that if
        limits are reached this object is more likely to be uncached than
        others.
        """
        # it must be something which is definitely in the past (a date which
        # is easier to find than a date which is definitely in the future;
        # greetings to the MTA folks).
        self._cache_lastaccess = 0

    @staticmethod
    def _cache_key(cachable):
        # sort key used to order cache entries by their last access
        return cachable._cache_lastaccess
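
# A minimal usage sketch (not part of the module): a hypothetical cachable
# which wraps a parsed file; ``parse_file`` is an assumed helper.
#
#     class ParsedDocument(Cachable):
#         def __init__(self, path):
#             super(ParsedDocument, self).__init__()
#             self.tree = parse_file(path)
#
#     doc = ParsedDocument("/tmp/example.xml")
#     doc.touch()            # mark as recently used
#     doc.propose_uncache()  # mark as a good candidate for purging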


class SubCache(object):
    """
    The big master cache (a :class:`Cache` instance) is subdivided into
    smaller parts, :class:`SubCache` instances, which can be thought of as
    namespaces inside the cache. They are used to separate different kinds of
    cachable objects, which can be handled by different `SubCache`-derived
    classes. See :class:`FileSourcedCache` for an example of the
    optimizations this makes possible.

    SubCaches provide dictionary-like access to cachables, associating *keys*
    with the cachables. An object is added to the cache by simply assigning a
    key to it. Objects can also be uncached using the `del` operator. The
    `in` operator and the `len` function work as expected.
    """

    def __init__(self, cache):
        self.site = cache.site
        self.master = cache
        self.entries = {}
        self.reversemap = {}
        self._lookuplock = threading.RLock()

    def _kill(self, cachable):
        """
        Delete a cache entry from the subcache. Do not call this method
        directly; uncache entries using :meth:`remove`,
        :meth:`Cachable.uncache` or :meth:`Cache.remove` instead.
        """
        del self.entries[self.reversemap[cachable]]
        del self.reversemap[cachable]

    def get(self, key, default=None):
        """
        Try to get an object from the cache; return *default* (which defaults
        to ``None``) if no object is associated with *key*.
        """
        with self._lookuplock:
            try:
                return self[key]
            except KeyError:
                return default

    def remove(self, cachable):
        """
        Remove a cachable from the cache.
        """
        self.master.remove(cachable)

    def __getitem__(self, key):
        cachable = self.entries[key]
        cachable.touch()
        return cachable

    def __setitem__(self, key, cachable):
        with self._lookuplock:
            if key in self:
                raise KeyError("Cache key already in use: {0}".format(key))
            self.entries[key] = cachable
            self.reversemap[cachable] = key
            self.master._add(cachable)
            cachable._cache_master = self.master
            cachable._cache_subcache = self

    def __delitem__(self, key):
        with self._lookuplock:
            cachable = self.entries[key]
            cachable.uncache()

    def __contains__(self, key):
        with self._lookuplock:
            return key in self.entries

    def __len__(self):
        return len(self.entries)

    def get_last_modified(self, key):
        """
        Return the datetime representing the last modification of the cached
        content. The default implementation requests the cached element from
        the cache and queries its LastModified property.

        Derived classes may (and should!) provide mechanisms which can query
        the LastModified timestamp without completely loading an object.
        """
        with self._lookuplock:
            return self[key].LastModified

    def update(self, key):
        """
        Call `threadsafe_update()` on the cache entry referenced by *key*,
        but only if *key* references a valid entry; otherwise, nothing
        happens. This ensures that cached entries are reloaded even if they
        would not be reloaded on the next access anyway.
        """
        with self._lookuplock:
            try:
                entry = self.entries[key]
            except KeyError:
                return
            entry.threadsafe_update()
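
# A minimal sketch of the dictionary-like SubCache interface (not part of
# the module); ``cache`` is assumed to be a Cache instance and ``doc`` a
# Cachable, e.g. the hypothetical ParsedDocument from the sketch above:
#
#     pages = cache["pages"]    # creates the sub-cache on first access
#     pages["index"] = doc      # cache the object under a key
#     pages.get("index")        # -> doc (and touches it)
#     "index" in pages          # -> True
#     del pages["index"]        # uncaches the object again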


class Cache(object):
    """
    Master object representing a full application cache. Objects are not
    stored directly in the :class:`Cache` instance but in sub-caches. The
    :class:`Cache` object provides dictionary-like access to sub-caches: if
    no cache is associated with a given key, a new plain :class:`SubCache` is
    created for that key. Specialized sub-caches can be created using
    :meth:`specialized_cache`.
    """

    def __init__(self, site, limit=0):
        self._lookuplock = threading.RLock()
        self._limitlock = threading.RLock()
        self.site = site
        self.subcaches = {}
        self._limit = 0
        self.Limit = limit

    def _add(self, cachable):
        """
        Add a cachable for bookkeeping. Do not call this directly.
        """
        with self._limitlock:
            if self._limit:
                self.entries.append(cachable)

    def _changed(self, entry):
        """
        Re-sort the container which keeps track of all entries, to enforce
        cache limits.
        """
        with self._limitlock:
            if not self._limit:
                return
            self.entries.sort(key=Cachable._cache_key)

    def _remove(self, cachable):
        """
        Remove a cachable from the cache. The caller must already hold the
        limit lock.
        """
        if self._limit:
            # no need to re-sort here
            self.entries.remove(cachable)
        cachable._cache_subcache._kill(cachable)
        del cachable._cache_master
        del cachable._cache_subcache

    def __getitem__(self, key):
        with self._lookuplock:
            try:
                return self.subcaches[key]
            except KeyError:
                subcache = SubCache(self)
                self.subcaches[key] = subcache
                return subcache

    def __delitem__(self, key):
        with self._lookuplock:
            cache = self.subcaches[key]
            for entry in cache.entries.values():
                entry.uncache()
            del self.subcaches[key]

    def specialized_cache(self, key, cls, *args, **kwargs):
        """
        Create a specialized sub-cache at *key* using the given class *cls*.
        Further positional and keyword arguments are passed to the
        constructor of *cls*. Return the new *cls* instance.
        """
        cache = cls(self, *args, **kwargs)
        with self._lookuplock:
            if key in self.subcaches:
                raise Errors.CacheConflict(key)
            self.subcaches[key] = cache
        return cache

    def remove(self, cachable):
        """
        Remove one entry from the cache. You can also use
        :meth:`Cachable.uncache`, which does the same thing.
        """
        with self._limitlock:
            self._remove(cachable)

    def enforce_limit(self):
        """
        Remove the entries with the oldest last-access timestamps from the
        cache until the limit is honoured again.
        """
        with self._limitlock:
            if not self._limit:
                return
            toomany = len(self.entries) - self._limit
            if toomany > 0:
                overflow = self.entries[:toomany]
                self.entries = self.entries[toomany:]
                for entry in overflow:
                    logging.debug(_F("PURGE: {0}", entry))
                    entry._cache_subcache._kill(entry)

    @property
    def Limit(self):
        """
        The maximum number of cache entries which are kept. This does not
        differentiate between sub-caches and is a global hard limit. If this
        limit is exceeded, old (i.e. not used for some time) entries are
        purged. Setting this limit to 0 disables limiting.
        """
        with self._limitlock:
            return self._limit

    @Limit.setter
    def Limit(self, value):
        with self._limitlock:
            if value is None:
                value = 0
            value = int(value)
            if value == self._limit:
                return
            if value < 0:
                raise ValueError("Cache limit must be non-negative.")
            if value == 0:
                del self.entries
            else:
                self.entries = []
                for cache in self.subcaches.viewvalues():
                    self.entries.extend(cache.entries.viewvalues())
                self.entries.sort(
                    key=operator.attrgetter("_cache_lastaccess"))
            # assign the new limit before enforcing it, so that
            # enforce_limit sees the new value
            self._limit = value
            self.enforce_limit()
            logging.debug(_F("CONF: Limit now at {0}", value))
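
# A minimal sketch of setting up a master cache (not part of the module);
# ``site`` is assumed to be the PyXWF site object:
#
#     cache = Cache(site, limit=128)
#     pages = cache["pages"]   # plain SubCache, created on demand
#     cache.Limit = 256        # raise the global hard limit
#     cache.Limit = 0          # disable limiting entirely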


class FileSourcedCache(SubCache):
    """
    An abstract base class for caches which are backed by files on the file
    system. The file names are used as keys, relative to a given root
    directory *rootpath*.

    A deriving class has to implement the :meth:`_load` method, which is
    called whenever a file accessed through this cache is not yet available
    in the cache.
    """
    __metaclass__ = abc.ABCMeta

    def __init__(self, master, rootpath):
        super(FileSourcedCache, self).__init__(master)
        if rootpath is None:
            raise ValueError("rootpath must not be None")
        self.rootpath = rootpath

    @abc.abstractmethod
    def _load(self, path, **kwargs):
        """
        Derived classes have to implement this method. It must return the
        object loaded from *path* or raise.
        """

    def _transform_key(self, key):
        return os.path.join(self.rootpath, key)

    def __getitem__(self, key, **kwargs):
        # keyword arguments are forwarded to _load; they can only be passed
        # by calling __getitem__ explicitly
        with self._lookuplock:
            path = self._transform_key(key)
            try:
                return super(FileSourcedCache, self).__getitem__(path)
            except KeyError:
                logging.debug(_F("MISS: {0}", path))
                obj = self._load(path, **kwargs)
                super(FileSourcedCache, self).__setitem__(path, obj)
                return obj

    def get_last_modified(self, key):
        """
        In contrast to the implementation in :class:`SubCache`, this
        implementation uses the timestamp of last modification of the file
        referenced by *key*. This implies that a resource is not necessarily
        loaded (or even loadable!) even if a LastModified timestamp can be
        retrieved successfully.
        """
        timestamp = utils.file_last_modified(self._transform_key(key))
        if timestamp is None:
            raise Errors.ResourceLost(self._transform_key(key))
        return timestamp

    def update(self, key):
        super(FileSourcedCache, self).update(self._transform_key(key))

    def __repr__(self):
        return "<FSCache {0}>".format(self.rootpath)

    # entries are created through _load only; direct assignment is disabled
    __setitem__ = None
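
# A minimal sketch of a FileSourcedCache subclass (not part of the module);
# ``ParsedDocument`` is the hypothetical cachable from the sketch above:
#
#     class XMLFileCache(FileSourcedCache):
#         def _load(self, path, **kwargs):
#             return ParsedDocument(path)
#
#     docs = cache.specialized_cache(
#         "xml-docs", XMLFileCache, "/var/www/data")
#     tree = docs["index.xml"].tree   # loads and caches on first access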