Package cherrypy :: Package lib :: Module caching
[hide private]
[frames | no frames]

Source Code for Module cherrypy.lib.caching

  1  """ 
  2  CherryPy implements a simple caching system as a pluggable Tool. This tool 
  3  tries to be an (in-process) HTTP/1.1-compliant cache. It's not quite there 
  4  yet, but it's probably good enough for most sites. 
  5   
  6  In general, GET responses are cached (along with selecting headers) and, if 
  7  another request arrives for the same resource, the caching Tool will return 304 
  8  Not Modified if possible, or serve the cached response otherwise. It also sets 
  9  request.cached to True if serving a cached representation, and sets 
 10  request.cacheable to False (so it doesn't get cached again). 
 11   
 12  If POST, PUT, or DELETE requests are made for a cached resource, they 
 13  invalidate (delete) any cached response. 
 14   
 15  Usage 
 16  ===== 
 17   
 18  Configuration file example:: 
 19   
 20      [/] 
 21      tools.caching.on = True 
 22      tools.caching.delay = 3600 
 23   
 24  You may use a class other than the default 
 25  :class:`MemoryCache<cherrypy.lib.caching.MemoryCache>` by supplying the config 
 26  entry ``cache_class``; supply the full dotted name of the replacement class 
 27  as the config value. It must implement the basic methods ``get``, ``put``, 
 28  ``delete``, and ``clear``. 
 29   
 30  You may set any attribute, including overriding methods, on the cache 
 31  instance by providing them in config. The above sets the 
 32  :attr:`delay<cherrypy.lib.caching.MemoryCache.delay>` attribute, for example. 
 33  """ 
 34   
 35  import datetime 
 36  import sys 
 37  import threading 
 38  import time 
 39   
 40  import cherrypy 
 41  from cherrypy.lib import cptools, httputil 
 42  from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted, Event 
 43   
 44   
class Cache(object):

    """Base class for Cache implementations.

    This class only defines the interface the caching Tool relies on.
    Every method raises ``NotImplementedError`` and must be overridden
    by a concrete subclass.
    """

    def get(self):
        """Return the current variant if in the cache, else None."""
        # NotImplementedError is the exception; the NotImplemented constant
        # is not raisable and would surface as a confusing TypeError.
        raise NotImplementedError

    def put(self, obj, size):
        """Store the current variant in the cache.

        obj
            The variant to store.
        size
            The size of ``obj``, as supplied by the caller.
        """
        raise NotImplementedError

    def delete(self):
        """Remove ALL cached variants of the current resource."""
        raise NotImplementedError

    def clear(self):
        """Reset the cache to its initial, empty state."""
        raise NotImplementedError


# ------------------------------ Memory Cache ------------------------------- #
class AntiStampedeCache(dict):

    """A storage system for cached items which reduces stampede collisions.

    While one thread is calculating the value for a key, the slot for that
    key holds an Event (the "sentinel") carrying a ``result`` attribute.
    Other threads asking for the same key can block on that Event instead
    of recalculating the value themselves.
    """

    def wait(self, key, timeout=5, debug=False):
        """Return the cached value for the given key, or None.

        If timeout is not None, and the value is already
        being calculated by another thread, wait until the given timeout has
        elapsed. If the value is available before the timeout expires, it is
        returned. If not, None is returned, and a sentinel placed in the cache
        to signal other threads to wait.

        If timeout is None, no waiting is performed nor sentinels used.

        key
            The key to look up.
        timeout
            Seconds to wait on another thread's sentinel, or None.
        debug
            If True, progress messages are written via ``cherrypy.log``.
        """
        value = self.get(key)
        if isinstance(value, Event):
            if timeout is None:
                # Ignore the other thread and recalc it ourselves.
                if debug:
                    cherrypy.log('No timeout', 'TOOLS.CACHING')
                return None

            # Wait until it's done or times out.
            if debug:
                cherrypy.log('Waiting up to %s seconds' %
                             timeout, 'TOOLS.CACHING')
            value.wait(timeout)
            if value.result is not None:
                # The other thread finished its calculation. Use it.
                if debug:
                    cherrypy.log('Result!', 'TOOLS.CACHING')
                return value.result

            # Timed out. Stick an Event in the slot so other threads wait
            # on this one to finish calculating the value.
            if debug:
                cherrypy.log('Timed out', 'TOOLS.CACHING')
            self._place_sentinel(key)
            return None
        elif value is None:
            if debug:
                cherrypy.log('Cache miss', 'TOOLS.CACHING')
            # Only advertise "calculation in progress" when callers are
            # actually willing to wait; with timeout=None nobody blocks on
            # sentinels, so none is stored.
            if timeout is not None:
                self._place_sentinel(key)
        return value

    def _place_sentinel(self, key):
        """Store a fresh, unset Event under key for other threads to wait on."""
        sentinel = threading.Event()
        sentinel.result = None
        # Bypass our own __setitem__: it would treat the sentinel as a
        # finished value and release whoever waits on the previous one.
        dict.__setitem__(self, key, sentinel)

    def __setitem__(self, key, value):
        """Set the cached value for the given key.

        If the slot currently holds a sentinel Event, the value is also
        published on it and the Event is set, releasing waiting threads.
        """
        existing = self.get(key)
        dict.__setitem__(self, key, value)
        if isinstance(existing, Event):
            # Set Event.result so other threads waiting on it have
            # immediate access without needing to poll the cache again.
            existing.result = value
            existing.set()
class MemoryCache(Cache):

    """An in-memory cache for varying response content.

    Each key in self.store is a URI, and each value is an AntiStampedeCache.
    The response for any given URI may vary based on the values of
    "selecting request headers"; that is, those named in the Vary
    response header. We assume the list of header names to be constant
    for each URI throughout the lifetime of the application, and store
    that list in ``self.store[uri].selecting_headers``.

    The items contained in ``self.store[uri]`` have keys which are tuples of
    request header values (in the same order as the names in its
    selecting_headers), and values which are the actual responses.
    """

    maxobjects = 1000
    """The maximum number of cached objects; defaults to 1000."""

    maxobj_size = 100000
    """The maximum size of each cached object in bytes; defaults to 100 KB."""

    maxsize = 10000000
    """The maximum size of the entire cache in bytes; defaults to 10 MB."""

    delay = 600
    """Seconds until the cached content expires; defaults to 600 (10 minutes).
    """

    antistampede_timeout = 5
    """Seconds to wait for other threads to release a cache lock."""

    expire_freq = 0.1
    """Seconds to sleep between cache expiration sweeps."""

    debug = False

    def __init__(self):
        """Create an empty cache and start its expiration thread."""
        self.clear()

        # Run self.expire_cache in a separate daemon thread.
        t = threading.Thread(target=self.expire_cache, name='expire_cache')
        self.expiration_thread = t
        set_daemon(t, True)
        t.start()

    def clear(self):
        """Reset the cache to its initial, empty state."""
        self.store = {}
        self.expirations = {}
        self.tot_puts = 0
        self.tot_gets = 0
        self.tot_hist = 0
        self.tot_expires = 0
        self.tot_non_modified = 0
        self.cursize = 0

    def expire_cache(self):
        """Continuously examine cached objects, expiring stale ones.

        This function is designed to be run in its own daemon thread,
        referenced at ``self.expiration_thread``.
        """
        # It's possible that "time" will be set to None
        # arbitrarily, so we check "while time" to avoid exceptions.
        # See tickets #99 and #180 for more information.
        while time:
            now = time.time()
            # Must make a copy of expirations so it doesn't change size
            # during iteration
            for expiration_time, objects in copyitems(self.expirations):
                if expiration_time <= now:
                    for obj_size, uri, variant_key in objects:
                        try:
                            del self.store[uri][tuple(variant_key)]
                            self.tot_expires += 1
                            self.cursize -= obj_size
                        except KeyError:
                            # the key may have been deleted elsewhere
                            pass
                    # NOTE(review): a per-URI cache left empty by the loop
                    # above stays in self.store and keeps counting toward
                    # maxobjects; pruning it here would need care because
                    # put() may be filling it concurrently.
                    del self.expirations[expiration_time]
            time.sleep(self.expire_freq)

    def _variant_key(self, uricache):
        """Return the key of the current request's variant in ``uricache``.

        The key is the tuple of the current request's values for the
        selecting headers, in the same order as
        ``uricache.selecting_headers``; a missing header contributes ''.
        The values are deliberately not sorted: sorting would give the same
        key to two requests whose header values are merely swapped.
        """
        headers = cherrypy.serving.request.headers
        return tuple([headers.get(h, '') for h in uricache.selecting_headers])

    def _release_sentinel(self, uricache, key):
        """Drop a pending anti-stampede sentinel for ``key``, waking waiters.

        Used when a calculated variant will not be stored after all, so
        that threads blocked in ``uricache.wait`` do not sit out their whole
        timeout waiting for a value that is never going to arrive.
        """
        pending = dict.get(uricache, key)
        if isinstance(pending, Event):
            dict.pop(uricache, key, None)
            # result is still None, so waiters fall through to their
            # "timed out" path and calculate the value themselves.
            pending.set()

    def get(self):
        """Return the current variant if in the cache, else None."""
        request = cherrypy.serving.request
        self.tot_gets += 1

        uri = cherrypy.url(qs=request.query_string)
        uricache = self.store.get(uri)
        if uricache is None:
            return None

        variant = uricache.wait(key=self._variant_key(uricache),
                                timeout=self.antistampede_timeout,
                                debug=self.debug)
        if variant is not None:
            self.tot_hist += 1
        return variant

    def put(self, variant, size):
        """Store the current variant in the cache.

        variant
            The object to cache for the current request.
        size
            The size of ``variant``; checked against ``maxobj_size`` and,
            added to ``cursize``, against ``maxsize``.

        The variant is silently not stored if the cache already holds
        ``maxobjects`` resources or if either size limit would be reached.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        uri = cherrypy.url(qs=request.query_string)
        uricache = self.store.get(uri)
        if uricache is None:
            uricache = AntiStampedeCache()
            uricache.selecting_headers = [
                e.value for e in response.headers.elements('Vary')]
            self.store[uri] = uricache

        key = self._variant_key(uricache)
        total_size = self.cursize + size

        # checks if there's space for the object
        if (len(self.store) < self.maxobjects and
                size < self.maxobj_size and total_size < self.maxsize):
            # add to the expirations list; record the very key the variant
            # is stored under so that expire_cache can find it again
            expiration_time = response.time + self.delay
            bucket = self.expirations.setdefault(expiration_time, [])
            bucket.append((size, uri, key))

            # add to the cache
            uricache[key] = variant
            self.tot_puts += 1
            self.cursize = total_size
        else:
            # Nothing will be stored: do not leave get()'s sentinel behind.
            self._release_sentinel(uricache, key)

    def delete(self):
        """Remove ALL cached variants of the current resource."""
        uri = cherrypy.url(qs=cherrypy.serving.request.query_string)
        self.store.pop(uri, None)


def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs):
    """Try to serve the current request from the cache.

    On first use, a process-wide cache object is created at
    ``cherrypy._cache`` from ``kwargs['cache_class']`` (default
    ``MemoryCache``); the remaining kwargs and ``debug`` are set on it as
    attributes.

    If request.method is one of ``invalid_methods``:
        * invalidates (deletes) any cached response for this resource
        * sets request.cached = False
        * sets request.cacheable = False
        * returns False

    else if the request carries ``Pragma: no-cache``, or
    ``Cache-Control: no-cache`` while a cached copy exists, or the cached
    copy is older than the applicable max-age (``Cache-Control: max-age``
    if given, else the cache's ``delay``):
        * sets request.cached = False
        * sets request.cacheable = True
        * returns False

    else if a cached copy exists:
        * sets request.cached = True
        * sets request.cacheable = False
        * sets response.headers to a copy of the cached values, plus Age
        * calls cptools.validate_since(); an HTTPRedirect raised by it
          propagates (a 304 is counted in the cache's tot_non_modified)
        * sets response.status and response.body to the cached values
        * returns True

    otherwise:
        * sets request.cached = False
        * sets request.cacheable = True
        * returns False

    Raises HTTPError(400) for a ``max-age`` directive without a purely
    numeric value.
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    if not hasattr(cherrypy, "_cache"):
        # First call in this process: build the shared Cache object and
        # configure it from whatever keyword arguments are left over.
        cache_class = kwargs.pop("cache_class", MemoryCache)
        cherrypy._cache = cache_class()
        for attr_name, attr_value in kwargs.items():
            setattr(cherrypy._cache, attr_name, attr_value)
        cherrypy._cache.debug = debug

    # POST, PUT, DELETE should invalidate (delete) the cached copy.
    # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10.
    if request.method in invalid_methods:
        if debug:
            cherrypy.log('request.method %r in invalid_methods %r' %
                         (request.method, invalid_methods), 'TOOLS.CACHING')
        cherrypy._cache.delete()
        request.cached = False
        request.cacheable = False
        return False

    pragma_values = [elem.value
                     for elem in request.headers.elements('Pragma')]
    if 'no-cache' in pragma_values:
        request.cached = False
        request.cacheable = True
        return False

    cache_data = cherrypy._cache.get()
    request.cached = bool(cache_data)
    request.cacheable = not request.cached
    if not request.cached:
        if debug:
            cherrypy.log('request is not cached', 'TOOLS.CACHING')
        return request.cached

    # A cached copy exists; let the client's Cache-Control header decide
    # whether (and up to what age) we may use it.
    max_age = cherrypy._cache.delay
    control_values = [elem.value
                      for elem in request.headers.elements('Cache-Control')]
    for control in control_values:
        parts = control.split('=', 1)
        directive = parts[0]
        if directive == 'max-age':
            if len(parts) != 2 or not parts[1].isdigit():
                raise cherrypy.HTTPError(
                    400, "Invalid Cache-Control header")
            max_age = int(parts[1])
            break
        if directive == 'no-cache':
            if debug:
                cherrypy.log(
                    'Ignoring cache due to Cache-Control: no-cache',
                    'TOOLS.CACHING')
            request.cached = False
            request.cacheable = True
            return False

    if debug:
        cherrypy.log('Reading response from cache', 'TOOLS.CACHING')
    status, cached_headers, body, create_time = cache_data
    age = int(response.time - create_time)
    if age > max_age:
        if debug:
            cherrypy.log('Ignoring cache due to age > %d' % max_age,
                         'TOOLS.CACHING')
        request.cached = False
        request.cacheable = True
        return False

    # Copy the response headers. See
    # https://bitbucket.org/cherrypy/cherrypy/issue/721.
    fresh_headers = httputil.HeaderMap()
    response.headers = fresh_headers
    for name in cached_headers:
        dict.__setitem__(fresh_headers, name,
                         dict.__getitem__(cached_headers, name))

    # Add the required Age header
    response.headers["Age"] = str(age)

    try:
        # Note that validate_since depends on a Last-Modified header;
        # this was put into the cached copy, and should have been
        # resurrected by the header copy just above.
        cptools.validate_since()
    except cherrypy.HTTPRedirect:
        redirect = sys.exc_info()[1]
        if redirect.status == 304:
            cherrypy._cache.tot_non_modified += 1
        raise

    # serve it & get out from the request
    response.status = status
    response.body = body
    return request.cached


def tee_output():
    """Tee response output to cache storage. Internal.

    Replaces ``response.body`` with a generator that passes every chunk
    through unchanged and, once the body is exhausted, hands the joined
    body to ``cherrypy._cache.put``. Nothing is wrapped when the request
    carries ``Cache-Control: no-store``.
    """
    # Used by CachingTool by attaching to request.hooks

    request = cherrypy.serving.request
    response = cherrypy.serving.response

    if 'no-store' in request.headers.values('Cache-Control'):
        return

    def tee(body):
        """Yield each chunk of body, storing the whole when permitted."""
        # Evaluated when iteration starts, i.e. against the response
        # headers as they stand at that point.
        skip_store = (
            'no-cache' in response.headers.values('Pragma') or
            'no-store' in response.headers.values('Cache-Control'))

        collected = []
        for chunk in body:
            if not skip_store:
                collected.append(chunk)
            yield chunk

        if skip_store:
            return

        # save the cache data
        payload = ntob('').join(collected)
        entry = (response.status, response.headers or {},
                 payload, response.time)
        cherrypy._cache.put(entry, len(payload))

    response.body = tee(response.body)


def expires(secs=0, force=False, debug=False):
    """Tool for influencing cache mechanisms using the 'Expires' header.

    secs
        Must be either an int or a datetime.timedelta, and indicates the
        number of seconds between response.time and when the response should
        expire. The 'Expires' header will be set to response.time + secs.
        If secs is zero, the 'Expires' header is set to a fixed date in the
        past, and the following "cache prevention" headers are also set:

        * Pragma: no-cache
        * Cache-Control: no-cache, must-revalidate (only when the request
          protocol is at least HTTP/1.1)

    force
        If False, the response is first checked for these headers:

        * Etag
        * Last-Modified
        * Age
        * Expires

        If none of them is present, this tool does nothing. If at least
        one is present, the headers described above are set, but any of
        them that already exists on the response is left untouched.

        If True, that check is skipped and the headers described above
        are always set, overwriting existing values.

    debug
        If True, the decision is written via ``cherrypy.log``.
    """
    response = cherrypy.serving.response
    headers = response.headers

    if not force:
        # some header names that indicate that the response can be cached
        for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
            if indicator in headers:
                break
        else:
            # No indicator found: leave the response alone.
            if debug:
                cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES')
            return

    if debug:
        cherrypy.log('request is cacheable', 'TOOLS.EXPIRES')

    if isinstance(secs, datetime.timedelta):
        secs = (86400 * secs.days) + secs.seconds

    if secs != 0:
        expiry = httputil.HTTPDate(response.time + secs)
    else:
        if force or ("Pragma" not in headers):
            headers["Pragma"] = "no-cache"
        if (cherrypy.serving.request.protocol >= (1, 1) and
                (force or "Cache-Control" not in headers)):
            headers["Cache-Control"] = "no-cache, must-revalidate"
        # Set an explicit Expires date in the past.
        expiry = httputil.HTTPDate(1169942400.0)

    if force or "Expires" not in headers:
        headers["Expires"] = expiry