Files
Iristyle a000ce8acc feat(ST2.UtilPackages): bump up all packages
- Refresh PackageCache with latest versions of everything
2013-09-16 22:35:46 -04:00

186 lines
5.2 KiB
Python

import sys
import re
import json
import hashlib
from ..console_write import console_write
class CachingDownloader(object):
"""
A base downloader that will use a caching backend to cache HTTP requests
and make conditional requests.
"""
def add_conditional_headers(self, url, headers):
"""
Add `If-Modified-Since` and `If-None-Match` headers to a request if a
cached copy exists
:param headers:
A dict with the request headers
:return:
The request headers dict, possibly with new headers added
"""
if not self.settings.get('cache'):
return headers
info_key = self.generate_key(url, '.info')
info_json = self.settings['cache'].get(info_key)
if not info_json:
return headers
# Make sure we have the cached content to use if we get a 304
key = self.generate_key(url)
if not self.settings['cache'].has(key):
return headers
try:
info = json.loads(info_json.decode('utf-8'))
except ValueError:
return headers
etag = info.get('etag')
if etag:
headers['If-None-Match'] = etag
last_modified = info.get('last-modified')
if last_modified:
headers['If-Modified-Since'] = last_modified
return headers
def cache_result(self, method, url, status, headers, content):
"""
Processes a request result, either caching the result, or returning
the cached version of the url.
:param method:
The HTTP method used for the request
:param url:
The url of the request
:param status:
The numeric response status of the request
:param headers:
A dict of reponse headers, with keys being lowercase
:param content:
The response content
:return:
The response content
"""
debug = self.settings.get('debug', False)
if not self.settings.get('cache'):
if debug:
console_write(u"Skipping cache since there is no cache object", True)
return content
if method.lower() != 'get':
if debug:
console_write(u"Skipping cache since the HTTP method != GET", True)
return content
status = int(status)
# Don't do anything unless it was successful or not modified
if status not in [200, 304]:
if debug:
console_write(u"Skipping cache since the HTTP status code not one of: 200, 304", True)
return content
key = self.generate_key(url)
if status == 304:
cached_content = self.settings['cache'].get(key)
if cached_content:
if debug:
console_write(u"Using cached content for %s" % url, True)
return cached_content
# If we got a 304, but did not have the cached content
# stop here so we don't cache an empty response
return content
# If we got here, the status is 200
# Respect some basic cache control headers
cache_control = headers.get('cache-control', '')
if cache_control:
fields = re.split(',\s*', cache_control)
for field in fields:
if field == 'no-store':
return content
# Don't ever cache zip/binary files for the sake of hard drive space
if headers.get('content-type') in ['application/zip', 'application/octet-stream']:
if debug:
console_write(u"Skipping cache since the response is a zip file", True)
return content
etag = headers.get('etag')
last_modified = headers.get('last-modified')
if not etag and not last_modified:
return content
struct = {'etag': etag, 'last-modified': last_modified}
struct_json = json.dumps(struct, indent=4)
info_key = self.generate_key(url, '.info')
if debug:
console_write(u"Caching %s in %s" % (url, key), True)
self.settings['cache'].set(info_key, struct_json.encode('utf-8'))
self.settings['cache'].set(key, content)
return content
def generate_key(self, url, suffix=''):
"""
Generates a key to store the cache under
:param url:
The URL being cached
:param suffix:
A string to append to the key
:return:
A string key for the URL
"""
if sys.version_info >= (3,) or isinstance(url, unicode):
url = url.encode('utf-8')
key = hashlib.md5(url).hexdigest()
return key + suffix
def retrieve_cached(self, url):
"""
Tries to return the cached content for a URL
:param url:
The URL to get the cached content for
:return:
The cached content
"""
key = self.generate_key(url)
if not self.settings['cache'].has(key):
return False
if self.settings.get('debug'):
console_write(u"Using cached content for %s" % url, True)
return self.settings['cache'].get(key)