import sys
import re
import json
import hashlib

from ..console_write import console_write


class CachingDownloader(object):
    """
    A base downloader that will use a caching backend to cache HTTP requests
    and make conditional requests.
    """

    def add_conditional_headers(self, url, headers):
        """
        Add `If-Modified-Since` and `If-None-Match` headers to a request if a
        cached copy exists

        :param url: The URL of the request

        :param headers: A dict with the request headers

        :return: The request headers dict, possibly with new headers added
        """

        if not self.settings.get('cache'):
            return headers

        info_key = self.generate_key(url, '.info')
        info_json = self.settings['cache'].get(info_key)

        if not info_json:
            return headers

        # Make sure we have the cached content to use if we get a 304
        key = self.generate_key(url)
        if not self.settings['cache'].has(key):
            return headers

        try:
            info = json.loads(info_json.decode('utf-8'))
        except ValueError:
            return headers

        etag = info.get('etag')
        if etag:
            headers['If-None-Match'] = etag

        last_modified = info.get('last-modified')
        if last_modified:
            headers['If-Modified-Since'] = last_modified

        return headers

    def cache_result(self, method, url, status, headers, content):
        """
        Processes a request result, either caching the result or returning
        the cached version of the URL.

        :param method: The HTTP method used for the request

        :param url: The URL of the request

        :param status: The numeric response status of the request

        :param headers: A dict of response headers, with keys being lowercase

        :param content: The response content

        :return: The response content
        """

        debug = self.settings.get('debug', False)

        if not self.settings.get('cache'):
            if debug:
                console_write(u"Skipping cache since there is no cache object", True)
            return content

        if method.lower() != 'get':
            if debug:
                console_write(u"Skipping cache since the HTTP method != GET", True)
            return content

        status = int(status)

        # Don't do anything unless it was successful or not modified
        if status not in [200, 304]:
            if debug:
                console_write(u"Skipping cache since the HTTP status code is not one of: 200, 304", True)
            return content

        key = self.generate_key(url)

        if status == 304:
            cached_content = self.settings['cache'].get(key)
            if cached_content:
                if debug:
                    console_write(u"Using cached content for %s" % url, True)
                return cached_content

            # If we got a 304, but did not have the cached content,
            # stop here so we don't cache an empty response
            return content

        # If we got here, the status is 200

        # Respect some basic cache control headers
        cache_control = headers.get('cache-control', '')
        if cache_control:
            fields = re.split(r',\s*', cache_control)
            for field in fields:
                if field == 'no-store':
                    return content

        # Don't ever cache zip/binary files for the sake of hard drive space
        if headers.get('content-type') in ['application/zip', 'application/octet-stream']:
            if debug:
                console_write(u"Skipping cache since the response is a zip file", True)
            return content

        etag = headers.get('etag')
        last_modified = headers.get('last-modified')

        if not etag and not last_modified:
            return content

        struct = {'etag': etag, 'last-modified': last_modified}
        struct_json = json.dumps(struct, indent=4)

        info_key = self.generate_key(url, '.info')
        if debug:
            console_write(u"Caching %s in %s" % (url, key), True)

        self.settings['cache'].set(info_key, struct_json.encode('utf-8'))
        self.settings['cache'].set(key, content)

        return content

    def generate_key(self, url, suffix=''):
        """
        Generates a key to store the cache under

        :param url: The URL being cached

        :param suffix: A string to append to the key

        :return: A string key for the URL
        """
        # On Python 3 every str is unicode; on Python 2 only encode unicode
        # objects. The `or` short-circuits on Python 3, so the `unicode` name
        # is never evaluated there.
        if sys.version_info >= (3,) or isinstance(url, unicode):
            url = url.encode('utf-8')

        key = hashlib.md5(url).hexdigest()
        return key + suffix

    def retrieve_cached(self, url):
        """
        Tries to return the cached content for a URL

        :param url: The URL to get the cached content for

        :return: The cached content
        """

        key = self.generate_key(url)
        if not self.settings['cache'].has(key):
            return False

        if self.settings.get('debug'):
            console_write(u"Using cached content for %s" % url, True)

        return self.settings['cache'].get(key)
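

# Usage sketch (illustrative only, not part of Package Control): concrete
# downloaders are expected to mix this class in and supply `self.settings`
# with a cache backend exposing get()/set()/has(). The `MemoryCache` and
# `DemoDownloader` names below are hypothetical stand-ins used to show the
# conditional-request flow end to end.
#
#     class MemoryCache(object):
#         def __init__(self):
#             self._entries = {}
#
#         def get(self, key):
#             return self._entries.get(key)
#
#         def set(self, key, value):
#             self._entries[key] = value
#
#         def has(self, key):
#             return key in self._entries
#
#     class DemoDownloader(CachingDownloader):
#         def __init__(self, settings):
#             self.settings = settings
#
#     downloader = DemoDownloader({'cache': MemoryCache(), 'debug': False})
#     url = 'https://example.com/channel.json'
#
#     # First request: nothing is cached yet, so no conditional headers are added
#     headers = downloader.add_conditional_headers(url, {})
#
#     # ... perform the HTTP GET, then record the 200 response ...
#     content = downloader.cache_result('get', url, 200,
#                                       {'etag': '"abc123"', 'content-type': 'application/json'},
#                                       b'{"packages": []}')
#
#     # Later request: the cached etag is sent as If-None-Match, and on a 304
#     # cache_result() (or retrieve_cached()) returns the previously cached body
#     headers = downloader.add_conditional_headers(url, {})
#     content = downloader.cache_result('get', url, 304, {}, b'')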