import re
import os
import base64

try:
    # Python 3
    from urllib.parse import urlencode
except (ImportError):
    # Python 2
    from urllib import urlencode

from .json_api_client import JSONApiClient
from ..downloaders.downloader_exception import DownloaderException


# Lookup table from a lowercased readme file extension (including the leading
# dot) to the name of the markup format that extension denotes. Extensions
# that are not listed here are treated as plain text by the code below.
_readme_formats = dict(
    (extension, markup)
    for markup, extensions in (
        ('markdown', ('.md', '.mkd', '.mdown', '.markdown')),
        ('textile', ('.textile',)),
        ('creole', ('.creole',)),
        ('rst', ('.rst',)),
    )
    for extension in extensions
)


class ReadmeClient(JSONApiClient):

    """
    Client that downloads a readme file and reports its filename, markup
    format and decoded text
    """

    def readme_info(self, url):
        """
        Retrieve the readme and info about it

        For readme URLs hosted on raw.github.com, the GitHub readme API is
        tried first (preferring a cached response). If that does not yield
        any content, the URL itself is downloaded.

        :param url:
            The URL of the readme file

        :raises:
            DownloaderException: if there is an error downloading the readme
            ClientException: if there is an error parsing the response

        :return:
            A dict with the following keys:
              `filename`
              `format` - `markdown`, `textile`, `creole`, `rst` or `txt`
              `contents` - contents of the readme as str/unicode
        """

        contents = None

        # Try to grab the contents of a GitHub-based readme by grabbing the cached
        # content of the readme API call. Raw strings keep the regex escapes
        # intact, and the dots of the hostname are escaped so they only match
        # a literal ".".
        github_match = re.match(
            r'https://raw\.github\.com/([^/]+/[^/]+)/([^/]+)/'
            r'readme(\.(md|mkd|mdown|markdown|textile|creole|rst|txt))?$',
            url,
            re.I
        )
        if github_match:
            user_repo = github_match.group(1)
            branch = github_match.group(2)

            query_string = urlencode({'ref': branch})
            readme_json_url = 'https://api.github.com/repos/%s/readme?%s' % (user_repo, query_string)
            try:
                info = self.fetch_json(readme_json_url, prefer_cached=True)
                contents = base64.b64decode(info['content'])
            except (ValueError, KeyError, TypeError):
                # The API lookup is only a best-effort shortcut. A payload
                # without usable base64 content (ValueError on Python 3,
                # TypeError on Python 2, KeyError when the key is missing)
                # falls through to downloading the URL directly.
                contents = None

        if not contents:
            contents = self.fetch(url)

        # Anything without a recognized extension is treated as plain text
        ext = os.path.splitext(url)[1].lower()
        readme_format = _readme_formats.get(ext, 'txt')

        try:
            contents = contents.decode('utf-8')
        except UnicodeDecodeError:
            # The error handler is passed positionally since decode() only
            # accepts keyword arguments starting with Python 2.7
            contents = contents.decode('cp1252', 'replace')

        return {
            'filename': os.path.basename(url),
            'format': readme_format,
            'contents': contents
        }