84 lines
2.4 KiB
Python
84 lines
2.4 KiB
Python
import re
|
|
import os
|
|
import base64
|
|
|
|
try:
|
|
# Python 3
|
|
from urllib.parse import urlencode
|
|
except (ImportError):
|
|
# Python 2
|
|
from urllib import urlencode
|
|
|
|
from .json_api_client import JSONApiClient
|
|
from ..downloaders.downloader_exception import DownloaderException
|
|
|
|
|
|
# Lookup table from a lowercased readme file extension (leading dot included)
# to the name of the markup format the file is written in. Extensions missing
# from this table are reported as plain `txt` by ReadmeClient.readme_info().
_readme_formats = dict((
    ('.md', 'markdown'),
    ('.mkd', 'markdown'),
    ('.mdown', 'markdown'),
    ('.markdown', 'markdown'),
    ('.textile', 'textile'),
    ('.creole', 'creole'),
    ('.rst', 'rst'),
))
|
|
|
|
|
|
class ReadmeClient(JSONApiClient):
    """
    Client that downloads a readme file and reports its filename, markup
    format and decoded text
    """

    def readme_info(self, url):
        """
        Retrieve the readme and info about it

        :param url:
            The URL of the readme file

        :raises:
            DownloaderException: if there is an error downloading the readme
            ClientException: if there is an error parsing the response

        :return:
            A dict with the following keys:
              `filename`
              `format` - `markdown`, `textile`, `creole`, `rst` or `txt`
              `contents` - contents of the readme as str/unicode
        """

        contents = None

        # Try to grab the contents of a GitHub-based readme by grabbing the cached
        # content of the readme API call. The pattern is a raw string so that
        # `\.` is a regex escape rather than an (invalid) string escape, and the
        # dots in the host name are escaped so they only match a literal dot.
        github_match = re.match(
            r'https://raw\.github\.com/([^/]+/[^/]+)/([^/]+)/'
            r'readme(\.(md|mkd|mdown|markdown|textile|creole|rst|txt))?$',
            url,
            re.I
        )
        if github_match:
            user_repo, branch = github_match.group(1, 2)

            query_string = urlencode({'ref': branch})
            readme_json_url = 'https://api.github.com/repos/%s/readme?%s' % (user_repo, query_string)
            try:
                info = self.fetch_json(readme_json_url, prefer_cached=True)
                contents = base64.b64decode(info['content'])
            except (KeyError, TypeError, ValueError):
                # The API lookup is only a best-effort shortcut. A payload
                # without a `content` key (KeyError) or with malformed base64
                # (TypeError on Python 2, binascii.Error - a ValueError
                # subclass - on Python 3) falls through to the direct download.
                contents = None

        # An empty decoded payload is also treated as "nothing usable" and
        # triggers a direct download of the original URL
        if not contents:
            contents = self.fetch(url)

        # Only the path portion of the URL names the file, so any query string
        # or fragment is dropped before deriving the filename and extension
        url_path = url.split('#', 1)[0].split('?', 1)[0]
        filename = os.path.basename(url_path)

        _, ext = os.path.splitext(url_path)
        readme_format = _readme_formats.get(ext.lower(), 'txt')

        try:
            contents = contents.decode('utf-8')
        except (UnicodeDecodeError):
            # The error handler is passed positionally since Python 2.6 does
            # not accept keyword arguments to decode()
            contents = contents.decode('cp1252', 'replace')

        return {
            'filename': filename,
            'format': readme_format,
            'contents': contents
        }
|