Update the ORACC site link; read the project that failed
This commit is contained in:
@@ -32,7 +32,7 @@ def _load_json_from_path(json_path: str) -> Dict:
|
||||
return json.load(json_file)
|
||||
|
||||
|
||||
def get_raw_english_texts_of_project(project_dirname: str) -> List[Dict]:
|
||||
def get_raw_english_texts_of_project(project_dirname: str, oracc_site: str = 'oracc.museum.upenn.edu') -> List[Dict]:
|
||||
raw_jsons = list()
|
||||
all_paths = glob.glob(f'jsons_unzipped/{project_dirname}/**/corpusjson/*.json', recursive=True)
|
||||
# path = Path(os.path.join(JSONS_DIR, project_dirname, 'catalogue.json'))
|
||||
@@ -51,7 +51,7 @@ def get_raw_english_texts_of_project(project_dirname: str) -> List[Dict]:
|
||||
|
||||
# id_text = member.get('id_text', "") + member.get('id_composite', "")
|
||||
# html_dir = "/".join(path.parts[1:-1])
|
||||
url = f"http://oracc.iaas.upenn.edu/{project_name}/{cur_json['textid']}/html"
|
||||
url = f"http://{oracc_site}/{project_name}/{cur_json['textid']}/html"
|
||||
# print(url)
|
||||
logging.info(url)
|
||||
try:
|
||||
|
Reference in New Issue
Block a user