updates in oracc link; reading project that failed

This commit is contained in:
2023-04-12 11:23:35 +03:00
parent 5b071fbac3
commit 15c9b56fd0
29 changed files with 7957 additions and 31 deletions

View File

@@ -21,6 +21,7 @@
"import re\n",
"import logging\n",
"import datetime\n",
"import os\n",
"logging.basicConfig(\n",
" level=logging.INFO, filename=f'{datetime.datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")}.log', filemode='w',\n",
" format='%(name)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')\n",
@@ -29,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@@ -56,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -68,11 +69,14 @@
"logging.info('Start')\n",
"with open('project_list') as f:\n",
" # project = f.read().split('\\n')[2]\n",
" scrap = scrapping.get_raw_english_texts_of_project('contrib/amarna')\n",
" execute_batch(\n",
" connection, \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING\",\n",
" scrap)\n",
" conn.commit()\n"
" for project in f.read().split('\\n'):\n",
" # connection = connection.execute(\"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)\", scrap)\n",
" if os.path.exists(project):\n",
" scrap = json.load(project)\n",
" execute_batch(\n",
" connection, \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING\",\n",
" scrap)\n",
" conn.commit()\n"
]
},
{