Files
DH/project_notebook.ipynb

3.8 KiB

Imported modules

In [1]:
import json
import scrapping
import psycopg2
from psycopg2.extras import execute_batch
import re
import logging
import datetime
# Send this session's log records to a fresh file named after the start time
# (filemode='w' truncates the file if it already exists).
# The format includes %(asctime)s so that `datefmt` actually takes effect;
# without that field the date format is ignored and records have no timestamp.
logging.basicConfig(
    level=logging.INFO,
    filename=f'{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.log',
    filemode='w',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%d-%b-%y %H:%M:%S')
# from psycopg2.extensions import register_adapter
In [28]:
# Scrape every project named in 'project_list' (one name per line) and insert
# the scraped rows into the raw_texts table. Failures are logged per project so
# that one bad project does not stop the rest of the run.
# NOTE(review): the DSN below embeds the database password in the notebook;
# consider loading it from the environment instead.
conn = psycopg2.connect("dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'")
connection = conn.cursor()

# Read the whole list up front so the file is closed before the long-running loop.
with open('project_list') as f:
    project_names = f.read().split('\n')

for project in project_names:
    # A trailing newline in the list produces an empty entry; skip blank lines.
    if not project.strip():
        continue
    # connection = connection.execute("insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)", scrap)
    try:
        scrap = scrapping.get_raw_english_texts_of_project(project)
    except Exception as exc:
        # Log the actual error (with traceback), not the Exception class itself.
        logging.exception(f"Error in {project}:{exc}")
        # Nothing was scraped for this project, so there is nothing to insert;
        # falling through would reuse the previous project's data (or hit an
        # undefined name on the first iteration).
        continue
    try:
        execute_batch(
            connection,
            "insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING",
            scrap)
        conn.commit()
    except Exception as exc:
        logging.exception(f"Error in {project}:{exc}")
        # Keep the scraped data on disk, in a file named after the project, so
        # the insert can be retried later without scraping again. The dump is
        # guarded because a failure here (e.g. a project name containing a
        # path separator) must not abort the remaining projects.
        try:
            with open(project, 'w') as dump_file:
                dump_file.write(json.dumps(scrap))
        except Exception as dump_exc:
            logging.exception(f"Could not dump {project} to disk:{dump_exc}")
        # A failed statement leaves the transaction aborted; roll back so the
        # following projects can still be inserted on this connection.
        conn.rollback()
In [33]:
# One-off run: scrape the single hard-coded project 'contrib/amarna' and insert
# its rows into the raw_texts table.
# NOTE(review): the DSN below embeds the database password in the notebook;
# consider loading it from the environment instead.
conn = psycopg2.connect("dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'")
connection = conn.cursor()
# NOTE: logging.basicConfig does nothing when the root logger already has
# handlers (for example when an earlier basicConfig call ran in this kernel);
# in that case 'Start' goes to the existing log target rather than ./now.log.
logging.basicConfig(
    level=logging.INFO, filename='./now.log',
    format='%(name)s - %(levelname)s - %(message)s')
logging.info('Start')
# The project name is hard-coded, so 'project_list' is not read here; to pick
# an entry from that file instead, read it and index into its lines.
scrap = scrapping.get_raw_english_texts_of_project('contrib/amarna')
execute_batch(
    connection, "insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING",
    scrap)
conn.commit()
In [25]:
# Load the JSON content of the 'cams' file and insert it into the raw_texts
# table in one batch; rows that conflict with existing ones are skipped.
with open('cams') as file:
    conn = psycopg2.connect(
        "dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'"
    )
    connection = conn.cursor()
    insert_sql = "insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING"
    # Parse the file only after the cursor exists, as before.
    cams_rows = json.load(file)
    execute_batch(connection, insert_sql, cams_rows)
    conn.commit()
In [ ]:
 
In [ ]: