Files
DH/project_notebook.ipynb
2023-04-12 01:00:18 +03:00

2.2 KiB

Imported modules

In [16]:
import json
import scrapping
import psycopg2
from psycopg2.extras import execute_batch
import re
import logging
import datetime
# Route INFO-and-above records to a fresh log file named after the moment this
# cell runs (filemode='w' truncates the file if it already exists).
# The format string includes %(asctime)s so that `datefmt` actually takes
# effect; without it the date format is ignored and records carry no timestamp.
logging.basicConfig(
    level=logging.INFO, filename=f'{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.log', filemode='w',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')
# from psycopg2.extensions import register_adapter
In [17]:
# Scrape the raw English texts of every project named in the `project_list`
# file (one name per line) and insert them into the `raw_texts` table.
# If the insert for a project fails, the scraped rows are dumped as JSON to a
# file named after the project so they are not lost, and the error is logged.
#
# NOTE(review): the database credentials are hardcoded in the DSN below; they
# should be read from the environment or a secrets store, not kept in the notebook.
conn = psycopg2.connect("dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'")
# Despite its name, `connection` is a cursor; the name is kept unchanged.
connection = conn.cursor()
try:
    with open('project_list') as project_list_file:
        # Skip blank lines (e.g. the one produced by a trailing newline) so the
        # scraper is never called with an empty project name.
        project_names = [
            line.strip()
            for line in project_list_file.read().splitlines()
            if line.strip()
        ]

    for project in project_names:
        scrap = scrapping.get_raw_english_texts_of_project(project)
        try:
            execute_batch(connection, "insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)", scrap)
            conn.commit()
        except Exception as exc:
            # Log the actual exception instance (with traceback), not the Exception class.
            logging.exception("Error in %s:%s", project, exc)
            # Keep the scraped data on disk so the project can be re-imported later.
            with open(project, 'w') as dump_file:
                dump_file.write(json.dumps(scrap))
            # A failed statement leaves the transaction aborted; roll back so the
            # following projects can still be inserted.
            conn.rollback()
finally:
    # Release the cursor and the database connection even if the loop aborts.
    connection.close()
    conn.close()
In [ ]: