Compare commits
42 Commits
f592236971
...
master
Author | SHA1 | Date | |
---|---|---|---|
8b8e15b082 | |||
8352a0a097 | |||
0e26118247 | |||
f8e1c4d062 | |||
46152eadbf | |||
01525451c7 | |||
be4e16ed35 | |||
5f91215acd | |||
|
1e4f87368e | ||
|
aad15a2a5a | ||
|
ee5983a7c5 | ||
|
acc006df1b | ||
|
31d2007bcb | ||
|
0b66f6cf1d | ||
|
e7e18c3300 | ||
|
cc8dfeea0d | ||
|
8f0dd858e2 | ||
|
0448b6c447 | ||
|
1b6d0d2129 | ||
|
78e9e7502a | ||
|
afe0eaf41d | ||
|
adad325c44 | ||
|
0118af822c | ||
|
f784ad9999 | ||
|
bab0735bf7 | ||
|
5af637c650 | ||
|
1545cdac8d | ||
|
fcdbfe86fa | ||
|
201626c66a | ||
|
5233079481 | ||
2735fb9ea2 | |||
09aa16dcc8 | |||
98d3d5994f | |||
03f1d663d0 | |||
826a100f24 | |||
db8244d902 | |||
|
26bbbe7d8c | ||
89ec3a3578 | |||
a9e93bd99f | |||
4aaeb48ffb | |||
218a3d8135 | |||
df548fa29d |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -1 +1,6 @@
|
||||
venv/*
|
||||
venv/*
|
||||
logs/*
|
||||
.vscode/*
|
||||
.ipynb_checkpoints/*
|
||||
__pycache__/*
|
||||
*.csv
|
86891
.ipynb_checkpoints/project_notebook-checkpoint.ipynb
Normal file
86891
.ipynb_checkpoints/project_notebook-checkpoint.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +0,0 @@
|
||||
root - INFO - http://oracc.iaas.upenn.edu/ario/Q006653/html
|
||||
root - INFO - http://oracc.iaas.upenn.edu/ario/Q006654/html
|
||||
root - INFO - http://oracc.iaas.upenn.edu/ario/Q006655/html
|
||||
root - INFO - http://oracc.iaas.upenn.edu/ario/Q007129/html
|
||||
root - INFO - http://oracc.iaas.upenn.edu/ario/Q007130/html
|
22399
2023-04-12_01-43-06.log
22399
2023-04-12_01-43-06.log
File diff suppressed because it is too large
Load Diff
@@ -1,447 +0,0 @@
|
||||
root - INFO - Start
|
||||
root - INFO - Start
|
||||
root - INFO - reading adsd
|
||||
root - INFO - reading aemw
|
||||
root - INFO - reading akklove
|
||||
root - INFO - reading asbp
|
||||
root - INFO - reading atae
|
||||
root - INFO - reading babcity
|
||||
root - INFO - reading blms
|
||||
root - INFO - reading btmao
|
||||
root - INFO - reading btto
|
||||
root - INFO - reading cams
|
||||
root - INFO - reading ccpo
|
||||
root - INFO - reading cdli
|
||||
root - INFO - reading ckst
|
||||
root - INFO - reading cmawro
|
||||
root - INFO - reading contrib
|
||||
root - INFO - reading dcclt
|
||||
root - INFO - reading dccmt
|
||||
root - INFO - reading doc
|
||||
root - INFO - reading dsst
|
||||
root - INFO - reading ecut
|
||||
root - INFO - reading eisl
|
||||
root - INFO - reading epsd2
|
||||
root - INFO - reading etcsri
|
||||
root - INFO - reading glass
|
||||
root - INFO - reading hbtin
|
||||
root - INFO - reading lacost
|
||||
root - INFO - reading lovelyrics
|
||||
root - INFO - reading neo
|
||||
root - INFO - reading nere
|
||||
root - INFO - reading nimrud
|
||||
root - INFO - reading obel
|
||||
root - INFO - reading obmc
|
||||
root - INFO - reading obta
|
||||
root - INFO - reading ogsl
|
||||
root - INFO - reading oimea
|
||||
root - INFO - reading pnao
|
||||
root - INFO - reading qcat
|
||||
root - INFO - reading riao
|
||||
root - INFO - reading ribo
|
||||
root - INFO - reading rimanum
|
||||
root - INFO - reading rinap
|
||||
root - INFO - reading saao
|
||||
root - INFO - reading suhu
|
||||
root - INFO - reading tcma
|
||||
root - INFO - reading tsae
|
||||
root - INFO - reading xcat
|
||||
sroot - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224395/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224403/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224417/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224431/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224433/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224447/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224485/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224487/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224587/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313416/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313417/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313425/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313427/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313435/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313437/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313439/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313447/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313458/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313487/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313491/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313497/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313502/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313505/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313509/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313511/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313523/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313527/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313543/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313551/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313559/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313571/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313598/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313600/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313614/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313623/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313626/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313627/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313629/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313644/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313648/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313660/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313677/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313684/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313699/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313719/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313722/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313726/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313742/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313748/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313755/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313759/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313762/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313807/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313815/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313832/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313854/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313864/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313871/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313874/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313876/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313878/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313879/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313885/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313897/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313904/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313907/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313915/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313919/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313923/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313938/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313947/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313974/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313975/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314001/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314003/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314022/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314026/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314030/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314032/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314048/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314051/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314054/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314056/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314134/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314144/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314157/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314211/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314223/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314227/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314232/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314238/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314243/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314248/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314257/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314260/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314272/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314273/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314275/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314282/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314287/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314297/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314316/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334036/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334037/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334038/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334039/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334040/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334041/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334042/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334043/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334044/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334045/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334046/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334047/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334048/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334049/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334050/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334051/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334053/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334054/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334055/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334056/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334078/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334079/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334080/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334081/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334082/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334083/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334100/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334101/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334102/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334113/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334118/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334120/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334124/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334125/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334127/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334135/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334136/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334141/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334142/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334143/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334144/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334158/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334160/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334165/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334166/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334170/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334171/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334172/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334173/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334174/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334175/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334176/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334190/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334193/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334194/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334195/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334209/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334210/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334211/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334212/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334213/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334214/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334271/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334272/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334273/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334284/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334288/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334298/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334314/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334317/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334328/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334329/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334330/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334331/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334332/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334333/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334334/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334335/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334336/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334359/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334372/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334384/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334385/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334390/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334394/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334396/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334397/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334403/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334422/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334432/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334435/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334442/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334443/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334444/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334445/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334496/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334499/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334512/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334519/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334520/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334567/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334568/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334586/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334587/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334588/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334592/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334598/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334610/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334621/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334631/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334632/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334634/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334643/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334644/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334658/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334665/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334667/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334676/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334687/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334689/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334693/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334699/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334709/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334715/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334718/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334721/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334727/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334728/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334729/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334739/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334773/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334774/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334776/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334789/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334791/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334792/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334794/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334804/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334807/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334810/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334820/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334826/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334830/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334834/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334849/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334864/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334895/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334903/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334904/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334910/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334912/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334918/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334922/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334923/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336167/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336172/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336595/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336596/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336597/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P393855/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P393866/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P428858/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/X010028/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P237185/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P240211/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P296062/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P314346/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P334814/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336039/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336040/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336126/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336216/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336217/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336218/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336220/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336317/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P500551/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/Q009186/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P223388/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P237351/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P238051/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P238321/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P238357/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P313430/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P313818/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334919/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334925/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334926/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334929/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334930/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334931/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334932/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336128/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336130/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336144/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336149/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336150/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336151/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336158/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336161/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336175/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336225/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336226/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336234/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336242/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336243/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336244/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336245/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336291/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336306/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336599/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336600/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336601/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336602/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336603/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336604/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336605/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336606/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336607/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336608/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336609/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336796/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P337164/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338360/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338383/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338404/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338675/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338681/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/Q009249/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/Q009250/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236925/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236926/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236927/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236928/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236929/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236937/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236943/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236955/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236956/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236960/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237009/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237018/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237027/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237030/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237053/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237081/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237119/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237127/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237128/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237168/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237173/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237190/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237191/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237203/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237208/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237209/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237210/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237212/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237213/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237222/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237224/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237231/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237355/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237358/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237360/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237361/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237362/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237363/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237364/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237365/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237366/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237367/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237369/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237370/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237371/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237372/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237373/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237374/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237376/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237377/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237378/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237380/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237383/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237386/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237387/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237405/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237410/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237412/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237413/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237416/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237417/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237423/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237435/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237443/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237449/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237461/html
|
||||
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237479/html
|
0
2023-05-15_21-45-30.log
Normal file
0
2023-05-15_21-45-30.log
Normal file
45
README.md
45
README.md
@@ -1,5 +1,48 @@
|
||||
# DH
|
||||
|
||||
This is the project for course {ENTERCOUSENUMBER} of Dr. Renana Keidar
|
||||
This is the project for course 33503 of Dr. Renana Keidar
|
||||
|
||||
Project, By Benny Saret
|
||||
|
||||
# דו"ח התקדמות
|
||||
|
||||
## מטרות
|
||||
מטרת הפרוייקט היא לייצר דרך למצוא קרבה או אינטרטקסטואליות בין טקסטים שונים באכדית בין תקופות שונות, סוגות שונות ומרחקים גיאוגרפיים. [אינטרטקסטואליות](https://www.merriam-webster.com/dictionary/intertextually) הוא מונח המתאר מערכת קרבה וקשר בין טקסט מסויים לטקסטים אחרים, המשמשים כחומר מצע, התכתבות, או ויכוח לאותו טקסט. את אותה קרבה ניתן לראות בעזרת מינוחים דומים, דימויים דומים, שיבוצי כתובים ועוד.
|
||||
|
||||
## נתוני מקור
|
||||
נתוני המקור כולם נלקחו מפרוייקט ORACC [The Open Richly Annotated Cuneiform Corpus](http://oracc.museum.upenn.edu/ "ORACC, (לקמן, אוראקק)"). פרוייקט זה הוא הפרוייקט הגדול והמקיף ביותר של טקסטים בכתב יתדות, פתוחים ונגישים לשימוש לקהל הרחב, ולחוקרים מכול הסוגים. הנתונים מגיעים בפורמטי JSON, TEI, XML ו־HTML, ומתעדכנים בכול עת.
|
||||
בפרוייקט ישנם לא רק טקסטים באכדית, אלא גם טקסטים באוררטית, שומרית וכן גם טקסטים בשפות משולבות של איזורי סְפָֿר.
|
||||
|
||||
## אופן העבודה
|
||||
|
||||
### איסוף הנתונים
|
||||
<style>
|
||||
ul{
|
||||
text-align: right;
|
||||
direction: rtl;
|
||||
}
|
||||
li{
|
||||
text-align: right;
|
||||
direction: rtl;
|
||||
}
|
||||
</style>
|
||||
השלב הראשון בפרוייקט היה איסוף הנתונים מאוראקק. תת השלבים של האיסוף היו:
|
||||
1. הקמת נתונון לשמירה של המידע הנאסף. הנתונון שנבחר היה postgresql, נתונון יחסי המממש את שפת SQL.
|
||||
1. יצירת טבלאות להכנסת הנתונים. לשם כך נוצרו הטבלאות הבאות
|
||||
- סוגה: טבלא בשם סוגה (genre) שמרה בתוכה את הסוגה של כול טקסט, לפי קוד הטקסט. [Genre](https://dh.saret.tk/dh/api/ggenre)
|
||||
- פרוייקט: טבלא בשם פרוייקט (project) שמרה בתוכה את כלל שמות הפרוייקטים ותתי הפרוייקטים. טבלא זו נדרשה בעיקר בשלב גרידת הטקסטים. [Project](https://dh.saret.tk/dh/api/gprojects)
|
||||
- תעתיק: טבלא בשם new כללה את התעתיק המפוצל לאכדית, יחד עם המזהה של הטקסט, על מנת להצמיד ביניהם בהמשך. [New](https://dh.saret.tk/dh/api/gnew)
|
||||
- תרגום: טבלא נוספת הייתה טבלא בשם raw_texts שמטרתה הייתה להחזיק את כלל התרגומים של הטקסטים. [Jsons](https://dh.saret.tk/dh/api/gjson)
|
||||
- ניתן לראות את כלל הקישורים ב[קישורים](https://dh.saret.tk/dh/api/links)
|
||||
1. כתיבת קוד פייתון אשר יוריד את כלל המידע, ויכניס אותו לנתונון.
|
||||
|
||||
### עיבוד הנתונים
|
||||
השלב הבא, לאחר איסוף הנתונים, הוא שלב העיבוד. שלב זה היה יחסית מאתגר. לאחר חודשים שבהם ניסיתי להריץ מספר מודלים פשוטים כגון Word2Vec, TF-IDF, Doc2Vec ועוד, התקבלו תוצאות מוזרות, של קשרים שהתאימו רק בין טקסט לבין עצמו, התאמה של 1, והשאר, היו על התאמה של 0.
|
||||
|
||||
לאחר מספר חודשים של ניסיונות, ונטישות, פניתי לעזרת פורום פייסבוק בקבוצת MDLI, שם הציעו לי מחדש ללכת על מודלים פשוטים, ואף שלחו לי מספר קישורים מתוך medium ([TF-IDF Vectorizer scikit-learn](https://medium.com/@cmukesh8688/tf-idf-vectorizer-scikit-learn-dbc0244a911a) ו־[Understanding TF-IDF and Cosine Similarity for Recommendation Engine](https://medium.com/geekculture/understanding-tf-idf-and-cosine-similarity-for-recommendation-engine-64d8b51aa9f9) ), והייתה לי התקדמות במודל. ואולם, על אף שהצליחו לצאת לי תוצאות, לא הצלחתי לייצר גרף מהווקטורים הללו.
|
||||
|
||||
### הדגמת תוצאות
|
||||
שני טקסטים שנמצאו בעלי קרבה של כ־87% הם למשל, [P394767](http://oracc.iaas.upenn.edu/btto/P394767/html) ו־[P395011](http://oracc.iaas.upenn.edu/btto/P395011/html). לאחר בדיקה קצרה של הטקסטים הללו, גם לעיניים שלי, הם נראו דומים. ובאמת, שני הטקסטים הללו מגיעים מאותה רשימה קאנונית המכונה "House most high". באוראקק אין כול אזכור ש־P394767 הוא מתוך הרשימה ההיא, אך המודל מצא את הדמיון, והעלה זאת לבדו.
|
||||
|
||||
# סיכום
|
||||
בסופו של דבר, המודל הצליח להציג תוצאות טובות, אך עדיין לא מספקות. על כן, יש צורך בעבודה נוספת על המודל, ובפרט על הנתונים שהוכנסו למודל. כמו כן, יש צורך בעבודה על הגרף עצמו, ובפרט על הצגתו למשתמש באופן נוח וידידותי. המודל, והשיטה יכולים להוות התקדמות למחקר עתידי, לפיתוחו ולשימוש להבנת האכדית בצורה טובה יותר.
|
||||
|
Binary file not shown.
File diff suppressed because one or more lines are too long
147
datat.ipynb
Normal file
147
datat.ipynb
Normal file
@@ -0,0 +1,147 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sklearn\n",
|
||||
"import sklearn.model_selection\n",
|
||||
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
|
||||
"import pandas as pd\n",
|
||||
"import scipy\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"df_eng = pd.read_csv('raw_texts.csv')\n",
|
||||
"df_akk = pd.read_csv('new.csv')\n",
|
||||
"# akk_raw_train, akk_raw_test = sklearn.model_selection.train_test_split(df_akk, test_size=0.2, random_state=0)\n",
|
||||
"# eng_raw_train, eng_raw_test = sklearn.model_selection.train_test_split(df_eng, test_size=0.2, random_state=0)\n",
|
||||
"tf_vectorizer = TfidfVectorizer(analyzer='word')\n",
|
||||
"# tf_vectorizer.fit(akk_raw_train['Text'].to_list())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf_vectorizer = TfidfVectorizer(analyzer='word')\n",
|
||||
"save_vect = tf_vectorizer.fit_transform(df_akk['Text'].dropna().to_list())\n",
|
||||
"# save_vect = tf_vectorizer.fit_transform(['The sun in the sky is bright', 'We can see the shining sun, the bright sun.'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tfidf_tokens = tf_vectorizer.get_feature_names_out()\n",
|
||||
"df_tfidfvect = pd.DataFrame(data=save_vect.toarray(), columns=tfidf_tokens)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_mat = tf_vectorizer.transform(df_akk['Text'].dropna().to_list())\n",
|
||||
"cc = cosine_similarity(save_vect,save_vect)\n",
|
||||
"bool_similarity = cc > 0.5\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"abcd = np.where((cc > 0.5)&( cc< 1))\n",
|
||||
"abcd[0].tofile(\"data.csv\", sep = \",\", format = \"%d\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using matplotlib backend: <object object at 0x00000212CB626CA0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%matplotlib\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"f = sns.scatterplot(bool_similarity)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Project P394767\n",
|
||||
"Text x x x BAD₃-ku-ri-gal-zi x E₂ 44 ša₂ BAD₃-{d}su...\n",
|
||||
"Genre lexical\n",
|
||||
"Name: 4, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_akk.iloc[4,:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -1 +1 @@
|
||||
C:/Users/Saret/Neutral Folder/dh/
|
||||
C:/Users/Saret/WaitForIt/oracc/
|
@@ -1 +0,0 @@
|
||||
[]
|
4114
missing_list.txt
Normal file
4114
missing_list.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
obel
1
obel
@@ -1 +0,0 @@
|
||||
[{"id_text": "P345452", "project_name": "obel", "raw_text": "Pure barge of the heavens, you are authoritative all on your own. Father Nanna, lord of Ur. Father Nanna, lord of the Eki\u0161nu\u014bal. Father Nanna, lord Dilimbabbar. Lord Nanna, foremost son of Enlil. When you float, when you float, When you appear authoritatively before your father, before Enlil, Father Nanna, when you appear authoritatively, when you raise your chest, When you appear authoritatively in your barge which is floating through the midst of heavens, Father Nanna, you, when you ride to your pure shrine, Father Nanna, when you float like a boat in a flood wave, When you float, when you float, you, when you float, When you float, when you pour out beer, you, when you float, When you pour out beer in a joyful mood, you, when you float, Father Nanna, when you tend to the ur cows and \u0161ar cows, Your father (Enlil) looks upon you with joyful eyes, and tends to you truly. Behold, he shines forth for the king; Enlil entrusted the sceptre of a lengthy reign to your hands. When you take care of lord Nudimmud, ... Having filled water into the .. canal ... Having filled water into the .. canal ... Having filled water into the Tigris, it is Nanna's. Having filled water into the Euphrates, it is Nanna's. Having filled water into canal and ditch for purification, they are Nanna's. Having filled the great marsh and the small marsh with water, they are Nanna's. An er\u0161ema song of Suen."}, {"id_text": "P355693", "project_name": "obel", "raw_text": "Oh my brother! ... Oh my brother! ... Oh my brother, son of Ga\u0161anmah! I lament for my brother, I lament, I lament in every way. I lament, the song of youthfulness I lament, in crying for the ... man She makes the woes plentyful, she makes the woes plentiful, standing up she makes the woes plentiful, young man, your mother makes the woes plentiful, your mother, Ninhursa\u014b, makes the woes plentiful. 
Our Princess in the Emah, the princess makes the woes plentiful. Atutur, the minister with hair hanging down makes the woes plentiful. My brother, you mother makes the woes plentiful. The palace of Ke\u0161 makes the woes plentiful. The brickwork of Iri\u0161ar makes the woes plentiful. The Emah of Adab makes the woes plentiful. The brickwork of Adab makes the woes plentiful. 'Where shall my son be handed over?' she is saying. 'Where shall my son, the Foolish One, be handed over?' she is saying. 'Where shall my son, the one I love, be handed over?' she is saying. The spouse calls out to her man. My brother, rise from your bed, may your mother rejoice over you. Your mother, Ga\u0161anhursa\u014b, may your mother rejoice over you. The en-priest, the lord, the great ruler of Adab may he rejoice over you. A\u0161irgi, the lord of Ke\u0161, may he rejoice over you. Atutur, in mourning, may she rejoice over you. Damgalnuna, of the Ema\u1e2b, the princess, may she rejoice over you. Lisin, the one of liver and heart, may she rejoice over you. ... ... ... ... ... Let me hear your sweet lips, let me hear your sweet voice let (my) heart be close to your good looks. Young man, do not let your mother, sit in tears, do not let your mother, Ninhursa\u014b, sit moaning, do not let Our Princess, sit (witnessing) your pain, do not let them do \"ua!\" Rise from your bed! Foolish One, do not let them do \"ua!\" Rise from your bed! The brother replied to his sister: My release, my sister, my release, Our Princess, my release, my sister, my release, Oh sister, do not speak so much, I am not one who can see. Our Princess, do not speak so much, I am not one who can see, My mother, Ga\u0161anmah, do not speak so much, I am not one who can see. In my bed, the dust of the netherworld, the ... lie with me. In my sleep, terror, the enemy sits with me. My sister, when I lie down and when I do not rise, my mother is the one who is anguished(?) over me, may I loosen the silah. 
Ga\u0161anhursa\u014b is the one who is anguished over me, may I loosen the simlah. My sister, stand up, give me my share, the estate of my father. My father made the woes over me plentiful, that be my share. Let my mother let her hair hang down for me, so that my ribs may lay down. May the bride whom my father (chose for me) measure grain for me, so that I may listen to it. Acquire a bed for me, (and recite) \"Its spirit is blown off.\" Set up the throne, seat the silah. Place the clothes on the throne, cover the simlah. Make funerary offerings, turn, accept them for me. Pour water into the libation pipe, and stir in the dust of the netherworld. Pour out the hot soup, let me drink its radiance. My sister, alas! Where ...? Our Princess ... Tears ... ... ... ... ... ..."}]
|
@@ -51,4 +51,4 @@ saao
|
||||
suhu
|
||||
tcma
|
||||
tsae
|
||||
xcat
|
||||
xcat
|
||||
|
86784
project_notebook.ipynb
86784
project_notebook.ipynb
File diff suppressed because it is too large
Load Diff
15
report.html
Normal file
15
report.html
Normal file
@@ -0,0 +1,15 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>דו"ח התקדמות</title>
|
||||
<style>
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body dir="rtl">
|
||||
<h1>דו"ח התקדמות
|
||||
<h2>מטרות
|
||||
<p>מטרת הפרוייקט היא לייצר דרך למצוא קרבה או אינטרטקסטואליות בין טקסטים שונים באכדית על מנת </p>
|
||||
</h2>
|
||||
</h1>
|
||||
</body>
|
||||
</html>
|
116
requirements.txt
Normal file
116
requirements.txt
Normal file
@@ -0,0 +1,116 @@
|
||||
aiofiles==22.1.0
|
||||
aiosqlite==0.18.0
|
||||
anyio==3.6.2
|
||||
argon2-cffi==21.3.0
|
||||
argon2-cffi-bindings==21.2.0
|
||||
arrow==1.2.3
|
||||
asttokens==2.2.1
|
||||
attrs==22.2.0
|
||||
autopep8==2.0.2
|
||||
Babel==2.12.1
|
||||
backcall==0.2.0
|
||||
bcrypt==4.0.1
|
||||
beautifulsoup4==4.12.2
|
||||
bleach==6.0.0
|
||||
bs4==0.0.1
|
||||
certifi==2022.12.7
|
||||
cffi==1.15.1
|
||||
charset-normalizer==3.1.0
|
||||
colorama==0.4.6
|
||||
comm==0.1.3
|
||||
cryptography==40.0.1
|
||||
debugpy==1.6.7
|
||||
decorator==5.1.1
|
||||
defusedxml==0.7.1
|
||||
entrypoints==0.4
|
||||
executing==1.2.0
|
||||
fastjsonschema==2.16.3
|
||||
fqdn==1.5.1
|
||||
idna==3.4
|
||||
importlib-metadata==6.3.0
|
||||
ipykernel==6.22.0
|
||||
ipython==8.12.0
|
||||
ipython-genutils==0.2.0
|
||||
isoduration==20.11.0
|
||||
jedi==0.18.2
|
||||
Jinja2==3.1.2
|
||||
json5==0.9.11
|
||||
jsonpointer==2.3
|
||||
jsonschema==4.17.3
|
||||
jupyter-contrib-core==0.4.2
|
||||
jupyter-contrib-nbextensions==0.7.0
|
||||
jupyter-events==0.6.3
|
||||
jupyter-highlight-selected-word==0.2.0
|
||||
jupyter-kite==2.0.2
|
||||
jupyter-latex-envs==1.4.6
|
||||
jupyter-nbextensions-configurator==0.6.1
|
||||
jupyter-ydoc==0.2.3
|
||||
jupyter_client==8.1.0
|
||||
jupyter_core==5.3.0
|
||||
jupyter_server==2.5.0
|
||||
jupyter_server_fileid==0.9.0
|
||||
jupyter_server_terminals==0.4.4
|
||||
jupyter_server_ydoc==0.8.0
|
||||
jupyterlab==3.6.3
|
||||
jupyterlab-execute-time==2.3.1
|
||||
jupyterlab-pygments==0.2.2
|
||||
jupyterlab_server==2.22.0
|
||||
lxml==4.9.2
|
||||
MarkupSafe==2.1.2
|
||||
matplotlib-inline==0.1.6
|
||||
mistune==2.0.5
|
||||
nbclassic==0.5.5
|
||||
nbclient==0.7.3
|
||||
nbconvert==7.3.1
|
||||
nbformat==5.8.0
|
||||
nest-asyncio==1.5.6
|
||||
notebook==6.5.4
|
||||
notebook_shim==0.2.2
|
||||
numpy==1.24.2
|
||||
packaging==23.0
|
||||
pandas==2.0.0
|
||||
pandocfilters==1.5.0
|
||||
paramiko==3.1.0
|
||||
parso==0.8.3
|
||||
pickleshare==0.7.5
|
||||
platformdirs==3.2.0
|
||||
prometheus-client==0.16.0
|
||||
prompt-toolkit==3.0.38
|
||||
psutil==5.9.4
|
||||
psycopg2==2.9.6
|
||||
pure-eval==0.2.2
|
||||
pycodestyle==2.10.0
|
||||
pycparser==2.21
|
||||
Pygments==2.15.0
|
||||
PyNaCl==1.5.0
|
||||
pyrsistent==0.19.3
|
||||
python-dateutil==2.8.2
|
||||
python-json-logger==2.0.7
|
||||
pytz==2023.3
|
||||
PyYAML==6.0
|
||||
pyzmq==25.0.2
|
||||
requests==2.28.2
|
||||
rfc3339-validator==0.1.4
|
||||
rfc3986-validator==0.1.1
|
||||
Send2Trash==1.8.0
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
soupsieve==2.4
|
||||
sshtunnel==0.4.0
|
||||
stack-data==0.6.2
|
||||
terminado==0.17.1
|
||||
tinycss2==1.2.1
|
||||
tomli==2.0.1
|
||||
tornado==6.2
|
||||
traitlets==5.9.0
|
||||
typing_extensions==4.5.0
|
||||
tzdata==2023.3
|
||||
uri-template==1.2.0
|
||||
urllib3==1.26.15
|
||||
wcwidth==0.2.6
|
||||
webcolors==1.13
|
||||
webencodings==0.5.1
|
||||
websocket-client==1.5.1
|
||||
y-py==0.5.9
|
||||
ypy-websocket==0.8.2
|
||||
zipp==3.15.0
|
7497
scrape.log
7497
scrape.log
File diff suppressed because it is too large
Load Diff
@@ -6,4 +6,4 @@ import psycopg2
|
||||
# conn = psycopg2.connect("dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'")
|
||||
# return conn
|
||||
|
||||
def
|
||||
# def
|
43
scrapping.py
43
scrapping.py
@@ -1,7 +1,7 @@
|
||||
import json
|
||||
from typing import Dict, List
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, ResultSet
|
||||
import os
|
||||
from pathlib import Path
|
||||
import re
|
||||
@@ -32,9 +32,25 @@ def _load_json_from_path(json_path: str) -> Dict:
|
||||
return json.load(json_file)
|
||||
|
||||
|
||||
def _download_data_from_website(url: str, timeout: float = 30.0) -> ResultSet:
    """Download ``url`` and return its ``<span class="cell">`` elements.

    The download is best-effort: any failure (network error, timeout,
    HTTP error status, parsing problem) is logged and an empty list is
    returned, so the caller can simply skip the text.

    :param url: Address of the HTML page to fetch.
    :param timeout: Seconds to wait for the server before giving up.
        Without a timeout ``requests`` may block forever on a stalled
        connection, which would hang the whole scraping run.
    :return: The matching elements, or an empty list on failure.
    """
    try:
        res = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of parsing the error page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        return soup.find_all("span", {"class": "cell"})
    except Exception as e:
        # Deliberate catch-all: one bad page must not abort the scrape.
        logging.error(e)
        return list()
|
||||
|
||||
|
||||
def _clean_raw_text(results: ResultSet) -> str:
    """Flatten scraped elements into a single-line string.

    For every element in ``results`` the items of its ``contents`` are
    concatenated without a separator: plain strings are used as they are,
    any other item contributes its ``text`` attribute. The per-element
    strings are then joined with a single space and every newline is
    replaced by a space.

    :param results: Iterable of elements exposing a ``contents`` sequence.
    :return: The combined text; an empty string when ``results`` is empty.
    """
    cell_texts = (
        "".join(
            content if isinstance(content, str) else content.text
            for content in result.contents
        )
        for result in results
    )
    return " ".join(cell_texts).replace('\n', ' ')
|
||||
|
||||
|
||||
def get_raw_english_texts_of_project(project_dirname: str, oracc_site: str = 'oracc.museum.upenn.edu') -> List[Dict]:
|
||||
raw_jsons = list()
|
||||
all_paths = glob.glob(f'jsons_unzipped/{project_dirname}/**/corpusjson/*.json', recursive=True)
|
||||
all_paths = glob.glob(f'jsons_unzipped/{project_dirname}/**/corpusjson/*.json', recursive=True) + glob.glob(
|
||||
f'jsons_unzipped/{project_dirname}/corpusjson/*.json', recursive=True)
|
||||
# path = Path(os.path.join(JSONS_DIR, project_dirname, 'catalogue.json'))
|
||||
# if not os.path.isfile(path):
|
||||
# return raw_jsons
|
||||
@@ -43,8 +59,11 @@ def get_raw_english_texts_of_project(project_dirname: str, oracc_site: str = 'or
|
||||
# for member in d.get('members').values():
|
||||
for filename in all_paths:
|
||||
cur_json = _load_json_from_path(filename)
|
||||
project_name = cur_json['project']
|
||||
|
||||
try:
|
||||
project_name = cur_json['project']
|
||||
except TypeError:
|
||||
logging.error(f"Error in {filename}")
|
||||
continue
|
||||
# # Skip in case we are in saa project and the current sub project is not in neo-assyrian
|
||||
# if project_dirname == "saao" and project_name[-2:] not in SUB_PROJECTS_IN_NEO_ASS: # TODO: validate
|
||||
# continue
|
||||
@@ -55,12 +74,7 @@ def get_raw_english_texts_of_project(project_dirname: str, oracc_site: str = 'or
|
||||
# print(url)
|
||||
logging.info(url)
|
||||
try:
|
||||
res = requests.get(url)
|
||||
soup = BeautifulSoup(res.text, "html.parser")
|
||||
results = soup.find_all("span", {"class": "cell"})
|
||||
raw_text = " ".join(["".join([content if isinstance(content, str) else content.text
|
||||
for content in result.contents]) for result in results])
|
||||
raw_text = raw_text.replace('\n', ' ')
|
||||
raw_text = _clean_raw_text(_download_data_from_website(url))
|
||||
if raw_text:
|
||||
raw_jsons.append({
|
||||
"id_text": cur_json['textid'],
|
||||
@@ -128,13 +142,13 @@ def get_raw_akk_texts_of_project(project_dirname: str) -> List[Dict]:
|
||||
:return: A list of jsons containing the raw texts of the given project and basic metadata.
|
||||
"""
|
||||
raw_jsons = list()
|
||||
all_paths = glob.glob(f'jsons_unzipped/{project_dirname}/**/corpusjson/*.json', recursive=True)
|
||||
all_paths = glob.glob(f'jsons_unzipped/{project_dirname}/**/corpusjson/*.json', recursive=True)+glob.glob(
|
||||
f'jsons_unzipped/{project_dirname}/corpusjson/*.json', recursive=True)
|
||||
|
||||
for filename in all_paths:
|
||||
cur_json = _load_json_from_path(filename)
|
||||
|
||||
try:
|
||||
project_name = cur_json['project']
|
||||
sents_dicts = cur_json['cdl'][0]['cdl'][-1]['cdl']
|
||||
except Exception as e:
|
||||
print(f"In file {filename} failed because of {e}")
|
||||
@@ -143,8 +157,7 @@ def get_raw_akk_texts_of_project(project_dirname: str) -> List[Dict]:
|
||||
raw_text = get_raw_akk_text_from_json(sents_dicts)
|
||||
raw_jsons.append({
|
||||
"id_text": cur_json['textid'],
|
||||
"project_name": project_name,
|
||||
"raw_text": raw_text,
|
||||
"raw_text": raw_text
|
||||
})
|
||||
|
||||
# if not texts_jsons or not texts_jsons.get('members'):
|
||||
@@ -198,7 +211,7 @@ def _get_raw_text(json_dict: dict) -> str:
|
||||
raw_texts.extend(_get_raw_text(d['cdl']).split())
|
||||
elif _is_word(d): # If node represents a word
|
||||
if previous_ref != d.get('ref'): # If encountered new instance:
|
||||
cur_text = d['frag'] if d.get('frag') else d['f']['form']
|
||||
cur_text = d['f']['norm'] if d['f'].get('norm') else d['f']['form']
|
||||
raw_texts.append(cur_text + _get_addition(d))
|
||||
previous_ref = d.get('ref')
|
||||
|
||||
|
Reference in New Issue
Block a user