Compare commits

..

5 Commits

Author SHA1 Message Date
f592236971 failing in saao 2023-04-12 12:29:22 +03:00
faab62e768 almost done with scraping 2023-04-12 12:06:49 +03:00
15c9b56fd0 updates in oracc link; reading project that failed 2023-04-12 11:23:35 +03:00
5b071fbac3 solve problems with some projects 2023-04-12 09:57:50 +03:00
1236d91d9a update exceptions in projects 2023-04-12 08:24:03 +03:00
42 changed files with 20247 additions and 44 deletions

File diff suppressed because it is too large Load Diff

447
2023-04-12_12-01-00.log Normal file
View File

@@ -0,0 +1,447 @@
root - INFO - Start
root - INFO - Start
root - INFO - reading adsd
root - INFO - reading aemw
root - INFO - reading akklove
root - INFO - reading asbp
root - INFO - reading atae
root - INFO - reading babcity
root - INFO - reading blms
root - INFO - reading btmao
root - INFO - reading btto
root - INFO - reading cams
root - INFO - reading ccpo
root - INFO - reading cdli
root - INFO - reading ckst
root - INFO - reading cmawro
root - INFO - reading contrib
root - INFO - reading dcclt
root - INFO - reading dccmt
root - INFO - reading doc
root - INFO - reading dsst
root - INFO - reading ecut
root - INFO - reading eisl
root - INFO - reading epsd2
root - INFO - reading etcsri
root - INFO - reading glass
root - INFO - reading hbtin
root - INFO - reading lacost
root - INFO - reading lovelyrics
root - INFO - reading neo
root - INFO - reading nere
root - INFO - reading nimrud
root - INFO - reading obel
root - INFO - reading obmc
root - INFO - reading obta
root - INFO - reading ogsl
root - INFO - reading oimea
root - INFO - reading pnao
root - INFO - reading qcat
root - INFO - reading riao
root - INFO - reading ribo
root - INFO - reading rimanum
root - INFO - reading rinap
root - INFO - reading saao
root - INFO - reading suhu
root - INFO - reading tcma
root - INFO - reading tsae
root - INFO - reading xcat
sroot - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224395/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224403/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224417/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224431/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224433/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224447/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224485/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224487/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P224587/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313416/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313417/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313425/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313427/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313435/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313437/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313439/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313447/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313458/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313487/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313491/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313497/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313502/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313505/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313509/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313511/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313523/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313527/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313543/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313551/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313559/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313571/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313598/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313600/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313614/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313623/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313626/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313627/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313629/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313644/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313648/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313660/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313677/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313684/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313699/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313719/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313722/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313726/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313742/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313748/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313755/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313759/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313762/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313807/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313815/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313832/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313854/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313864/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313871/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313874/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313876/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313878/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313879/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313885/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313897/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313904/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313907/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313915/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313919/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313923/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313938/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313947/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313974/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P313975/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314001/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314003/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314022/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314026/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314030/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314032/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314048/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314051/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314054/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314056/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314134/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314144/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314157/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314211/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314223/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314227/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314232/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314238/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314243/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314248/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314257/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314260/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314272/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314273/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314275/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314282/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314287/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314297/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P314316/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334036/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334037/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334038/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334039/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334040/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334041/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334042/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334043/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334044/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334045/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334046/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334047/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334048/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334049/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334050/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334051/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334053/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334054/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334055/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334056/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334078/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334079/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334080/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334081/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334082/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334083/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334100/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334101/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334102/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334113/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334118/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334120/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334124/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334125/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334127/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334135/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334136/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334141/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334142/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334143/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334144/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334158/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334160/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334165/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334166/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334170/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334171/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334172/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334173/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334174/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334175/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334176/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334190/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334193/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334194/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334195/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334209/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334210/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334211/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334212/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334213/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334214/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334271/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334272/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334273/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334284/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334288/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334298/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334314/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334317/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334328/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334329/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334330/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334331/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334332/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334333/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334334/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334335/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334336/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334359/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334372/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334384/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334385/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334390/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334394/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334396/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334397/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334403/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334422/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334432/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334435/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334442/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334443/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334444/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334445/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334496/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334499/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334512/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334519/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334520/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334567/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334568/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334586/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334587/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334588/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334592/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334598/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334610/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334621/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334631/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334632/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334634/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334643/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334644/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334658/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334665/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334667/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334676/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334687/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334689/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334693/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334699/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334709/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334715/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334718/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334721/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334727/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334728/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334729/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334739/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334773/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334774/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334776/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334789/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334791/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334792/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334794/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334804/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334807/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334810/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334820/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334826/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334830/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334834/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334849/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334864/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334895/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334903/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334904/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334910/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334912/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334918/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334922/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P334923/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336167/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336172/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336595/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336596/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P336597/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P393855/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P393866/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/P428858/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa01/X010028/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P237185/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P240211/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P296062/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P314346/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P334814/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336039/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336040/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336126/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336216/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336217/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336218/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336220/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P336317/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/P500551/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa02/Q009186/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P223388/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P237351/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P238051/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P238321/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P238357/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P313430/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P313818/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334919/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334925/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334926/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334929/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334930/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334931/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P334932/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336128/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336130/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336144/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336149/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336150/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336151/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336158/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336161/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336175/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336225/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336226/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336234/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336242/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336243/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336244/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336245/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336291/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336306/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336599/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336600/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336601/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336602/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336603/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336604/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336605/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336606/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336607/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336608/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336609/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P336796/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P337164/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338360/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338383/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338404/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338675/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/P338681/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/Q009249/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa03/Q009250/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236925/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236926/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236927/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236928/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236929/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236937/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236943/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236955/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236956/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P236960/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237009/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237018/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237027/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237030/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237053/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237081/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237119/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237127/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237128/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237168/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237173/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237190/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237191/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237203/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237208/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237209/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237210/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237212/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237213/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237222/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237224/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237231/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237355/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237358/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237360/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237361/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237362/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237363/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237364/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237365/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237366/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237367/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237369/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237370/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237371/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237372/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237373/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237374/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237376/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237377/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237378/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237380/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237383/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237386/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237387/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237405/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237410/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237412/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237413/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237416/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237417/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237423/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237435/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237443/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237449/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237461/html
root - INFO - http://oracc.museum.upenn.edu/saao/saa04/P237479/html

Binary file not shown.

1
ccpo Normal file

File diff suppressed because one or more lines are too long

1
cdli Normal file

File diff suppressed because one or more lines are too long

1
ckst Normal file

File diff suppressed because one or more lines are too long

1
cmawro Normal file

File diff suppressed because one or more lines are too long

1
contrib Normal file
View File

@@ -0,0 +1 @@
[]

1
dcclt Normal file

File diff suppressed because one or more lines are too long

1
dccmt Normal file

File diff suppressed because one or more lines are too long

1
doc Normal file
View File

@@ -0,0 +1 @@
[]

1
dsst Normal file

File diff suppressed because one or more lines are too long

1
ecut Normal file

File diff suppressed because one or more lines are too long

1
eisl Normal file

File diff suppressed because one or more lines are too long

1
epsd2 Normal file
View File

@@ -0,0 +1 @@
[]

1
etcsri Normal file

File diff suppressed because one or more lines are too long

1
glass Normal file

File diff suppressed because one or more lines are too long

1
hbtin Normal file

File diff suppressed because one or more lines are too long

1
lacost Normal file

File diff suppressed because one or more lines are too long

1
lovelyrics Normal file
View File

@@ -0,0 +1 @@
[]

1
neo Normal file
View File

@@ -0,0 +1 @@
[]

1
nere Normal file

File diff suppressed because one or more lines are too long

1
nimrud Normal file
View File

@@ -0,0 +1 @@
[]

1
obel Normal file
View File

@@ -0,0 +1 @@
[{"id_text": "P345452", "project_name": "obel", "raw_text": "Pure barge of the heavens, you are authoritative all on your own. Father Nanna, lord of Ur. Father Nanna, lord of the Eki\u0161nu\u014bal. Father Nanna, lord Dilimbabbar. Lord Nanna, foremost son of Enlil. When you float, when you float, When you appear authoritatively before your father, before Enlil, Father Nanna, when you appear authoritatively, when you raise your chest, When you appear authoritatively in your barge which is floating through the midst of heavens, Father Nanna, you, when you ride to your pure shrine, Father Nanna, when you float like a boat in a flood wave, When you float, when you float, you, when you float, When you float, when you pour out beer, you, when you float, When you pour out beer in a joyful mood, you, when you float, Father Nanna, when you tend to the ur cows and \u0161ar cows, Your father (Enlil) looks upon you with joyful eyes, and tends to you truly. Behold, he shines forth for the king; Enlil entrusted the sceptre of a lengthy reign to your hands. When you take care of lord Nudimmud, ... Having filled water into the .. canal ... Having filled water into the .. canal ... Having filled water into the Tigris, it is Nanna's. Having filled water into the Euphrates, it is Nanna's. Having filled water into canal and ditch for purification, they are Nanna's. Having filled the great marsh and the small marsh with water, they are Nanna's. An er\u0161ema song of Suen."}, {"id_text": "P355693", "project_name": "obel", "raw_text": "Oh my brother! ... Oh my brother! ... Oh my brother, son of Ga\u0161anmah! I lament for my brother, I lament, I lament in every way. I lament, the song of youthfulness I lament, in crying for the ... man She makes the woes plentyful, she makes the woes plentiful, standing up she makes the woes plentiful, young man, your mother makes the woes plentiful, your mother, Ninhursa\u014b, makes the woes plentiful. Our Princess in the Emah, the princess makes the woes plentiful. Atutur, the minister with hair hanging down makes the woes plentiful. My brother, you mother makes the woes plentiful. The palace of Ke\u0161 makes the woes plentiful. The brickwork of Iri\u0161ar makes the woes plentiful. The Emah of Adab makes the woes plentiful. The brickwork of Adab makes the woes plentiful. 'Where shall my son be handed over?' she is saying. 'Where shall my son, the Foolish One, be handed over?' she is saying. 'Where shall my son, the one I love, be handed over?' she is saying. The spouse calls out to her man. My brother, rise from your bed, may your mother rejoice over you. Your mother, Ga\u0161anhursa\u014b, may your mother rejoice over you. The en-priest, the lord, the great ruler of Adab may he rejoice over you. A\u0161irgi, the lord of Ke\u0161, may he rejoice over you. Atutur, in mourning, may she rejoice over you. Damgalnuna, of the Ema\u1e2b, the princess, may she rejoice over you. Lisin, the one of liver and heart, may she rejoice over you. ... ... ... ... ... Let me hear your sweet lips, let me hear your sweet voice let (my) heart be close to your good looks. Young man, do not let your mother, sit in tears, do not let your mother, Ninhursa\u014b, sit moaning, do not let Our Princess, sit (witnessing) your pain, do not let them do \"ua!\" Rise from your bed! Foolish One, do not let them do \"ua!\" Rise from your bed! The brother replied to his sister: My release, my sister, my release, Our Princess, my release, my sister, my release, Oh sister, do not speak so much, I am not one who can see. Our Princess, do not speak so much, I am not one who can see, My mother, Ga\u0161anmah, do not speak so much, I am not one who can see. In my bed, the dust of the netherworld, the ... lie with me. In my sleep, terror, the enemy sits with me. My sister, when I lie down and when I do not rise, my mother is the one who is anguished(?) over me, may I loosen the silah. Ga\u0161anhursa\u014b is the one who is anguished over me, may I loosen the simlah. My sister, stand up, give me my share, the estate of my father. My father made the woes over me plentiful, that be my share. Let my mother let her hair hang down for me, so that my ribs may lay down. May the bride whom my father (chose for me) measure grain for me, so that I may listen to it. Acquire a bed for me, (and recite) \"Its spirit is blown off.\" Set up the throne, seat the silah. Place the clothes on the throne, cover the simlah. Make funerary offerings, turn, accept them for me. Pour water into the libation pipe, and stir in the dust of the netherworld. Pour out the hot soup, let me drink its radiance. My sister, alas! Where ...? Our Princess ... Tears ... ... ... ... ... ..."}]

1
obmc Normal file

File diff suppressed because one or more lines are too long

1
obta Normal file
View File

@@ -0,0 +1 @@
[]

1
ogsl Normal file
View File

@@ -0,0 +1 @@
[]

1
oimea Normal file
View File

@@ -0,0 +1 @@
[]

1
pnao Normal file
View File

@@ -0,0 +1 @@
[]

View File

@@ -1,25 +1,25 @@
#adsd
#aemw
#akklove
#amgg####
#ario
#armep
#arrim
#asbp
#atae
#babcity
#blms
#btmao
#btto
#cams
adsd
aemw
akklove
amgg####
ario
armep
arrim
asbp
atae
babcity
blms
btmao
btto
cams
#caspo##############
ccpo
cdli
ckst
cmawro
contrib
contrib/amarna
contrib/lambert
#contrib/amarna
#contrib/lambert
ctij
dcclt
dccmt

View File

@@ -21,6 +21,7 @@
"import re\n",
"import logging\n",
"import datetime\n",
"import os\n",
"logging.basicConfig(\n",
" level=logging.INFO, filename=f'{datetime.datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")}.log', filemode='w',\n",
" format='%(name)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')\n",
@@ -29,31 +30,24 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'NoneType' object is not subscriptable",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[3], line 6\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mproject_list\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m 4\u001b[0m \u001b[39mfor\u001b[39;00m project \u001b[39min\u001b[39;00m f\u001b[39m.\u001b[39mread()\u001b[39m.\u001b[39msplit(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m):\n\u001b[0;32m 5\u001b[0m \u001b[39m# connection = connection.execute(\"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)\", scrap)\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m scrap \u001b[39m=\u001b[39m scrapping\u001b[39m.\u001b[39;49mget_raw_english_texts_of_project(project)\n\u001b[0;32m 7\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 8\u001b[0m execute_batch(connection, \u001b[39m\"\u001b[39m\u001b[39minsert into raw_texts values (\u001b[39m\u001b[39m%(id_text)s\u001b[39;00m\u001b[39m, \u001b[39m\u001b[39m%(project_name)s\u001b[39;00m\u001b[39m, \u001b[39m\u001b[39m%(raw_text)s\u001b[39;00m\u001b[39m)\u001b[39m\u001b[39m\"\u001b[39m, scrap)\n",
"File \u001b[1;32mc:\\Users\\Saret\\Programming\\C#\\DH\\scrapping.py:46\u001b[0m, in \u001b[0;36mget_raw_english_texts_of_project\u001b[1;34m(project_dirname)\u001b[0m\n\u001b[0;32m 44\u001b[0m \u001b[39mfor\u001b[39;00m filename \u001b[39min\u001b[39;00m all_paths:\n\u001b[0;32m 45\u001b[0m cur_json \u001b[39m=\u001b[39m _load_json_from_path(filename)\n\u001b[1;32m---> 46\u001b[0m project_name \u001b[39m=\u001b[39m cur_json[\u001b[39m'\u001b[39;49m\u001b[39mproject\u001b[39;49m\u001b[39m'\u001b[39;49m]\n\u001b[0;32m 48\u001b[0m \u001b[39m# # Skip in case we are in saa project and the current sub project is not in neo-assyrian\u001b[39;00m\n\u001b[0;32m 49\u001b[0m \u001b[39m# if project_dirname == \"saao\" and project_name[-2:] not in SUB_PROJECTS_IN_NEO_ASS: # TODO: validate\u001b[39;00m\n\u001b[0;32m 50\u001b[0m \u001b[39m# continue\u001b[39;00m\n\u001b[0;32m 51\u001b[0m \n\u001b[0;32m 52\u001b[0m \u001b[39m# id_text = member.get('id_text', \"\") + member.get('id_composite', \"\")\u001b[39;00m\n\u001b[0;32m 53\u001b[0m \u001b[39m# html_dir = \"/\".join(path.parts[1:-1])\u001b[39;00m\n\u001b[0;32m 54\u001b[0m url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttp://oracc.iaas.upenn.edu/\u001b[39m\u001b[39m{\u001b[39;00mproject_name\u001b[39m}\u001b[39;00m\u001b[39m/\u001b[39m\u001b[39m{\u001b[39;00mcur_json[\u001b[39m'\u001b[39m\u001b[39mtextid\u001b[39m\u001b[39m'\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m/html\u001b[39m\u001b[39m\"\u001b[39m\n",
"\u001b[1;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable"
]
}
],
"outputs": [],
"source": [
"conn = psycopg2.connect(\"dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'\")\n",
"connection = conn.cursor()\n",
"with open('project_list') as f:\n",
" for project in f.read().split('\\n'):\n",
" # connection = connection.execute(\"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)\", scrap)\n",
" scrap = scrapping.get_raw_english_texts_of_project(project)\n",
" try:\n",
" execute_batch(connection, \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)\", scrap)\n",
" scrap = scrapping.get_raw_english_texts_of_project(project)\n",
" except Exception:\n",
" logging.error(f\"Error in {project}:{Exception}\")\n",
" try:\n",
" execute_batch(\n",
" connection,\n",
" \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING\",\n",
" scrap)\n",
" conn.commit()\n",
" except Exception:\n",
" with open(project, 'w') as f:\n",
@@ -63,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -73,20 +67,48 @@
" level=logging.INFO, filename=f'./now.log',\n",
" format='%(name)s - %(levelname)s - %(message)s')\n",
"logging.info('Start')\n",
"with open('project_list') as f:\n",
" project = f.read().split('\\n')[2]\n",
" scrap = scrapping.get_raw_english_texts_of_project(project)\n",
" execute_batch(connection, \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)\", scrap)\n",
" conn.commit()\n"
"with open('project_list', encoding='unicode-escape') as f:\n",
" # project = f.read().split('\\n')[2]\n",
" for project in f.read().split('\\n'):\n",
" # connection = connection.execute(\"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s)\", scrap)\n",
" if os.path.exists(project):\n",
" with open(project) as f:\n",
" logging.info(f'reading {project}')\n",
" scrap = json.loads(f.read())\n",
" # scrap = json.loads(project)\n",
" execute_batch(\n",
" connection, \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING\",\n",
" scrap)\n",
" conn.commit()\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "TypeError",
"evalue": "'NoneType' object is not subscriptable",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[4], line 4\u001b[0m\n\u001b[0;32m 2\u001b[0m conn \u001b[39m=\u001b[39m psycopg2\u001b[39m.\u001b[39mconnect(\u001b[39m\"\u001b[39m\u001b[39mdbname=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mdh\u001b[39m\u001b[39m'\u001b[39m\u001b[39m user=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mdh\u001b[39m\u001b[39m'\u001b[39m\u001b[39m host=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mdh.saret.tk\u001b[39m\u001b[39m'\u001b[39m\u001b[39m password=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mqwerty\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m connection \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39mcursor()\n\u001b[1;32m----> 4\u001b[0m scrap \u001b[39m=\u001b[39m scrapping\u001b[39m.\u001b[39;49mget_raw_english_texts_of_project(\u001b[39m'\u001b[39;49m\u001b[39msaao\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m 5\u001b[0m execute_batch(\n\u001b[0;32m 6\u001b[0m connection, \u001b[39m\"\u001b[39m\u001b[39minsert into raw_texts values (\u001b[39m\u001b[39m%(id_text)s\u001b[39;00m\u001b[39m, \u001b[39m\u001b[39m%(project_name)s\u001b[39;00m\u001b[39m, \u001b[39m\u001b[39m%(raw_text)s\u001b[39;00m\u001b[39m) ON CONFLICT DO NOTHING\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 7\u001b[0m scrap)\n\u001b[0;32m 8\u001b[0m conn\u001b[39m.\u001b[39mcommit()\n",
"File \u001b[1;32mc:\\Users\\Saret\\Programming\\C#\\DH\\scrapping.py:46\u001b[0m, in \u001b[0;36mget_raw_english_texts_of_project\u001b[1;34m(project_dirname, oracc_site)\u001b[0m\n\u001b[0;32m 44\u001b[0m \u001b[39mfor\u001b[39;00m filename \u001b[39min\u001b[39;00m all_paths:\n\u001b[0;32m 45\u001b[0m cur_json \u001b[39m=\u001b[39m _load_json_from_path(filename)\n\u001b[1;32m---> 46\u001b[0m project_name \u001b[39m=\u001b[39m cur_json[\u001b[39m'\u001b[39;49m\u001b[39mproject\u001b[39;49m\u001b[39m'\u001b[39;49m]\n\u001b[0;32m 48\u001b[0m \u001b[39m# # Skip in case we are in saa project and the current sub project is not in neo-assyrian\u001b[39;00m\n\u001b[0;32m 49\u001b[0m \u001b[39m# if project_dirname == \"saao\" and project_name[-2:] not in SUB_PROJECTS_IN_NEO_ASS: # TODO: validate\u001b[39;00m\n\u001b[0;32m 50\u001b[0m \u001b[39m# continue\u001b[39;00m\n\u001b[0;32m 51\u001b[0m \n\u001b[0;32m 52\u001b[0m \u001b[39m# id_text = member.get('id_text', \"\") + member.get('id_composite', \"\")\u001b[39;00m\n\u001b[0;32m 53\u001b[0m \u001b[39m# html_dir = \"/\".join(path.parts[1:-1])\u001b[39;00m\n\u001b[0;32m 54\u001b[0m url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttp://\u001b[39m\u001b[39m{\u001b[39;00moracc_site\u001b[39m}\u001b[39;00m\u001b[39m/\u001b[39m\u001b[39m{\u001b[39;00mproject_name\u001b[39m}\u001b[39;00m\u001b[39m/\u001b[39m\u001b[39m{\u001b[39;00mcur_json[\u001b[39m'\u001b[39m\u001b[39mtextid\u001b[39m\u001b[39m'\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m/html\u001b[39m\u001b[39m\"\u001b[39m\n",
"\u001b[1;31mTypeError\u001b[0m: 'NoneType' object is not subscriptable"
]
}
],
"source": [
"\n"
"# with open('cams') as file:\n",
"conn = psycopg2.connect(\"dbname='dh' user='dh' host='dh.saret.tk' password='qwerty'\")\n",
"connection = conn.cursor()\n",
"scrap = scrapping.get_raw_english_texts_of_project('saao')\n",
"execute_batch(\n",
" connection, \"insert into raw_texts values (%(id_text)s, %(project_name)s, %(raw_text)s) ON CONFLICT DO NOTHING\",\n",
" scrap)\n",
"conn.commit()\n"
]
},
{

1
qcat Normal file
View File

@@ -0,0 +1 @@
[]

1
riao Normal file
View File

@@ -0,0 +1 @@
[]

1
ribo Normal file

File diff suppressed because one or more lines are too long

1
rimanum Normal file

File diff suppressed because one or more lines are too long

1
rinap Normal file

File diff suppressed because one or more lines are too long

1
saao Normal file

File diff suppressed because one or more lines are too long

View File

@@ -32,7 +32,7 @@ def _load_json_from_path(json_path: str) -> Dict:
return json.load(json_file)
def get_raw_english_texts_of_project(project_dirname: str) -> List[Dict]:
def get_raw_english_texts_of_project(project_dirname: str, oracc_site: str = 'oracc.museum.upenn.edu') -> List[Dict]:
raw_jsons = list()
all_paths = glob.glob(f'jsons_unzipped/{project_dirname}/**/corpusjson/*.json', recursive=True)
# path = Path(os.path.join(JSONS_DIR, project_dirname, 'catalogue.json'))
@@ -51,7 +51,7 @@ def get_raw_english_texts_of_project(project_dirname: str) -> List[Dict]:
# id_text = member.get('id_text', "") + member.get('id_composite', "")
# html_dir = "/".join(path.parts[1:-1])
url = f"http://oracc.iaas.upenn.edu/{project_name}/{cur_json['textid']}/html"
url = f"http://{oracc_site}/{project_name}/{cur_json['textid']}/html"
# print(url)
logging.info(url)
try:

1
suhu Normal file

File diff suppressed because one or more lines are too long

1
tcma Normal file
View File

@@ -0,0 +1 @@
[]

1
tsae Normal file
View File

@@ -0,0 +1 @@
[]

1
xcat Normal file
View File

@@ -0,0 +1 @@
[]