update
This commit is contained in:
		
							
								
								
									
										0
									
								
								2023-05-15_21-45-30.log
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								2023-05-15_21-45-30.log
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										109
									
								
								datat.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								datat.ipynb
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,109 @@
 | 
			
		||||
{
 | 
			
		||||
 "cells": [
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 13,
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "import sklearn\n",
 | 
			
		||||
    "import sklearn.model_selection\n",
 | 
			
		||||
    "from sklearn.metrics.pairwise import cosine_similarity\n",
 | 
			
		||||
    "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
 | 
			
		||||
    "import pandas as pd\n",
 | 
			
		||||
    "import scipy\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "df_eng = pd.read_csv('raw_texts.csv')\n",
 | 
			
		||||
    "df_akk = pd.read_csv('new.csv')\n",
 | 
			
		||||
    "# akk_raw_train, akk_raw_test = sklearn.model_selection.train_test_split(df_akk, test_size=0.2, random_state=0)\n",
 | 
			
		||||
    "# eng_raw_train, eng_raw_test = sklearn.model_selection.train_test_split(df_eng, test_size=0.2, random_state=0)\n",
 | 
			
		||||
    "tf_vectorizer = TfidfVectorizer(analyzer='word')\n",
 | 
			
		||||
    "# tf_vectorizer.fit(akk_raw_train['Text'].to_list())"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 14,
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "tf_vectorizer = TfidfVectorizer(analyzer='word')\n",
 | 
			
		||||
    "save_vect = tf_vectorizer.fit_transform(df_akk['Text'].dropna().to_list())\n",
 | 
			
		||||
    "# save_vect = tf_vectorizer.fit_transform(['The sun in the sky is bright', 'We can see the shining sun, the bright sun.'])\n"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 15,
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "tfidf_tokens = tf_vectorizer.get_feature_names_out()\n",
 | 
			
		||||
    "df_tfidfvect = pd.DataFrame(data=save_vect.toarray(), columns=tfidf_tokens)\n"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 16,
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "test_mat = tf_vectorizer.transform(df_akk['Text'].dropna().to_list())\n",
 | 
			
		||||
    "cc = cosine_similarity(save_vect,save_vect)\n"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 22,
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [
 | 
			
		||||
    {
 | 
			
		||||
     "ename": "ValueError",
 | 
			
		||||
     "evalue": "Cannot set a DataFrame with multiple columns to the single column Genre",
 | 
			
		||||
     "output_type": "error",
 | 
			
		||||
     "traceback": [
 | 
			
		||||
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 | 
			
		||||
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
 | 
			
		||||
      "Cell \u001b[1;32mIn[22], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m df_genre \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_csv(\u001b[39m'\u001b[39m\u001b[39mgenre.csv\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m df_akk[\u001b[39m\"\u001b[39;49m\u001b[39mGenre\u001b[39;49m\u001b[39m\"\u001b[39;49m] \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mconcat([df_genre, df_akk], axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, join\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39minner\u001b[39m\u001b[39m'\u001b[39m, keys\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mProject\u001b[39m\u001b[39m'\u001b[39m)\n",
 | 
			
		||||
      "File \u001b[1;32mc:\\Users\\Saret\\Programming\\C#\\DH\\venv\\lib\\site-packages\\pandas\\core\\frame.py:3949\u001b[0m, in \u001b[0;36mDataFrame.__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m   3947\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_setitem_array(key, value)\n\u001b[0;32m   3948\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(value, DataFrame):\n\u001b[1;32m-> 3949\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_set_item_frame_value(key, value)\n\u001b[0;32m   3950\u001b[0m \u001b[39melif\u001b[39;00m (\n\u001b[0;32m   3951\u001b[0m     is_list_like(value)\n\u001b[0;32m   3952\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcolumns\u001b[39m.\u001b[39mis_unique\n\u001b[0;32m   3953\u001b[0m     \u001b[39mand\u001b[39;00m \u001b[39m1\u001b[39m \u001b[39m<\u001b[39m \u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcolumns\u001b[39m.\u001b[39mget_indexer_for([key])) \u001b[39m==\u001b[39m \u001b[39mlen\u001b[39m(value)\n\u001b[0;32m   3954\u001b[0m ):\n\u001b[0;32m   3955\u001b[0m     \u001b[39m# Column to set is duplicated\u001b[39;00m\n\u001b[0;32m   3956\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_setitem_array([key], value)\n",
 | 
			
		||||
      "File \u001b[1;32mc:\\Users\\Saret\\Programming\\C#\\DH\\venv\\lib\\site-packages\\pandas\\core\\frame.py:4103\u001b[0m, in \u001b[0;36mDataFrame._set_item_frame_value\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m   4100\u001b[0m     \u001b[39mreturn\u001b[39;00m\n\u001b[0;32m   4102\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(value\u001b[39m.\u001b[39mcolumns) \u001b[39m!=\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m-> 4103\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m   4104\u001b[0m         \u001b[39m\"\u001b[39m\u001b[39mCannot set a DataFrame with multiple columns to the single \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m   4105\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcolumn \u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m   4106\u001b[0m     )\n\u001b[0;32m   4108\u001b[0m \u001b[39mself\u001b[39m[key] \u001b[39m=\u001b[39m value[value\u001b[39m.\u001b[39mcolumns[\u001b[39m0\u001b[39m]]\n",
 | 
			
		||||
      "\u001b[1;31mValueError\u001b[0m: Cannot set a DataFrame with multiple columns to the single column Genre"
 | 
			
		||||
     ]
 | 
			
		||||
    }
 | 
			
		||||
   ],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "df_genre = pd.read_csv('genre.csv')\n",
 | 
			
		||||
    "df_akk[\"Genre\"] = pd.concat([df_genre, df_akk], axis=1, join='inner', keys='Project')"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": null,
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": []
 | 
			
		||||
  }
 | 
			
		||||
 ],
 | 
			
		||||
 "metadata": {
 | 
			
		||||
  "kernelspec": {
 | 
			
		||||
   "display_name": "venv",
 | 
			
		||||
   "language": "python",
 | 
			
		||||
   "name": "python3"
 | 
			
		||||
  },
 | 
			
		||||
  "language_info": {
 | 
			
		||||
   "codemirror_mode": {
 | 
			
		||||
    "name": "ipython",
 | 
			
		||||
    "version": 3
 | 
			
		||||
   },
 | 
			
		||||
   "file_extension": ".py",
 | 
			
		||||
   "mimetype": "text/x-python",
 | 
			
		||||
   "name": "python",
 | 
			
		||||
   "nbconvert_exporter": "python",
 | 
			
		||||
   "pygments_lexer": "ipython3",
 | 
			
		||||
   "version": "3.9.13"
 | 
			
		||||
  },
 | 
			
		||||
  "orig_nbformat": 4
 | 
			
		||||
 },
 | 
			
		||||
 "nbformat": 4,
 | 
			
		||||
 "nbformat_minor": 2
 | 
			
		||||
}
 | 
			
		||||
@@ -1,15 +1,5 @@
 | 
			
		||||
{
 | 
			
		||||
 "cells": [
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "markdown",
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "source": [
 | 
			
		||||
    "```\n",
 | 
			
		||||
    "installations\n",
 | 
			
		||||
    "\n",
 | 
			
		||||
    "```"
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
   "cell_type": "code",
 | 
			
		||||
   "execution_count": 2,
 | 
			
		||||
@@ -763,7 +753,7 @@
 | 
			
		||||
   "metadata": {},
 | 
			
		||||
   "outputs": [],
 | 
			
		||||
   "source": [
 | 
			
		||||
    "def "
 | 
			
		||||
    "#def "
 | 
			
		||||
   ]
 | 
			
		||||
  },
 | 
			
		||||
  {
 | 
			
		||||
@@ -86883,7 +86873,7 @@
 | 
			
		||||
   "name": "python",
 | 
			
		||||
   "nbconvert_exporter": "python",
 | 
			
		||||
   "pygments_lexer": "ipython3",
 | 
			
		||||
   "version": "3.9.2"
 | 
			
		||||
   "version": "3.9.13"
 | 
			
		||||
  }
 | 
			
		||||
 },
 | 
			
		||||
 "nbformat": 4,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										52
									
								
								projects.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								projects.csv
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,52 @@
 | 
			
		||||
1,adsd
 | 
			
		||||
2,aemw
 | 
			
		||||
3,akklove
 | 
			
		||||
4,amgg
 | 
			
		||||
5,ario
 | 
			
		||||
6,armep
 | 
			
		||||
7,arrim
 | 
			
		||||
8,asbp
 | 
			
		||||
9,atae
 | 
			
		||||
10,babcity
 | 
			
		||||
11,blms
 | 
			
		||||
12,btmao
 | 
			
		||||
13,btto
 | 
			
		||||
14,cams
 | 
			
		||||
15,caspo
 | 
			
		||||
16,ccpo
 | 
			
		||||
17,cdli
 | 
			
		||||
18,ckst
 | 
			
		||||
19,cmawro
 | 
			
		||||
20,contrib
 | 
			
		||||
23,ctij
 | 
			
		||||
24,dcclt
 | 
			
		||||
25,dccmt
 | 
			
		||||
26,doc
 | 
			
		||||
27,dsst
 | 
			
		||||
28,ecut
 | 
			
		||||
29,eisl
 | 
			
		||||
30,epsd2
 | 
			
		||||
31,etcsri
 | 
			
		||||
32,glass
 | 
			
		||||
33,hbtin
 | 
			
		||||
34,lacost
 | 
			
		||||
35,lovelyrics
 | 
			
		||||
36,neo
 | 
			
		||||
37,nere
 | 
			
		||||
38,nimrud
 | 
			
		||||
39,obel
 | 
			
		||||
40,obmc
 | 
			
		||||
41,obta
 | 
			
		||||
42,ogsl
 | 
			
		||||
43,oimea
 | 
			
		||||
44,pnao
 | 
			
		||||
45,qcat
 | 
			
		||||
46,riao
 | 
			
		||||
47,ribo
 | 
			
		||||
48,rimanum
 | 
			
		||||
49,rinap
 | 
			
		||||
50,saao
 | 
			
		||||
51,suhu
 | 
			
		||||
52,tcma
 | 
			
		||||
53,tsae
 | 
			
		||||
54,xcat
 | 
			
		||||
		
		
			
  | 
							
								
								
									
										1
									
								
								raw_json.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								raw_json.csv
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										43704
									
								
								raw_texts.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43704
									
								
								raw_texts.csv
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Reference in New Issue
	
	Block a user