boolean similarity

This commit is contained in:
2023-06-26 23:21:34 +03:00
parent db8244d902
commit 03f1d663d0
2 changed files with 13004 additions and 13003 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -44,12 +44,13 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"test_mat = tf_vectorizer.transform(df_akk['Text'].dropna().to_list())\n",
"cc = cosine_similarity(save_vect,save_vect)\n"
"cc = cosine_similarity(save_vect,save_vect)\n",
"bool_similarity = cc > 0.5\n"
]
},
{

25996
new.csv

File diff suppressed because one or more lines are too long