boolean similarity

This commit is contained in:
2023-06-26 23:21:34 +03:00
parent db8244d902
commit 03f1d663d0
2 changed files with 13004 additions and 13003 deletions

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -23,7 +23,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -34,7 +34,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -44,12 +44,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"test_mat = tf_vectorizer.transform(df_akk['Text'].dropna().to_list())\n", "test_mat = tf_vectorizer.transform(df_akk['Text'].dropna().to_list())\n",
"cc = cosine_similarity(save_vect,save_vect)\n" "cc = cosine_similarity(save_vect,save_vect)\n",
"bool_similarity = cc > 0.5\n"
] ]
}, },
{ {

25996
new.csv

File diff suppressed because one or more lines are too long