-
Notifications
You must be signed in to change notification settings - Fork 0
/
cass-wordnet+bert.py
48 lines (36 loc) · 1.47 KB
/
cass-wordnet+bert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
"""
[Martinez-Gil2023b] Context-Aware Semantic Similarity Measurement for Unsupervised Word Sense Disambiguation, arXiv preprint arXiv:2305.03520, 2023
@author: Jorge Martinez-Gil
"""
# Modules
from nltk.corpus import wordnet
from sentence_transformers import SentenceTransformer, util
def calculate(word, context, exclude):
    """Pick the WordNet synonym of ``word`` that best preserves ``context``.

    Every lemma name of every WordNet synset of ``word`` is a candidate.
    Each accepted candidate is substituted for ``word`` in ``context``; the
    rewritten sentence and the original sentence are embedded with the
    ``all-MiniLM-L12-v2`` sentence-transformer model and compared by cosine
    distance (``1 - cosine similarity``). The candidate with the smallest
    distance is selected; on a tie the first one encountered is kept.

    A candidate is skipped when ``word`` occurs inside it, or when the
    candidate occurs inside ``exclude`` (both are case-insensitive
    substring tests).

    Side effects: prints one ``Comparing ...`` line per accepted candidate
    and, at the end, the full list of collected lemma names.

    Args:
        word: Target word to replace. The substitution uses ``str.replace``,
            so it is case-sensitive and touches every occurrence of ``word``
            in ``context``, including occurrences inside longer words.
        context: Sentence in which ``word`` appears.
        exclude: String of spellings to reject; any candidate that is a
            substring of it is ignored.

    Returns:
        The selected lemma name exactly as WordNet returns it (underscores
        are not converted), or the string ``'null'`` when no candidate was
        accepted.
    """
    model = SentenceTransformer('all-MiniLM-L12-v2')

    synonyms = [
        lm.name()
        for syn in wordnet.synsets(word)
        for lm in syn.lemmas()
    ]

    word_lower = word.lower()
    exclude_lower = exclude.lower()

    # The reference sentence is the same for every candidate, so embed it
    # once instead of once per loop iteration.
    target_embedding = model.encode(context)

    fw = 'null'
    best_distance = float('inf')
    for cons in synonyms:
        cons_lower = cons.lower()
        if word_lower in cons_lower or cons_lower in exclude_lower:
            continue

        # Convert underscores to spaces in the lemma only. Doing it on the
        # whole substituted sentence would also rewrite underscores that
        # belong to the context, making it differ from the reference
        # sentence for reasons unrelated to the synonym.
        source = context.replace(word, cons.replace('_', ' '))

        similarity = util.cos_sim(model.encode(source), target_embedding)
        result = 1 - float(similarity.item())
        print('Comparing ' + source + ' <-> ' + context + ' ' + str(result))

        # Strict comparison keeps the earliest candidate on ties.
        if result < best_distance:
            fw = cons
            best_distance = result

    print(synonyms)
    return fw
if __name__ == '__main__':
    # Demo: look for the best in-context replacement for "center", rejecting
    # the British spelling "centre". Guarded so that importing this module
    # does not load the model and run inference as a side effect.
    text = 'Vienna is a nice city situated in the center of the european continent'
    fr = calculate('center', text, exclude='centre')
    print(fr)