-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtest.py
47 lines (36 loc) · 1.92 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import re
import numpy as np
from gensim.models import Word2Vec
from sklearn.metrics import f1_score
from utils import strip_html, remove_between_square_brackets, denoise_text, remove_special_characters, simple_stemmer, remove_stopwords
def _mean_word_vectors(sentences, keyed_vectors):
    """Average the in-vocabulary word vectors of each tokenised sentence.

    A sentence with no in-vocabulary tokens is represented by a zero vector
    so that every row has the same width and rows stay aligned with labels.
    """
    dim = keyed_vectors.vector_size
    # NOTE(review): float32 matches gensim's default vector dtype -- confirm
    # if the embeddings were trained or loaded with a different dtype.
    zero_vector = np.zeros(dim, dtype=np.float32)
    rows = []
    for tokens in sentences:
        known = [
            keyed_vectors[token]
            for token in tokens
            if token in keyed_vectors.key_to_index
        ]
        rows.append(np.mean(known, axis=0) if known else zero_vector)
    if not rows:
        # Keep a 2-D shape even when there is nothing to score.
        return np.empty((0, dim), dtype=np.float32)
    return np.array(rows)


def test_model(dataframe, model, vectorizer, vectoriser_name):
    """Score a fitted sentiment model on a labelled dataframe.

    Args:
        dataframe: pandas DataFrame with 'Text', 'Category' and 'Polarity'
            columns. Rows whose 'Polarity' is not 'neutral', 'positive' or
            'negative' are ignored. The caller's frame is not modified.
        model: fitted estimator exposing ``predict``.
        vectorizer: when ``vectoriser_name == 'word2vec'``, an object with a
            ``wv`` keyed-vectors attribute; otherwise an object exposing
            ``transform``.
        vectoriser_name: name of the vectoriser; only ``'word2vec'`` selects
            the averaged-embedding path.

    Returns:
        A ``(f1_weighted, f1_individual)`` tuple: the support-weighted F1
        score and the per-class F1 scores as returned by ``f1_score`` with
        ``average=None``.
    """
    # Define the valid Polarity values and drop everything else.
    valid_polarities = ['neutral', 'positive', 'negative']
    # Work on an explicit copy: assigning new columns to a filtered slice
    # raises SettingWithCopyWarning and may or may not touch the caller's
    # frame.
    dataframe = dataframe[dataframe['Polarity'].isin(valid_polarities)].copy()

    # Concatenate 'Text' and 'Category' columns (implicitly account for aspect).
    dataframe['Text_Category'] = dataframe['Text'] + " " + dataframe['Category']

    # Remove noise and special characters, then stem and remove stop words.
    for clean in (denoise_text, remove_special_characters,
                  simple_stemmer, remove_stopwords):
        dataframe['Text_Category'] = dataframe['Text_Category'].apply(clean)

    # Transform the text data with the loaded vectorizer.
    if vectoriser_name == 'word2vec':
        sentences = [row.split() for row in dataframe['Text_Category']]
        X_test = _mean_word_vectors(sentences, vectorizer.wv)
    else:
        X_test = vectorizer.transform(dataframe['Text_Category'])

    # Integer labels for the sentiment classes.
    encode = {
        'neutral': 0,
        'positive': 1,
        'negative': 2
    }
    # Every remaining label is a key of `encode` thanks to the filter above.
    y_test = dataframe['Polarity'].map(encode)

    # Make predictions.
    y_pred = model.predict(X_test)

    # Calculate and return the F1 scores.
    f1_weighted = f1_score(y_test, y_pred, average='weighted')
    f1_individual = f1_score(y_test, y_pred, average=None)
    return f1_weighted, f1_individual