-
Notifications
You must be signed in to change notification settings - Fork 0
/
metin_siniflandirma_on_isleme_adimlari.py
70 lines (59 loc) · 2.53 KB
/
metin_siniflandirma_on_isleme_adimlari.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from zemberek import TurkishMorphology, TurkishSentenceNormalizer
import re
# Function to normalize Turkish text
def turkce_metni_duzelt(metin):
    """Normalize a Turkish sentence with Zemberek's sentence normalizer.

    The morphology and the normalizer built on it are created on the first
    call and stored on the function object, so later calls reuse them
    instead of rebuilding the analyzer for every review.

    Args:
        metin: Text to normalize.

    Returns:
        The value returned by ``TurkishSentenceNormalizer.normalize`` for
        ``metin``.
    """
    normalizer = getattr(turkce_metni_duzelt, "_normalizer", None)
    if normalizer is None:
        # Building the morphology is the expensive part; do it only once.
        morphology = TurkishMorphology.create_with_defaults()
        normalizer = TurkishSentenceNormalizer(morphology)
        turkce_metni_duzelt._normalizer = normalizer
    return normalizer.normalize(metin)
def kelime_kok_neg(kelime):
    """Return the stem of a word, suffixed with "NEG" when it is negated.

    The word is analyzed with Zemberek. When the analysis reports itself as
    correct, the stem of the first analysis result is used and "NEG" is
    appended once for every morpheme named "Negative" in that result.
    Otherwise the word is returned unchanged.

    The analyzer is created on the first call and stored on the function
    object; previously it was rebuilt for every single word.

    Args:
        kelime: A single word (token).

    Returns:
        The stem (optionally followed by "NEG"), or ``kelime`` itself when
        the analysis is not correct.
    """
    morphology = getattr(kelime_kok_neg, "_morphology", None)
    if morphology is None:
        # Building the morphology is the expensive part; do it only once.
        morphology = TurkishMorphology.create_with_defaults()
        kelime_kok_neg._morphology = morphology

    analysis = morphology.analyze(kelime)
    if not analysis.is_correct():
        # No usable analysis: keep the surface form.
        return kelime

    first = analysis.analysis_results[0]
    kok = first.get_stem()
    for morpheme in first.get_morphemes():
        if morpheme.name == "Negative":
            kok = kok + "NEG"
    return kok
# Function to detect emoticons and convert them to POSEMOTİON or NEGEMOTİON
def detect_emoticons(metin):
    """Replace emoticons with sentiment tokens, then drop leftover symbols.

    Smiling emoticons such as ``:)``, ``:-)``, ``;)`` or ``:))`` become the
    token ``POSEMOTİON`` and frowning ones such as ``:(``, ``:-(`` or
    ``:((`` become ``NEGEMOTİON``. Each token is padded with a space on both
    sides so it never fuses with a neighbouring word. Afterwards every
    remaining ``:``, ``(``, ``)`` and ``;`` is removed, so text that
    contains no emoticon is returned exactly as it was before this change.

    Args:
        metin: Text that may contain emoticons.

    Returns:
        The text with emoticons replaced by tokens and the characters
        ``: ( ) ;`` removed.
    """
    # Eyes (":" or ";"), an optional "-" nose, then one or more mouths.
    metin = re.sub(r'[:;]-?\)+', ' POSEMOTİON ', metin)
    metin = re.sub(r'[:;]-?\(+', ' NEGEMOTİON ', metin)
    # Characters that were not part of an emoticon are stripped as before.
    return metin.translate(str.maketrans('', '', ':();'))
# Function to remove punctuation
def noktalama_isaretleri_kaldir(metin):
    """Delete every character that is neither a word character nor whitespace.

    Word characters follow the regex ``\\w`` class, so letters, digits and
    the underscore are kept.
    """
    non_word_or_space = re.compile(r'[^\w\s]')
    return non_word_or_space.sub('', metin)
# Function to remove excessive spaces
def fazla_bosluklari_kaldir(metin):
    """Collapse each run of whitespace into a single space.

    Leading and trailing whitespace is collapsed to one space as well, not
    trimmed away.
    """
    # Splitting on whitespace runs keeps empty edge pieces, so the join
    # puts a single space back wherever a run used to be.
    parcalar = re.split(r'\s+', metin)
    return ' '.join(parcalar)
# Main preprocessing function
def onisleme(metin):
    """Run the full preprocessing pipeline on one review.

    The text-level steps run in a fixed order: normalization, emoticon
    handling, punctuation removal and whitespace collapsing. The result is
    then split on whitespace, every word is mapped through
    ``kelime_kok_neg``, and the words are joined back with single spaces.
    """
    adimlar = (
        turkce_metni_duzelt,
        detect_emoticons,
        noktalama_isaretleri_kaldir,
        fazla_bosluklari_kaldir,
    )
    for adim in adimlar:
        metin = adim(metin)

    kokler = [kelime_kok_neg(kelime) for kelime in metin.split()]
    return " ".join(kokler)
# Reading and preprocessing the files
# Load the raw reviews; each line of an input file is one review.
with open('negatif_yorumlar - Kopya.txt', mode='r', encoding='utf-8') as file:
    negatif_yorumlar = file.read().splitlines()

with open('pozitif_yorumlar - Kopya.txt', mode='r', encoding='utf-8') as file:
    pozitif_yorumlar = file.read().splitlines()

# Run every review through the preprocessing pipeline.
processed_negatif_yorumlar = list(map(onisleme, negatif_yorumlar))
processed_pozitif_yorumlar = list(map(onisleme, pozitif_yorumlar))

# Write the results one review per line (no trailing newline is added).
with open('processed_negatif_yorumlar.txt', mode='w', encoding='utf-8') as file:
    file.write('\n'.join(processed_negatif_yorumlar))

with open('processed_pozitif_yorumlar.txt', mode='w', encoding='utf-8') as file:
    file.write('\n'.join(processed_pozitif_yorumlar))