-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest2.py
63 lines (29 loc) · 855 Bytes
/
test2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import nltk
from nltk.tokenize import word_tokenize
import re
def join_split_words(words, dictionary, *, verbose=False):
    """Rejoin words that were split in two around a ``*`` marker token.

    Every token of *words* is copied to the output.  Whenever a ``*`` token
    has a neighbour on both sides and the concatenation of those two
    neighbours is found in *dictionary*, the concatenation is appended to
    the output right after the marker and the three original tokens
    (left fragment, ``*``, right fragment) are recorded as errors.

    Args:
        words: Sequence of tokens to scan.
        dictionary: Container of known words; a ``set`` gives O(1) lookups.
        verbose: When true, print each (left, right) neighbour pair that is
            examined, as the original script did.

    Returns:
        A ``(tokens, errors)`` tuple of lists.
    """
    tokens = []
    errors = []
    last_index = len(words) - 1
    for n, word in enumerate(words):
        tokens.append(word)
        # A marker at the very start or end has no pair of neighbours:
        # words[n - 1] would silently wrap around to the last token and
        # words[n + 1] would raise IndexError, so such markers are skipped.
        if word != "*" or n == 0 or n == last_index:
            continue
        before, after = words[n - 1], words[n + 1]
        if verbose:
            print(before, after)
        joined = before + after
        if joined in dictionary:
            tokens.append(joined)
            errors.extend((before, "*", after))
    return tokens, errors


def remove_fragments(tokens, errors):
    """Return the distinct *tokens* that are not listed in *errors*.

    Duplicates are dropped, matching the set difference used previously,
    but first-occurrence order is kept so the result is identical from one
    run to the next instead of depending on set iteration order.
    """
    rejected = set(errors)
    return [token for token in dict.fromkeys(tokens) if token not in rejected]


def main():
    """Read the text and the dictionary, then print the repaired word list.

    Prints, in order: each neighbour pair examined, the full token list,
    the list of error tokens, and finally the distinct tokens with the
    error tokens removed.

    Example: for the tokens ['les', 'engage', '*', 'ment', 'de', 'la'] with
    'engagement' in the dictionary, the last line printed is
    ['les', 'engagement', 'de', 'la'].
    """
    # Explicit encoding so accented characters are decoded the same way on
    # every platform rather than via the locale default.
    with open('text-test.txt', encoding='utf-8') as text_file:
        words = word_tokenize(text_file.read().lower())
    with open('Fr-dictionary.txt', encoding='utf-8') as dictionary_file:
        # A set makes each membership test O(1) instead of a list scan.
        dic = set(word_tokenize(dictionary_file.read().lower()))

    tokens, errors = join_split_words(words, dic, verbose=True)
    print(tokens)
    print(errors)
    print(remove_fragments(tokens, errors))


if __name__ == "__main__":
    main()