-
Notifications
You must be signed in to change notification settings - Fork 0
/
multinomialHesap.py
118 lines (96 loc) · 5.26 KB
/
multinomialHesap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import pandas as pd
import tweepy
class multinomialHesap():
    """Classify a Twitter account as "Trol" / "Not Trol" with Multinomial Naive Bayes.

    The classifier is trained on four boolean profile features computed from
    ``data/bots_data.csv`` and ``data/nonbots_data.csv`` and then applied to
    the profile of the account whose timeline is downloaded through tweepy.
    """

    # Profile fields stored for the downloaded account (CSV column order).
    _PROFILE_COLUMNS = ['screen_name', 'location', 'description', 'verified',
                        'follower', 'following', 'url']
    # Boolean feature columns consumed by the classifier.
    _FEATURE_COLUMNS = ['screen_name_binary', 'description_binary',
                        'location_binary', 'verified_binary']

    def __init__(self):
        self.tweets = []
        self.tweetText = []

    @staticmethod
    def _add_binary_features(frame):
        """Add the four boolean feature columns to *frame* in place and return it.

        The same definition is used for bot rows, non-bot rows and the account
        being scored, so every feature means the same thing at training time
        and at prediction time:

        * ``screen_name_binary`` - screen name contains "bot" (case-insensitive)
        * ``description_binary`` - description contains "bot" (case-insensitive)
        * ``location_binary``    - location is missing
        * ``verified_binary``    - account is not verified

        Args:
            frame: DataFrame with ``screen_name``, ``description``,
                ``location`` and ``verified`` columns.

        Returns:
            The same DataFrame object, with the feature columns added.
        """
        for source, target in (('screen_name', 'screen_name_binary'),
                               ('description', 'description_binary')):
            # Cast to str first: a column that is entirely NaN (or numeric)
            # after a CSV round trip has no ``.str`` accessor. ``na=False``
            # keeps missing values from counting as a match.
            frame[target] = frame[source].astype(str).str.contains(
                "bot", case=False, regex=False, na=False)
        frame['location_binary'] = frame['location'].isnull()
        frame['verified_binary'] = frame['verified'].eq(False)
        return frame

    def DownloadData(self, keyword):
        """Download the profile of *keyword* and predict whether it is a troll.

        Side effects: writes ``data/veri.csv`` (raw profile row) and
        ``data/sonuc.csv`` (profile row plus feature columns), reads
        ``data/bots_data.csv`` and ``data/nonbots_data.csv``, and prints
        progress information and the test-split accuracy.

        Args:
            keyword: Twitter screen name whose timeline is queried.

        Returns:
            Tuple ``(pred, keyword, follower, following, url, name, img, bg)``
            where ``pred`` is ``"Trol"`` or ``"Not Trol"``.

        Raises:
            ValueError: If the timeline of *keyword* returned no statuses, so
                there is no profile to classify.
        """
        auth = tweepy.OAuthHandler('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
                                   'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
        api = tweepy.API(auth)

        # A single status is enough: only the embedded user profile is used.
        statuses = list(
            tweepy.Cursor(api.user_timeline, screen_name=keyword).items(1))
        if not statuses:
            raise ValueError(
                "No statuses returned for {0!r}; cannot classify".format(keyword))
        user = statuses[-1].user

        rows = [[status.user.screen_name, status.user.location,
                 status.user.description, status.user.verified,
                 status.user.followers_count, status.user.friends_count,
                 status.user.url]
                for status in statuses]
        pd.DataFrame(rows, columns=self._PROFILE_COLUMNS).to_csv('data/veri.csv')

        # Read the row back from disk: pandas parses empty fields as NaN, so an
        # empty location string is treated as a missing location below.
        profile = self._add_binary_features(pd.read_csv('data/veri.csv'))
        print(profile)
        profile.to_csv('data/sonuc.csv')

        # Labelled training data; the 'bot' column is the target (1 bot, 0 non bot).
        bots = self._add_binary_features(
            pd.read_csv('data/bots_data.csv', encoding=('ISO-8859-1')))
        print("Bots shape: {0}".format(bots.shape))
        nonbots = self._add_binary_features(
            pd.read_csv('data/nonbots_data.csv', encoding=('ISO-8859-1')))
        print("Nonbots shape: {0}".format(nonbots.shape))

        # Joining Bots and NonBots dataframes
        df = pd.concat([bots, nonbots])
        print("DataFrames created...")

        # Splitting data randomly into train_df (80%) and test_df (20%)
        from sklearn.model_selection import train_test_split
        train_df, test_df = train_test_split(df, test_size=0.2)
        print("Randomly splitting the dataset into training and test, and training classifiers...\n")

        # Using MultinomialNB Classifier
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.metrics import accuracy_score
        clf = MultinomialNB(alpha=1, fit_prior=True)
        X_train = train_df[self._FEATURE_COLUMNS]
        y_train = train_df['bot']
        X_test = test_df[self._FEATURE_COLUMNS]
        y_test = test_df['bot']

        model = clf.fit(X_train, y_train)
        # Predictions on the held-out split are only used for the accuracy report.
        predicted = model.predict(X_test)

        # predict() returns an array; exactly one profile row was scored.
        prediction = model.predict(profile[self._FEATURE_COLUMNS])
        pred = "Trol" if prediction[0] == 1 else "Not Trol"
        print("multinom", pred)

        follower = user.followers_count
        following = user.friends_count
        url = user.url
        name = user.name
        img = user.profile_image_url
        # NOTE(review): bg is read from the same field as img; possibly a
        # background/banner field was intended - confirm with the caller.
        bg = user.profile_image_url
        print("MultinomialNB Classifier Accuary: {0}".format(accuracy_score(y_test, predicted)))
        return pred, keyword, follower, following, url, name, img, bg