-
Notifications
You must be signed in to change notification settings - Fork 0
/
betweennesstest.R
106 lines (87 loc) · 2.38 KB
/
betweennesstest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
library(readxl)

# Load the raw dataset from the Excel workbook.
Dataset <- read_excel("D:/sem4/A-SIN/PROJECT/Dataset.xlsx")

# View() opens a spreadsheet-style data viewer, which only makes sense when a
# person is driving the session; skip it when the script runs non-interactively.
if (interactive()) {
  View(Dataset)
}

# Working copy used by the rest of the script.
data <- Dataset

# The corpus and sentiment steps both read `data$text`; fail early with a clear
# message instead of an obscure downstream error if that column is missing.
if (!"text" %in% names(data)) {
  stop("Dataset.xlsx must contain a `text` column.", call. = FALSE)
}
#Building and Cleaning Corpus
library(NLP)
library(tm)

# Remove URL tokens: the literal "http" followed by any run of alphanumerics.
# It is applied after removePunctuation, so a URL's separators are already gone
# and the whole address is a single alphanumeric token by the time it matches.
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)

# Convert the raw text to UTF-8 and wrap it in a tm corpus.
corpus <- iconv(data$text, to = "utf-8")
corpus <- Corpus(VectorSource(corpus))

# Plain base functions such as tolower() must be wrapped in
# content_transformer() so tm_map() keeps returning text documents instead of
# bare character vectors.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)

# Drop English stop words, URL remnants and the search terms themselves
# ('corona', 'covid', 'positive'), then collapse repeated whitespace left
# behind by the removals.
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
cleanset <- tm_map(cleanset, content_transformer(removeURL))
cleanset <- tm_map(cleanset, removeWords, c('corona', 'covid', 'positive'))
cleanset <- tm_map(cleanset, stripWhitespace)
# Term-document matrix built from the cleaned corpus
tdm <- TermDocumentMatrix(cleanset)

# Drop terms sparser than the 0.98 threshold, print a summary of what is left,
# then convert it to an ordinary dense matrix.
# NOTE: `t` shares its name with base::t(); calls like t(x) still resolve to
# the function because R skips non-function objects when looking up a call.
t <- removeSparseTerms(tdm, sparse = 0.98)
t
t <- as.matrix(t)
#WordCloud Combined
library(RColorBrewer)
library(wordcloud)

# Total frequency of each retained term across all documents, largest first.
w <- sort(rowSums(t), decreasing = TRUE)

# Seed the RNG before drawing the word cloud.
set.seed(222)
wordcloud(
  words = names(w),
  freq = w,
  max.words = 150,
  # Spelled out as FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, whereas FALSE is a reserved word.
  random.order = FALSE,
  min.freq = 5,
  colors = brewer.pal(8, 'Dark2'),
  scale = c(5, 0.3),
  rot.per = 0.7
)
#Sentiment
# Package load order is kept as-is because later packages mask earlier ones.
library(syuzhet)
library(lubridate)
library(ggplot2)
library(scales)
library(reshape2)
library(dplyr)

# Score the raw (uncleaned) text with syuzhet's NRC scorer and preview the
# first rows of the result.
s <- get_nrc_sentiment(data$text)
head(s)

# One bar per column of `s`, showing that column's total over all rows;
# las = 2 draws the axis labels perpendicular to the axis.
barplot(
  colSums(s),
  col = rainbow(ncol(s)),
  las = 2
)
#network of Terms
library(igraph)
#tdm[tdm>1]<-1

# Keep only the (up to) 50 most frequent terms of the term-document matrix.
tdm <- tdm[names(tail(sort(rowSums(as.matrix(tdm))), 50)), ]

# Term-by-term co-occurrence matrix: M %*% t(M), computed with tcrossprod()
# so the transposed copy never has to be materialised.
termM <- tcrossprod(as.matrix(tdm))

# Build an undirected weighted graph from the co-occurrence matrix.
# graph_from_adjacency_matrix() is the current name of the deprecated
# graph.adjacency(); TRUE is spelled out because `T` can be reassigned.
g <- graph_from_adjacency_matrix(termM, weighted = TRUE, mode = 'undirected')
g

# Remove self-loops (the matrix diagonal) and multi-edges.
g <- simplify(g)

# Store each vertex's label and degree as vertex attributes for later plots.
V(g)$label <- V(g)$name
V(g)$degree <- degree(g)
# Histogram of the vertex degrees stored on the graph
hist(
  V(g)$degree,
  main = "Histogram of node Degree",
  xlab = "Degree of vertices",
  ylab = "Frequency",
  col = "green",
  breaks = 100
)
# Network diagram
# Seed the RNG before plotting (presumably to keep the layout reproducible).
set.seed(222)

# First rendering: default styling, with vertex labels.
plot(g)

# Second rendering: small orange vertices, labels suppressed.
plot(
  g,
  vertex.label = NA,
  vertex.label.dist = 1.5,
  vertex.size = 6,
  vertex.color = "orange"
)
# Community detection

# Edge betweenness: detect communities and draw them over the graph.
# NOTE(review): g carries co-occurrence counts as edge weights; confirm how
# cluster_edge_betweenness() interprets weights (distance vs. strength).
comm <- cluster_edge_betweenness(g)
plot(comm, g)

# Edge betweenness scores with path length capped at 10, plotted by edge index.
C2 <- estimate_edge_betweenness(g, cutoff = 10)
plot(C2)

# Label propagation
prop <- cluster_label_prop(g)
plot(prop, g)
#greedy optimization
# Greedy modularity optimisation. This section previously called
# cluster_edge_betweenness() again, which merely reproduced `comm`;
# cluster_fast_greedy() is the algorithm the section title and the variable
# name refer to. `g` is created with mode = 'undirected' and simplified, which
# is what cluster_fast_greedy() requires, so no as.undirected() conversion is
# needed.
greed <- cluster_fast_greedy(g)
plot(greed, g)
#C2<-estimate_edge_betweenness(g,cutoff = 10)
#summary(C2)
#C3<-C2[C2<max(C2)]
#plot(C3)