-
Notifications
You must be signed in to change notification settings - Fork 0
/
Mantzaris.jl
309 lines (262 loc) · 10.4 KB
/
Mantzaris.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
using Statistics
"""
    Mantzaris(stYr = 1975, endYr = 1980, windowSize = 5)

Run the collusion analysis over consecutive year windows between `stYr` and `endYr`.

Every entry of `./dataTables/` whose name starts with an integer year (the text before
the first `.`) is scanned; the number of comma-separated header fields minus one is
recorded as that year's count of vote-receiving countries. Entries whose name does not
start with an integer are ignored. For each window `[yr, yr + windowSize]` a simulated
threshold is obtained from `scoreSim`, the per-window score matrices come from
`windowScores`, and both are handed to `countryCollusion`.

# Arguments
- `stYr`: first year of the analysis; must not precede the earliest year file.
- `endYr`: last year of the analysis; must not exceed the latest year file.
- `windowSize`: distance in years between the first and last year of one window.

# Returns
The dictionary built by `countryCollusion`, or `nothing` (after printing a message)
when the arguments are rejected.
"""
function Mantzaris(stYr = 1975,endYr = 1980,windowSize = 5)
    dataDir = "./dataTables/"

    # year => number of countries that can receive votes (header fields minus the label)
    countryYearsNum = Dict{Int,Int}()
    yrMin = 100000
    yrMax = -1
    for rf in readdir(dataDir)
        yrTmp = tryparse(Int, split(rf, ".")[1])
        yrTmp === nothing && continue  # stray file (README, .DS_Store, ...) - not a year table
        # only the header is needed; open(f, path) closes the handle even if f throws
        header = open(readline, string(dataDir, rf))
        countryYearsNum[yrTmp] = length(split(header, ",")) - 1
        yrMin = min(yrMin, yrTmp)
        yrMax = max(yrMax, yrTmp)
    end

    # sanity check input years
    if endYr < stYr || stYr < yrMin || endYr > yrMax
        print("year range improperly set, for the analysis end year must be greater than start and the smallest year is $(yrMin) and largest $(yrMax) with smallest first")
        return
    end
    if (stYr + windowSize) > endYr
        print("not enough years between start and end for analysis")
        return
    end
    # the window loops advance by windowSize, so a non-positive value would never terminate
    if windowSize < 1
        print("windowSize must be at least 1")
        return
    end

    # significance thresholds, keyed "<first year>-<last year>" per window
    windowConf = Dict()
    yr = stYr
    while (yr + windowSize) <= endYr
        # simulated scores under unbiased voting for this window
        windowConf["$(yr)-$(yr + windowSize)"] = scoreSim(yr, yr + windowSize, countryYearsNum)
        yr = yr + windowSize
    end

    # accumulated score matrices per window, then the pairs above threshold
    winDicts = windowScores(stYr, endYr, windowSize)
    return countryCollusion(stYr, endYr, windowSize, windowConf, winDicts)
end
"""
    countryCollusion(stYr, endYr, windowSize, windowConf, winDicts)

List, for every window `[yr, yr + windowSize]` between `stYr` and `endYr`, the country
pairs whose average score reaches the window's threshold.

# Arguments
- `stYr`, `endYr`: first and last year of the analysis.
- `windowSize`: distance in years between the first and last year of one window.
- `windowConf`: maps `"<yr>-<yr + windowSize>"` to an indexable pair of thresholds;
  the second entry is the one used here.
- `winDicts`: maps the same keys to a dictionary with `"countries"` (names indexed like
  the matrix rows/columns) and `"scoremat"` (scores summed over the window).

# Returns
A dictionary with two entries per window:
- `"1way:<key>"`: `"row-col"` for every matrix cell whose average reaches the threshold.
- `"2way:<key>"`: `"a-b"` (with `a` before `b` in `"countries"`) when both directions
  reach the threshold.

# Throws
- `ArgumentError` if `windowSize < 1`, because the window start would never advance.
"""
function countryCollusion(stYr,endYr,windowSize,windowConf,winDicts)
    windowSize >= 1 || throw(ArgumentError("windowSize must be at least 1, got $(windowSize)"))

    collusionDict = Dict()
    yr = stYr
    while (yr + windowSize) <= endYr
        winKey = "$(yr)-$(yr + windowSize)"
        threshold = windowConf[winKey][2]
        cntryNames = winDicts[winKey]["countries"]
        # a window spans windowSize + 1 years, hence the divisor
        scorematAVG = winDicts[winKey]["scoremat"] * (1/(windowSize+1))

        oneWay = String[]
        twoWay = String[]
        for ii in axes(scorematAVG, 1), jj in axes(scorematAVG, 2)
            scorematAVG[ii,jj] >= threshold || continue
            push!(oneWay, "$(cntryNames[ii])-$(cntryNames[jj])")
            # report a mutual pair once, from its upper-triangle cell
            if jj > ii && scorematAVG[jj,ii] >= threshold
                push!(twoWay, "$(cntryNames[ii])-$(cntryNames[jj])")
            end
        end
        collusionDict["1way:$(winKey)"] = oneWay
        collusionDict["2way:$(winKey)"] = twoWay

        yr = yr + windowSize
    end
    return collusionDict
end
#FOR EACH TIME WINDOW
"""
    windowScores(stYr, endYr, windowSize)

Accumulate, for every window `[yr, yr + windowSize]` between `stYr` and `endYr`, the
points each country gave to each other country.

Every entry of `./dataTables/` whose name starts with an integer year (the text before
the first `.`) is read as a comma-separated table: the first line holds a label followed
by the receiving countries, every further line holds a voting country followed by its
scores. Entries whose name does not start with an integer are ignored.

# Arguments
- `stYr`, `endYr`: first and last year of the analysis.
- `windowSize`: distance in years between the first and last year of one window.

# Returns
A dictionary keyed `"<yr>-<yr + windowSize>"`. Each value is a dictionary with
- `"countries"`: sorted names of all countries that appear in any year of the window;
- `"scoremat"`: square matrix indexed like `"countries"`, rows being the voting country
  and columns the receiving country, summed over the years of the window.

# Throws
- `ArgumentError` if `windowSize < 1`, because the window start would never advance.
- `KeyError` if a year inside a window has no table in `./dataTables/`.
"""
function windowScores(stYr,endYr,windowSize)
    windowSize >= 1 || throw(ArgumentError("windowSize must be at least 1, got $(windowSize)"))
    dataDir = "./dataTables/"

    # 1 - PER-YEAR NAME LIST AND SCORE MATRIX
    namesDict = Dict()   # year => sorted names (voters and receivers) of that year
    scoresDict = Dict()  # string(year) => matrix indexed by the names in namesDict
    for rf in readdir(dataDir)
        yrTmp = tryparse(Int, split(rf, ".")[1])
        yrTmp === nothing && continue  # stray file (README, .DS_Store, ...) - not a year table
        # open(f, path) closes the handle even if reading throws
        linesTmp = open(readlines, string(dataDir, rf))

        origColNames = split(linesTmp[1], r",|\n", keepempty=false)
        splice!(origColNames, 1)  # drop the leading label, keep the receiving countries
        rowFields = [split(linesTmp[ii], r",|\n", keepempty=false) for ii in 2:length(linesTmp)]
        rowNAMES = Any[fields[1] for fields in rowFields]
        totalNames = sort(unique(append!(rowNAMES, origColNames)))
        namesDict[yrTmp] = totalNames

        namePos = Dict(name => pos for (pos, name) in enumerate(totalNames))
        scoreMat = zeros(length(totalNames), length(totalNames))
        for fields in rowFields
            # Rows are stored at the voting country's position in totalNames. The window
            # step below reads row r as totalNames[r], so storing by file order would
            # misattribute votes whenever the file is not in that exact order.
            votingRow = namePos[fields[1]]
            lineScores = [parse(Int, fields[tt]) for tt in 2:length(fields)]
            # NOTE(review): the last score of every row is not stored (kept from the
            # original loop bound) - confirm against the table format whether the
            # trailing column is meant to be skipped.
            for jj in 1:length(lineScores)-1
                scoreMat[votingRow, namePos[origColNames[jj]]] = lineScores[jj]
            end
        end
        scoresDict[string(yrTmp)] = scoreMat
    end

    # 2 - PER-WINDOW COUNTRY SET AND ACCUMULATED POINTS
    winDict = Dict()
    yr = stYr
    while (yr + windowSize) <= endYr
        countries = Any[]
        for winInd in 0:windowSize
            append!(countries, namesDict[yr + winInd])
        end
        countries = sort(unique(countries))
        winPos = Dict(name => pos for (pos, name) in enumerate(countries))

        scoremat = zeros(length(countries), length(countries))  # not symmetric in general
        for winInd in 0:windowSize
            yrScoresMat = scoresDict[string(yr + winInd)]
            yrNames = namesDict[yr + winInd]
            # re-index every positive entry from the year's names to the window's names
            for colNum in axes(yrScoresMat, 2), rowNum in axes(yrScoresMat, 1)
                pts = yrScoresMat[rowNum, colNum]
                if pts > 0
                    scoremat[winPos[yrNames[rowNum]], winPos[yrNames[colNum]]] += pts
                end
            end
        end

        winDict["$(yr)-$(yr+windowSize)"] = Dict{Any,Any}("countries" => countries, "scoremat" => scoremat)
        yr = yr + windowSize
    end
    return winDict
end
#THRESHOLD FOR EACH TIME WINDOW: for every year in stYr:endYr draw one hypothetical score from the voting scheme that applied in that year (each year is simulated independently), then combine the yearly draws into one score for the window
"""
    scoreSim(stYr, endYr, countryYearsNum)

Simulate the average score over the years `stYr:endYr` 2500 times and return the
simulated averages found at the 10% and 5% ranks from the top.

For each simulated run one score is drawn per year from `Allocated`, `Sequential` or
`Rated`, chosen by the year, and the yearly scores are averaged with `mean`.

# Arguments
- `stYr`, `endYr`: first and last simulated year (both included).
- `countryYearsNum`: maps a year to the country count passed on to the drawing function.

# Returns
A two-element vector `[conf10perc, conf5perc]`.
"""
function scoreSim(stYr,endYr,countryYearsNum)
    iterNum = 2500
    rank5perc = max(1, floor(Int, 0.05*iterNum))
    rank10perc = max(1, floor(Int, 0.1*iterNum))

    runAverages = []
    for _ in 1:iterNum
        yearlyScores = []
        for yr in stYr:endYr
            countryCount = countryYearsNum[yr]
            isAllocatedYear = yr >= 1975 || yr == 1963 || yr == 1962
            isSequentialYear = (1964<=yr<= 1966) || yr==1974 || (1967<=yr<=1970) || (1957<=yr<=1961)
            isRatedYear = 1971<=yr<=1973
            drawn = if isAllocatedYear
                Allocated(yr, countryCount)
            elseif isSequentialYear
                Sequential(yr, countryCount)
            elseif isRatedYear
                Rated(yr, countryCount)
            else
                # any other year falls back to the default allocated scheme
                Allocated(-1, countryCount)
            end
            append!(yearlyScores, drawn)
        end
        push!(runAverages, mean(yearlyScores))
    end

    # largest averages first, so rank k is the k-th highest simulated average
    ranked = sort(runAverages, rev=true)
    return [ranked[rank10perc], ranked[rank5perc]]
end
#Sequential scheme: the points of a year are handed out one after another, and
#the country has an equal chance of receiving each of them
"""
    Sequential(yr, NUM)

Draw one simulated score for a year in which points are awarded one after another.

Each point value is won when a position drawn uniformly from `1:NUM` equals 1.

# Arguments
- `yr`: the simulated year. 1964-1966 use the point values 5, 3 and 1; 1957-1961,
  1967-1970 and 1974 use ten single points.
- `NUM`: number of countries the position is drawn from.

# Returns
The integer sum of the point values won; 0 for any other year.
"""
function Sequential(yr,NUM)
    if 1964 <= yr <= 1966
        points = [5,3,1]
    elseif yr == 1974 || (1967 <= yr <= 1970) || (1957 <= yr <= 1961)
        points = fill(1, 10)
    else
        return 0
    end

    score = 0
    for p in points
        # Draw a scalar position. Comparing a 1x1 matrix with 1 is always false in
        # Julia, which would make every simulated score 0.
        if ceil(Int, rand() * NUM) == 1
            score += p
        end
    end
    return score
end
"""
    Allocated(yr, NUM)

Draw one simulated score for a year in which a fixed list of point values is allocated
by rank.

A position is drawn uniformly from `1:NUM`; positions within the point list earn the
value at that position.

# Arguments
- `yr`: the simulated year. 1962 uses 3, 2, 1; 1963 uses 5, 4, 3, 2, 1; every other
  year uses 12, 10, 8, 7, 6, 5, 4, 3, 2, 1.
- `NUM`: number of countries the position is drawn from.

# Returns
The point value as an integer scalar, or 0 when the position lies outside the list.
"""
function Allocated(yr,NUM)
    SCORES = if yr == 1962
        [3,2,1]
    elseif yr == 1963
        [5,4,3,2,1]
    else
        [12,10,8,7,6,5,4,3,2,1]
    end

    # scalar draw so that both outcomes return an Int rather than a 1x1 matrix or an Int
    position = ceil(Int, rand() * NUM)
    # the lower bound also covers position 0, e.g. when NUM is 0
    return 1 <= position <= length(SCORES) ? SCORES[position] : 0
end
"""
    Rated(yr, NUM)

Draw one simulated score for a year in 1971-1973 as the sum of two independent ratings,
each picked uniformly from 5, 4, 3, 2, 1.

# Arguments
- `yr`: the simulated year; must lie in 1971-1973.
- `NUM`: accepted for a signature parallel to the other schemes; not used.

# Returns
An integer between 2 and 10.

# Throws
- `ArgumentError` if `yr` is outside 1971-1973 (previously this surfaced as an
  `UndefVarError` because the ratings were never assigned).
"""
function Rated(yr,NUM)
    1971 <= yr <= 1973 || throw(ArgumentError("Rated voting only applies to 1971-1973, got yr = $(yr)"))
    SCORES1 = [5,4,3,2,1]
    return rand(SCORES1) + rand(SCORES1)
end