-
Notifications
You must be signed in to change notification settings - Fork 4
/
countrylist.py
executable file
·143 lines (111 loc) · 3.3 KB
/
countrylist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
Print the different countries present in a csv file generated by dbpedia extractors
Vagueplaces Generator
\author Jordi Castells
\date 18 December 2016
"""
import os
import re
import sys
import csv
import argparse
import signal
import tempfile
import xml, warnings
import cSpinner
import cPlace
import cReport
import geom_functions as GEOM
# ###########################
#
# ARGUMENT PARSING
#
# ###########################
# Command-line interface: the CSV to scan plus a switch that keeps the
# spinner from being started.
parser = argparse.ArgumentParser(
    description='print a country list from a downloaded CSV',
)
parser.add_argument(
    '--pointFile',
    dest='points',
    default=None,
    help='input file with points to alpha shape.',
)
parser.add_argument(
    '--nospinner',
    dest='nospinner',
    action="store_true",
    default=False,
    help='Deactivate live feedback via shell. For batch operations',
)
# Parsed once at import time; the rest of the script reads args.points and
# args.nospinner.
args = parser.parse_args()
def slugify(value, allow_unicode=False):
    """
    Reduce a dbpedia resource URI to a plain ASCII name.

    Every literal occurrence of "http://dbpedia.org/resource/" is removed
    and then every run of non-ASCII characters is dropped. Nothing else is
    rewritten, so characters such as "/" that may still be present are left
    in place.

    value         -- string to clean.
    allow_unicode -- accepted for call compatibility; currently ignored
                     (non-ASCII characters are always stripped).

    Returns the cleaned string.
    """
    # The dots are escaped so only the real dbpedia prefix is removed; an
    # unescaped "." would also match look-alikes such as "dbpediaXorg".
    value = re.sub(r"http://dbpedia\.org/resource/", "", value)
    return re.sub(r'[^\x00-\x7F]+', '', value)
# ###########################
#
# INITIALIZATIONS
#
# ###########################
# Spinner used for live feedback on the shell. It is created either way,
# because the rest of the script calls S.set_msg()/S.stop() unconditionally;
# --nospinner only prevents it from being started.
S = cSpinner.cSpinner()
if not args.nospinner:
    S.start()

# Lift the csv per-field size cap as far as the platform allows, since some
# fields in the input can be very large. csv.field_size_limit() raises
# OverflowError when the value does not fit in a C long (sys.maxsize is too
# big on e.g. 64-bit Windows), so back off until a limit is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 10
def finish_program():
    """
    Stop the spinner S and end the process with exit status 0.

    Never returns: it always raises SystemExit.
    """
    S.stop()
    raise SystemExit(0)
def read_countries_from_csv(filename):
    """
    Return the country URIs that occur in more than 3 rows of a points CSV.

    The file is read as ';'-delimited with '"' as quote character and needs
    a header row with a "country" column (a missing column raises KeyError).
    Only that column is used. Rows are streamed one at a time instead of
    loading the whole file, because the input can be too large for memory.

    Rows whose country is empty, missing, or does not contain
    "http://dbpedia.org/resource/" are counted as failed and skipped.
    Progress is reported through the module-level spinner S.

    filename -- path of the CSV file to read.

    Returns a list of full country URIs (prefix included), in dictionary
    iteration order.
    """
    counts = {}
    total = 0
    failed = 0

    # Physical line count, used only as the denominator of the progress
    # message (it includes the header line). Binary mode avoids any decoding
    # and the context manager closes the handle.
    with open(filename, 'rb') as linefile:
        totalL = sum(1 for _ in linefile)

    # The csv module expects a binary file on Python 2 and a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filename, 'rb')
    else:
        csvfile = open(filename, 'r', newline='')

    with csvfile:
        preader = csv.DictReader(csvfile, delimiter=';', quotechar='"')
        for row in preader:
            S.set_msg("Reading Countries: %s/%s. FAILED: %s"%(total,totalL,failed))
            total += 1

            # A row with too few fields yields None for the missing column;
            # treat it as a failed row instead of crashing on the "in" test.
            country = row["country"] or ""

            # Skip incorrectly parsed points.
            if "http://dbpedia.org/resource/" not in country:
                failed += 1
                continue

            counts[country] = counts.get(country, 0) + 1

    # Keep only countries seen more than 3 times.
    return [country for country, seen in counts.items() if seen > 3]
# ###########################
#
# SIGNAL HANDLING
#
# ###########################
def kill_handler(signal, frame):
    """
    Signal callback: announce the interruption, then shut the program down.

    signal -- signal number passed by the signal machinery (unused; note the
              name shadows the signal module inside this function).
    frame  -- interrupted stack frame (unused).
    """
    print('Kill Signal Recieved')
    finish_program()
# Route SIGINT (Ctrl-C) to kill_handler so the program shuts down through finish_program().
signal.signal(signal.SIGINT, kill_handler)
# ###########################
#
# START
#
# ###########################
# Entry point: print every country found in the points file, one per line.
#
# The exit status is tracked explicitly. Leaving through finish_program()
# in a finally clause would always exit with 0, hiding failures from batch
# callers.
exit_status = 0
try:
    S.set_msg("Generating country list")
    # The CSV is streamed by read_countries_from_csv; loading it whole was
    # abandoned because it used too much memory.
    for country in read_countries_from_csv(args.points):
        print(country)
except Exception as e:
    # Report on stderr so the message never mixes with the country list
    # written to stdout.
    sys.stderr.write("%s\n" % (e,))
    exit_status = 1
finally:
    # Always stop the spinner, whatever happened above.
    S.stop()
sys.exit(exit_status)