Skip to content

Commit

Permalink
#197 use the cursor object and next to handle the mongo drop connecti…
Browse files Browse the repository at this point in the history
…on problem, since in the previous implementation a retry restarted the cursor from the beginning, so the scan would never end
  • Loading branch information
YanLiang1102 committed Oct 24, 2017
1 parent 2819972 commit 4ea29f5
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions otherHelperCode/bigdataCode/exploreLanguage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

import logging
import logging.config
import pickle
import time

import pymongo
from pymongo import MongoClient

# Upper bound on the number of records to scan: the total number of records
# in the collection, as recorded by the original author.
TOTAL_RECORDS = 109672706
# A progress line is printed every time this many records have been fetched.
PROGRESS_INTERVAL = 100000
# Largest exponent used for the reconnect backoff (2**10 s, about 17 minutes),
# so repeated connection drops cannot grow the sleep without limit.
MAX_BACKOFF_EXPONENT = 10


def count_languages(cursor, max_records=None, retry_on=(), sleep_fn=time.sleep):
    """Tally the ``language`` field of the records produced by *cursor*.

    The same cursor object is advanced with ``next()`` on every attempt,
    including after a connection error, instead of issuing a new query, so a
    retry does not start the scan over from the first record.
    NOTE(review): this assumes the driver can continue the cursor after it
    reconnects -- confirm against the PyMongo version in use.

    Args:
        cursor: An iterator yielding mapping-like records (for example a
            PyMongo cursor).
        max_records: Stop after this many records have been fetched.
            ``None`` means "until the cursor is exhausted".
        retry_on: Tuple of exception types that signal a dropped connection.
            When one is raised while fetching, the function sleeps with an
            exponential backoff (2, 4, 8, ... seconds, capped at
            ``2 ** MAX_BACKOFF_EXPONENT``) and tries again. The backoff is
            reset after every successful fetch.
        sleep_fn: Callable used to wait between retries; takes the delay in
            seconds. Replaceable so callers can avoid real sleeping.

    Returns:
        A dict mapping each language value to its number of occurrences,
        plus two bookkeeping keys: ``"goodrecord"`` (records tallied
        successfully) and ``"errorout"`` (records that could not be tallied,
        e.g. because they have no ``language`` field).
    """
    tally = {"errorout": 0, "goodrecord": 0}
    processed = 0
    backoff = 1
    while max_records is None or processed < max_records:
        try:
            record = next(cursor)
        except StopIteration:
            # The cursor is exhausted: this is the normal end of the scan.
            break
        except retry_on:
            delay = pow(2, backoff)
            logging.info("Error connecting sleeping for %s", delay)
            sleep_fn(delay)
            backoff = min(backoff + 1, MAX_BACKOFF_EXPONENT)
            logging.info("retrying...")
            continue

        # A record arrived, so the connection is healthy again.
        backoff = 1
        processed += 1
        if processed % PROGRESS_INTERVAL == 0:
            print("{} finished processing".format(processed))

        try:
            language = record['language']
            tally[language] = tally.get(language, 0) + 1
        except Exception:
            # Best effort: a malformed record (missing or unhashable
            # language) is counted and skipped. ``Exception`` rather than a
            # bare ``except`` so Ctrl-C and SystemExit still stop the scan.
            print('record has problems')
            tally["errorout"] += 1
        else:
            tally["goodrecord"] += 1
    return tally


def main():
    """Scan ``lexisnexis.disk_stories_full`` and pickle the language counts.

    The resulting dict is written to ``language_count.pickle`` in the current
    working directory; reconnect attempts are logged to ``logging.python``.
    """
    # level=INFO is required: under the default WARNING level the reconnect
    # messages emitted with logging.info() would never reach the log file.
    logging.basicConfig(filename='logging.python', level=logging.INFO)

    # NOTE(review): the URI below looks like a placeholder -- confirm the
    # real credentials/host/port are supplied before running.
    client = MongoClient('mongodb://user:[email protected]:port', maxPoolSize=5)
    try:
        stories = client['lexisnexis'].disk_stories_full
        tally = count_languages(
            stories.find(),
            max_records=TOTAL_RECORDS,
            retry_on=(pymongo.errors.AutoReconnect,),
        )
    finally:
        # Release the connection pool even if the scan is interrupted.
        client.close()

    with open('language_count.pickle', 'wb') as out_file:
        pickle.dump(tally, out_file, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":
    main()

0 comments on commit 4ea29f5

Please sign in to comment.