-
Notifications
You must be signed in to change notification settings - Fork 0
/
sgfcount.py
50 lines (37 loc) · 1.62 KB
/
sgfcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
############## Second Part ##########################
# Fix the all_7M.sgf & all_8M.sgf IntegrityCheck issues listed in batch.log first, then run this script.
#
# Usage: nohup python3 sgfcount.py >sgfcount.log 2>&1 &
#
import os
import re
import shlex
import time
from collections import deque
# Wall-clock start; the elapsed time is reported once every file is processed.
startTime = time.time()

# Input folder holding the trimmed SGF collections, and the entries found in it.
trimedSGFsFolder = 'mSGFs'
trimedSGFsList = os.listdir('{}/'.format(trimedSGFsFolder))
# Output folders: per-file count listings and the split-up clips.
countNFolder = "count"
clip_dir = "sgfClips"
# Number of clips each input file is split into (loop-invariant, so set once).
splitPiece = 20

# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(countNFolder, exist_ok=True)
os.makedirs(clip_dir, exist_ok=True)

for sgfs_name in trimedSGFsList:
    countFile = countNFolder + "/" + sgfs_name + ".txt"
    # Quote every path that is spliced into a shell command so that names with
    # spaces or shell metacharacters cannot break (or inject into) the command.
    # For plain names shlex.quote returns the string unchanged.
    sgfArg = shlex.quote("{}/{}".format(trimedSGFsFolder, sgfs_name))
    countArg = shlex.quote(countFile)

    ### Count game number
    # Writes the line number of every line that contains '(;GM' and 'RE' ...
    os.system("grep '(;GM' {} -n | grep 'RE' | cut -f1 -d: >{}".format(sgfArg, countArg))
    # ... then appends the total number of lines of the input as the last line.
    os.system("sed -n '$=' {} >>{}".format(sgfArg, countArg))

    ### Split into smaller size for less memory burden.
    # deque(f, 1) keeps only the final line without loading the whole file.
    with open(countFile) as f:
        lastLine = deque(f, 1)
    # Search the line itself, not repr(deque): for an empty file the repr is
    # "deque([], maxlen=1)" and the digit in "maxlen=1" would be picked up
    # as a bogus total of 1.
    r = re.search(r"\d+", lastLine[0]) if lastLine else None
    if r is None:
        print("no line count found in {}; writing empty clips".format(countFile))
        countTotal = 0
    else:
        print(r.group())
        countTotal = int(r.group())

    # end[i] is the last input line (1-based) belonging to clip i+1.  Integer
    # arithmetic keeps the boundaries independent of float rounding and makes
    # the final boundary exactly countTotal.
    end = [countTotal * (i + 1) // splitPiece for i in range(splitPiece)]

    first = 1
    for i, last in enumerate(end):
        clipFile = "{}/{}{}".format(clip_dir, sgfs_name, i + 1)
        if first > last:
            # Empty range (input has fewer lines than splitPiece).  sed would
            # still print line `first` when the end address is smaller than
            # the start address, duplicating lines across clips, so create an
            # empty clip directly instead.
            with open(clipFile, "w"):
                pass
        else:
            os.system("sed -n '{},{}p' {} >{}".format(first, last, sgfArg, shlex.quote(clipFile)))
        first = last + 1

countTime = time.time()
print("--- %s seconds --- counting Finished" % (countTime - startTime))