-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathTDLindexer.py
236 lines (202 loc) · 9.77 KB
/
TDLindexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
"""
The TDL indexer discovers all of the files in/under the provided source
directory and builds a list of all of the filenames. It then uses this
list to perform the following:
- Derive DOS-friendly filenames from the discovered files
- Copy the discovered files to the destination directory provided, using the DOS-friendly filenames as the destination filenames
- Build indexes that allow the DOS TDL to work faster.
For more information on the index file formats this tool generates, consult
index_formats.txt.
This is my very first Python project. All mockery and jeers can be
directed to [email protected], although it would be more helpful
to the project if you could fix my novice coding and make this program better.
"""
import sys
import os
import shutil
import struct
import hashlib
import re
from zipfile import ZipFile
def scantree_files(path):
    """Recursively yield an ``os.DirEntry`` for every non-directory under *path*.

    Directories are descended into (symlinks to directories are not followed
    and are therefore yielded as plain entries); everything else is yielded
    as-is.

    The ``os.scandir`` iterator is used as a context manager so the underlying
    directory handle is released even when the caller stops consuming this
    generator before it is exhausted.
    """
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir(follow_symlinks=False):
                yield from scantree_files(entry.path)
            else:
                yield entry
# Cleans filenames for safer matching
def clean_name(name):
    """Return *name* reduced to a conservative character set for matching.

    Every run of whitespace is collapsed into a single underscore first;
    afterwards any character that is not an ASCII letter, a digit, ``_``,
    ``!`` or ``$`` is dropped.
    """
    whitespace_run = re.compile(r'\s+')
    disallowed = re.compile(r'[^a-zA-Z0-9_!$]')
    underscored = whitespace_run.sub('_', name)
    return disallowed.sub('', underscored)
# titleLen in the titles index is a single unsigned byte, so a title can be
# at most this many characters long.
_MAX_TITLE_LEN = 255

# Archives that are always unpacked from their copied zip rather than taken
# from the pre-extracted game data directory.
_ALWAYS_UNZIPPED = ('(MANUALL.ZIP', '(UTILITI.ZIP')


def _dos_name_for(longname, fullnameToGameDir):
    """Return the 8.3 DOS zip name for *longname*, or None if it is unknown."""
    base_name = longname.replace('.zip', '')
    if base_name.startswith('('):
        # 'Custom' files starting with '(' : keep only alphanumerics, upper-case
        # them and cut to 8 characters. Conflicts are assumed not to happen.
        cleaned_name = '(' + re.sub(r'[^a-zA-Z0-9]', '', base_name).upper()
        return f"{cleaned_name[0:8]}.ZIP"
    if base_name not in fullnameToGameDir:
        return None
    return f"{fullnameToGameDir[base_name].upper()}.ZIP"


def _write_files_index(path, dos_names):
    """Write the files index: a 16-bit count, then (16-bit id, 12-byte name) records."""
    with open(path, 'wb') as f:
        f.write(struct.pack('<H', len(dos_names)))
        for idx, fname in enumerate(dos_names):
            f.write(struct.pack('<H', idx))
            # Names are NUL-padded to a fixed 12 characters (8 + '.' + 3).
            f.write(str.encode(fname[0:12].ljust(12, "\x00")))


def _write_titles_index(path, titles):
    """Write the titles index: count, 32-bit offset table, variable-length records.

    Each record is: 16-bit titleID, 16-byte MD5 of the title, 1-byte title
    length, then the title itself.
    """
    encoded = [title.encode() for title in titles]
    with open(path, 'wb') as f:
        f.write(struct.pack('<H', len(encoded)))
        # The first record starts right after the count and the offset table.
        curofs = 2 + (len(encoded) * 4)
        for raw in encoded:
            f.write(struct.pack('<L', curofs))
            curofs += 2 + 16 + 1 + len(raw)
        for idx, raw in enumerate(encoded):
            f.write(struct.pack('<H', idx))
            f.write(hashlib.md5(raw).digest())
            f.write(struct.pack('B', len(raw)))
            f.write(raw)


def _write_name_mapping(path, dos_names, titles):
    """Write one 'DOSNAME ;title' line per game, with CRLF line endings."""
    with open(path, 'w', newline='\r\n') as f:
        for shortn, title in zip(dos_names, titles):
            f.write(shortn + ' ;' + title + '\n')


def _populate_games_dir(destDir, gamesDataTempDir, dosNameToLongname, logger):
    """Fill destDir/games with one directory per DOS name.

    Regular games are moved from their pre-extracted directory under
    *gamesDataTempDir*; the archives listed in ``_ALWAYS_UNZIPPED`` are
    extracted from the zip already copied to destDir/files.
    """
    for dosZipName, longname in dosNameToLongname.items():
        target = os.path.join(destDir, 'games', os.path.splitext(dosZipName)[0])
        if dosZipName in _ALWAYS_UNZIPPED:
            os.mkdir(target)
            with ZipFile(os.path.join(destDir, 'files', dosZipName), 'r') as zipFile:
                zipFile.extractall(path=target)
            continue
        gameDataDir = os.path.join(gamesDataTempDir, os.path.splitext(longname)[0])
        if os.path.exists(gameDataDir):
            shutil.move(gameDataDir, target)
        else:
            logger.log(' Pre-extracted game data no found for %s / %s'
                       % (dosZipName, longname), logger.ERROR)


def index(outputDir, scriptDir, fullnameToGameDir, isDebug, preExtractGames, logger):
    """Build the DOS TDL distribution for the games found under outputDir/games.

    Steps, in order:
      1. Unzip scriptDir/data/mister/distro.zip if the distro directory is missing.
      2. Discover and sort (case-insensitively, by file name) every file
         under outputDir/games.
      3. Derive an 8.3 DOS name for each file; files whose name has no entry
         in *fullnameToGameDir* are logged as errors and left out of every
         index and of the copy, so ids, titles and files stay aligned.
      4. Write FILES.IDX and TITLES.IDX into the distro directory and
         name_mapping.txt into the current working directory.
      5. Copy the distro to outputDir/tdlprocessed and the games, under their
         DOS names, to outputDir/tdlprocessed/files.
      6. Create outputDir/tdlprocessed/games and, when *preExtractGames* is
         true, populate it and remove outputDir/games-data.

    Args:
        outputDir: directory containing the 'games' source directory; the
            'tdlprocessed' result is created inside it.
        scriptDir: directory containing data/mister/distro(.zip).
        fullnameToGameDir: mapping from a game's file name without '.zip' to
            its short name; the upper-cased short name becomes the DOS name.
        isDebug: when true, log the file count and every copied DOS name.
        preExtractGames: when true, also populate tdlprocessed/games.
        logger: object providing ``log(message[, level])`` plus ``ERROR`` and
            ``WARNING`` level attributes.

    Returns:
        None. Returns early (after logging) when more than 32767 files are
        found.

    Raises:
        SystemExit: if outputDir/tdlprocessed/files already exists.
    """
    sourceDir = os.path.join(outputDir, 'games')
    destDir = os.path.join(outputDir, 'tdlprocessed')
    gamesDataTempDir = os.path.join(outputDir, 'games-data')
    distroDir = os.path.join(scriptDir, 'data', 'mister', 'distro')
    filesIDX = distroDir + '/FILES.IDX'
    titlesIDX = distroDir + '/TITLES.IDX'
    filesDir = destDir + '/files/'

    if not os.path.exists(distroDir):
        # The distro ships as a zip; unpack it next to the archive on first use.
        with ZipFile(os.path.join(scriptDir, 'data', 'mister', 'distro.zip'), 'r') as zipFile:
            logger.log(" unzipping distro.zip")
            zipFile.extractall(path=os.path.join(scriptDir, 'data', 'mister'))

    foundfiles = list(scantree_files(sourceDir))
    if isDebug:
        logger.log(" Indexing %i files" % len(foundfiles))
    if len(foundfiles) > 32767:
        logger.log(" Fatal: Current design of DOS TDL does not support more than 32767 files.", logger.ERROR)
        return
    if len(foundfiles) > 16383:
        logger.log(' Warning: This many files may cause the DOS TDL to operate slower than normal\n'
                   ' due to the titles index not being able to be cached in memory. TDL will still\n'
                   ' run, but might require a very fast I/O device for acceptable speed.', logger.WARNING)

    # Sort discovered files by their filename, case insensitive.
    sfoundfiles = sorted(foundfiles, key=lambda dirent: dirent.name.lower())

    logger.log(" Converting to DOS-friendly 8.3 filenames")
    # The three lists below are index-aligned: position i in each describes
    # the same game, and i is also the id written to both index files.
    sourceFiles = []  # Source filenames with full paths and extensions (sorted)
    titles = []       # Source filenames without paths or extensions
    DOSnames = []     # 8.3-friendly DOS zip names
    dosNameToLongname = {}
    for entry in sfoundfiles:
        longname = entry.name
        dosname = _dos_name_for(longname, fullnameToGameDir)
        if dosname is None:
            # Skip the game everywhere; appending to only some of the lists
            # would shift every later game onto the wrong DOS name.
            logger.log(" Unknown game %s no corresponding shortname found" % longname, logger.ERROR)
            continue
        title = longname.rsplit(sep='.', maxsplit=1)[0]
        # Non-ASCII characters are written as backslash escapes.
        title = title.encode('ascii', 'backslashreplace').decode()
        sourceFiles.append(entry.path)
        titles.append(title[:_MAX_TITLE_LEN])
        DOSnames.append(dosname)
        dosNameToLongname[dosname] = longname

    # Refer to index_formats.txt for info on what is being generated, and why.
    logger.log(" Generating files index...")
    _write_files_index(filesIDX, DOSnames)
    logger.log(" Generating titles index...")
    _write_titles_index(titlesIDX, titles)

    # Create mapping table so the user can weed things out and try again.
    _write_name_mapping("name_mapping.txt", DOSnames, titles)

    # Copy everything over to the destination: the distro (which now holds
    # the index files) first, then the games under their DOS names.
    if os.path.exists(filesDir):
        logger.log(
            ' Output directory %s already exists.\nPlease specify a non-existent directory for the destination.' % destDir)
        sys.exit(1)
    logger.log(" Copying games zip from " + sourceDir + " to " + destDir + ", this might take a while...")
    shutil.copytree(distroDir, destDir)
    os.makedirs(filesDir, exist_ok=True)
    for source, dosname in zip(sourceFiles, DOSnames):
        if isDebug:
            logger.log(" " + dosname)
        shutil.copy(source, filesDir + dosname)
    logger.log(" All games indexed")

    # Create sub games dir
    os.mkdir(os.path.join(destDir, 'games'))
    if preExtractGames:
        _populate_games_dir(destDir, gamesDataTempDir, dosNameToLongname, logger)
        # NOTE(review): the temporary game data is removed only in the
        # pre-extract case here — confirm this matches the intended nesting.
        shutil.rmtree(gamesDataTempDir)