Skip to content

Commit

Permalink
Merge pull request #3 from akb89/develop
Browse files Browse the repository at this point in the history
Fixed bug with wikidump extraction
  • Loading branch information
akb89 authored Aug 22, 2018
2 parents a3522db + 885d288 commit 1f5b446
Showing 1 changed file with 4 additions and 11 deletions.
15 changes: 4 additions & 11 deletions nonce2vec/utils/files.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,19 +50,12 @@ def __init__(self, input_data, source):
         if source != 'wiki' and source != 'nonces' and source != 'chimeras':
             raise Exception('Invalid source parameter \'{}\''.format(source))
         self._source = source
-        if source == 'wiki':
-            self._datadir = input_data
-        if source == 'nonces' or source == 'chimeras':
-            self._datafile = input_data
+        self._datafile = input_data

     def _iterate_over_wiki(self):
-        for filename in os.listdir(self._datadir):
-            if filename.startswith('.'):
-                continue
-            with open(os.path.join(self._datadir, filename), 'rt') \
-                    as input_stream:
-                for line in input_stream:
-                    yield line.strip().split()
+        with open(self._datafile, 'rt') as input_stream:
+            for line in input_stream:
+                yield line.strip().split()

     def _iterate_over_nonces(self):
         with open(self._datafile, 'rt') as input_stream:
Expand Down

0 comments on commit 1f5b446

Please sign in to comment.