From 0abda56c91013ae6ba73346768942e1fe7c63e9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dion=20H=C3=A4fner?=
Date: Thu, 14 May 2020 14:07:21 +0200
Subject: [PATCH] optimize reading records

---
 fowd/processing.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fowd/processing.py b/fowd/processing.py
index cd5244c..67f89c2 100644
--- a/fowd/processing.py
+++ b/fowd/processing.py
@@ -63,16 +63,13 @@ def read_pickled_record_chunks(input_file):
 def read_pickle_outfile(input_file):
     reader = read_pickled_record_chunks(input_file)
 
-    try:
-        out = next(reader)
-    except StopIteration:
-        return {}
+    records = collections.defaultdict(list)
 
     for row in reader:
-        assert set(out.keys()) == set(row.keys())
         for key, val in row.items():
-            out[key] = np.concatenate((out[key], val))
+            records[key].append(val)
 
+    out = {key: np.concatenate(val) for key, val in records.items()}
     return out
 
 