BEL-Public · mscf · Apr 14, 2020 · Apr 6, 2020 · Apr 8, 2020 · ViridianForge
diff --git a/mffpy/bin_writer.py b/mffpy/bin_writer.py
@@ -13,8 +13,9 @@
 ANY KIND, either express or implied.
 """
 from os import SEEK_SET
-from io import BytesIO
-from typing import List, Union
+from io import BytesIO, FileIO
+from typing import List, Union, IO
+from os.path import join
 
 import numpy as np
 
@@ -25,8 +26,7 @@
     compute_header_byte_size
 )
 
-
-class BinWriter:
+class BinWriter(object):
 
     default_filename = 'signal1.bin'
     default_info_filename = 'info1.xml'
@@ -44,7 +44,7 @@ def __init__(self, sampling_rate: int, data_type: str = 'EEG'):
         self.data_type = data_type
         self.sampling_rate = sampling_rate
         self.header: Union[HeaderBlock, None] = None
-        self.stream = BytesIO()
+        self.stream: Union[IO[bytes], FileIO] = BytesIO()
         self.epochs: List[Epoch] = []
 
     @property
@@ -134,7 +134,38 @@ def write(self, filename: str, *args, **kwargs):
         # *args, **kwargs are ignored
         self.stream.seek(0, SEEK_SET)
         byts = self.stream.read()
+        assert isinstance(byts, bytes) 
         with open(filename, 'wb') as fo:
             num_written = fo.write(byts)
         assert num_written == len(byts), f"""
         Wrote {num_written} bytes (expected {len(byts)})"""
+
+class StreamingBinWriter(BinWriter):
+    """`BinWriter` variant that streams the binary signal file to disk.
+
+    `self.stream` is a `FileIO` opened on the default signal file inside
+    the recording directory, replacing the in-memory stream that
+    `BinWriter.__init__` creates.
+    """
+
+    def __init__(self, sampling_rate: int, mffdir: str,
+                 data_type: str = 'EEG'):
+        """Open `<mffdir>/<default_filename>` for writing.
+
+        **Parameters**
+
+        * **`sampling_rate`**: sampling rate of all channels.  Sampling rate
+        has to fit in a 3-byte integer.  See docs in `mffpy.header_block`.
+
+        * **`mffdir`**: directory of the mff recording to stream data to.
+        It must already exist when this class is instantiated.
+
+        * **`data_type`**: name of the type of signal.
+        """
+        super().__init__(sampling_rate, data_type)
+        bin_path = join(mffdir, self.default_filename)
+        self.stream = FileIO(bin_path, mode='w')
+
+    def write(self, filename: str, *args, **kwargs):
+        """Close the streamed file; `filename` and other args are ignored."""
+        self.stream.close()
diff --git a/mffpy/header_block.py b/mffpy/header_block.py
@@ -42,8 +42,9 @@
 
 import struct
 from os import SEEK_CUR
-from typing import IO
+from typing import IO, Union
 from collections import namedtuple
+from io import FileIO
 
 import numpy as np
 
@@ -141,7 +142,7 @@ def skip(n: int):
     )
 
 
-def write_header_block(fp: IO[bytes], hdr: HeaderBlock):
+def write_header_block(fp: Union[IO[bytes], FileIO], hdr: HeaderBlock):
     """write HeaderBlock `hdr` to file pointer `fp`"""
     fp.write(struct.pack('4i',
                          1, hdr.header_size, hdr.block_size, hdr.num_channels))

diff --git a/mffpy/tests/test_devices.py b/mffpy/tests/test_devices.py
@@ -45,6 +45,6 @@ def test_devices(device):
         for i, (_, props) in enumerate(coords.sensors.items())
     ], dtype=np.float)
     device = basename(splitext(device)[0]) if exists(device) else device
-    expected = np.load(join(resources_dir, 'testing', device+'.npy'))
+    expected = np.load(join(resources_dir, 'testing', device+'.npy'), allow_pickle=True)
     assert locs.shape == expected.shape
     assert locs == pytest.approx(expected)
diff --git a/mffpy/tests/test_writer.py b/mffpy/tests/test_writer.py
@@ -15,13 +15,14 @@
 from datetime import datetime
 from os import makedirs, rmdir, remove
 from os.path import join
+from shutil import rmtree
 
 import pytest
 import json
 import numpy as np
 
 from ..writer import Writer
-from ..bin_writer import BinWriter
+from ..bin_writer import BinWriter, StreamingBinWriter
 from ..reader import Reader
 from ..xml_files import XML
 
@@ -110,3 +111,50 @@ def test_writer_exports_JSON():
         remove(filename)
     except BaseException:
         raise AssertionError(f"""Clean-up failed of '{filename}'.""")
+
+def test_streaming_writer_receives_bad_init_data():
+    """Test bin writer fails when initialized with non-int sampling rate"""
+    dirname = 'testdir.mff'
+    makedirs(dirname)
+    StreamingBinWriter(100, mffdir=dirname).stream.close()  # no leaked handle
+    with pytest.raises(AssertionError):
+        StreamingBinWriter(100.0, mffdir=dirname)
+    rmtree(dirname)
+
+def test_streaming_writer_writes():
+    """Stream one EEG block to disk, then read the recording back."""
+    dirname = 'testdir3.mff'
+    device = 'HydroCel GSN 256 1.0'
+    num_channels, num_samples = 256, 10
+    sampling_rate = 128
+    startdatetime = datetime.strptime(
+        '1984-02-18T14:00:10.000000+0100', XML._time_format)
+    # the recording directory must exist before the stream is opened
+    writer = Writer(dirname)
+    writer.create_directory()
+    bin_writer = StreamingBinWriter(
+        sampling_rate=sampling_rate, data_type='EEG', mffdir=dirname)
+    data = np.random.randn(num_channels, num_samples).astype(np.float32)
+    bin_writer.add_block(data)
+    # add the file info, the sensor layout, and the binary file
+    writer.addxml('fileInfo', recordTime=startdatetime)
+    writer.add_coordinates_and_sensor_layout(device)
+    writer.addbin(bin_writer)
+    writer.write()
+    # read the recording back and compare it with what was written
+    reader = Reader(dirname)
+    assert reader.startdatetime == startdatetime
+    samples = reader.get_physical_samples_from_epoch(reader.epochs[0])
+    assert 'EEG' in samples
+    eeg, t0 = samples['EEG']
+    assert t0 == 0.0
+    assert eeg == pytest.approx(data)
+    layout_fp = reader.directory.filepointer('sensorLayout')
+    layout = XML.from_file(layout_fp)
+    assert layout.name == device
+    # cleanup
+    try:
+        rmtree(dirname)
+    except BaseException:
+        raise AssertionError(f"""
+        Clean-up failed of '{dirname}'.  Were additional files written?""")
diff --git a/mffpy/writer.py b/mffpy/writer.py
@@ -21,11 +21,11 @@
 
 from .dict2xml import dict2xml
 from .xml_files import XML
-from .bin_writer import BinWriter
+from .bin_writer import BinWriter, StreamingBinWriter
 from .devices import coordinates_and_sensor_layout
 import json
 
-__all__ = ['Writer', 'BinWriter']
+__all__ = ['Writer', 'BinWriter', 'StreamingBinWriter']
 
 
 class Writer:
@@ -34,25 +34,32 @@ def __init__(self, filename: str):
         self.filename = filename
         self.files: Dict[str, Any] = {}
         self._bin_file_added = False
+        self.mffdir, self.ext = splitext(self.filename)
+        self.mffdir += '.mff'
+        self.file_created = False
+
+    def create_directory(self):
+        """Create `self.mffdir` once; repeat calls on this writer do nothing."""
+        if not self.file_created:
+            makedirs(self.mffdir, exist_ok=False)
+            self.file_created = True
 
     def write(self):
         """write contents to .mff/.mfz file"""
-        # create .mff directory
-        mffdir, ext = splitext(self.filename)
-        mffdir += '.mff'
-        makedirs(mffdir, exist_ok=False)
+        # create the .mff directory unless `create_directory` already did
+        self.create_directory()
 
         # write .xml/.bin files.  For .xml files we need to set the default
         # namespace to avoid `ns0:` being prepended to each tag.
         for filename, (content, typ) in self.files.items():
             if '.xml' == splitext(filename)[1]:
                 ET.register_namespace('', typ._xmlns[1:-1])
-            content.write(join(mffdir, filename), encoding='UTF-8',
+            content.write(join(self.mffdir, filename), encoding='UTF-8',
                           xml_declaration=True, method='xml')
 
         # convert from .mff to .mfz
-        if ext == '.mfz':
-            check_output(['mff2mfz.py', mffdir])
+        if self.ext == '.mfz':
+            check_output(['mff2mfz.py', self.mffdir])
 
     def export_to_json(self, data):
         """export data to .json file"""