Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Neuralynx file header processing #1562

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions neo/rawio/neuralynxrawio/ncssections.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@
import math
import numpy as np

from enum import IntEnum, auto


class AcqType(IntEnum):
    """
    Type of acquisition system which produced a Neuralynx file.

    Members are what NlxHeader.type_of_recording() returns and what the Ncs section
    building code compares against to choose how sampling frequency and microseconds
    per sample are determined.

    Numeric values are assigned by auto() in declaration order, so new members should
    be appended rather than inserted. Member names are also looked up by string
    (AcqType[NAME]) from upper-cased header text, so they must stay upper case.
    """

    PRE4 = auto()  # older style acquisition with rounded sampling frequency
    BML = auto()  # BML style with fractional frequency and micros per sample
    DIGITALLYNX = auto()
    DIGITALLYNXSX = auto()
    CHEETAH64 = auto()
    RAWDATAFILE = auto()
    CHEETAH560 = auto()  # header has FileType but no FileVersion
    ATLAS = auto()
    UNKNOWN = auto()  # header did not match any recognized type

class NcsSections:
"""
Expand Down Expand Up @@ -250,7 +263,7 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=Tru
acqType = nlxHdr.type_of_recording()
freq = nlxHdr["sampling_rate"]

if acqType == "PRE4":
if acqType == AcqType.PRE4:
# Old Neuralynx style with truncated whole microseconds for actual sampling. This
# restriction arose from the sampling being based on a master 1 MHz clock.
microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq))
Expand All @@ -266,7 +279,8 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=Tru
ncsSects.sampFreqUsed = sampFreqUsed
ncsSects.microsPerSampUsed = microsPerSampUsed

elif acqType in ["DIGITALLYNX", "DIGITALLYNXSX", "CHEETAH64", "CHEETAH560", "RAWDATAFILE"]:
elif acqType in [AcqType.DIGITALLYNX, AcqType.DIGITALLYNXSX, AcqType.CHEETAH64,
AcqType.CHEETAH560, AcqType.RAWDATAFILE]:
# digital lynx style with fractional frequency and micros per samp determined from block times
if gapTolerance is None:
if strict_gap_mode:
Expand All @@ -293,7 +307,7 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=Tru
ncsSects.sampFreqUsed = sampFreqUsed
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreqUsed)

elif acqType == "BML" or acqType == "ATLAS":
elif acqType == AcqType.BML or acqType == AcqType.ATLAS:
# BML & ATLAS style with fractional frequency and micros per samp
if strict_gap_mode:
# this is the old behavior, maybe we could put 0.9 sample interval no ?
Expand Down
258 changes: 142 additions & 116 deletions neo/rawio/neuralynxrawio/nlxheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import re
from collections import OrderedDict

from neo.rawio.neuralynxrawio.ncssections import AcqType


class NlxHeader(OrderedDict):
"""
Expand All @@ -22,7 +24,9 @@ def _to_bool(txt):
else:
raise Exception("Can not convert %s to bool" % txt)

# keys that may be present in header which we parse
# Keys that may be present in header which we parse. First entry of tuple is what is
# present in header, second entry is the key which will be used in dictionary, third
# entry is the type the value will be converted to.
txt_header_keys = [
("AcqEntName", "channel_names", None), # used
("FileType", "", None),
Expand Down Expand Up @@ -68,69 +72,6 @@ def _to_bool(txt):
("NLX_Base_Class_Type", "", None), # in version 4 and earlier versions of Cheetah
]

# Filename and datetime may appear in header lines starting with # at
# beginning of header or in later versions as a property. The exact format
# used depends on the application name and its version as well as the
# -FileVersion property.
#
# There are 4 styles understood by this code and the patterns used for parsing
# the items within each are stored in a dictionary. Each dictionary is then
# stored in main dictionary keyed by an abbreviation for the style.
header_pattern_dicts = {
# BML
"bml": dict(
datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%y %H:%M:%S.%f",
),
# Cheetah after version 1 and before version 5
"bv5": dict(
datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
# Cheetah version 5.4.0
"v5.4.0": dict(
datetime1_regex=r"## Time Opened \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
datetime2_regex=r"## Time Closed \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
# Cheetah version 5.6.0, some range of versions in between
"v5.6.0": dict(
datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
# Cheetah version 5 before and including v 5.6.4 as well as version 1
"bv5.6.4": dict(
datetime1_regex=r"## Time Opened \(m/d/y\): (?P<date>\S+)" r" \(h:m:s\.ms\) (?P<time>\S+)",
datetime2_regex=r"## Time Closed \(m/d/y\): (?P<date>\S+)" r" \(h:m:s\.ms\) (?P<time>\S+)",
filename_regex=r"## File Name (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
"neuraview2": dict(
datetime1_regex=r"## Date Opened: \(mm/dd/yyy\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
datetime2_regex=r"## Date Closed: \(mm/dd/yyy\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S",
),
"peg": dict(
datetime1_regex=r"-TimeCreated (?P<date>\S+) (?P<time>\S+)",
datetime2_regex=r"-TimeClosed (?P<date>\S+) (?P<time>\S+)",
filename_regex=r'-OriginalFileName "?(?P<filename>\S+)"?',
datetimeformat=r"%Y/%m/%d %H:%M:%S",
datetime2format=r"%Y/%m/%d %H:%M:%S.%f",
),
# Cheetah after v 5.6.4 and default for others such as Pegasus
"def": dict(
datetime1_regex=r"-TimeCreated (?P<date>\S+) (?P<time>\S+)",
datetime2_regex=r"-TimeClosed (?P<date>\S+) (?P<time>\S+)",
filename_regex=r'-OriginalFileName "?(?P<filename>\S+)"?',
datetimeformat="%Y/%m/%d %H:%M:%S",
),
}

def __init__(self, filename, props_only=False):
"""
Factory function to build NlxHeader for a given file.
Expand All @@ -148,6 +89,10 @@ def __init__(self, filename, props_only=False):
ValueError("Neuralynx files must start with 8 # characters.")

self.read_properties(filename, txt_header)
numChidEntries = self.convert_channel_ids_names(filename)
self.setApplicationAndVersion()
self.setBitToMicroVolt()
self.setInputRanges(numChidEntries)

if not props_only:
self.readTimeDate(txt_header)
Expand Down Expand Up @@ -183,25 +128,31 @@ def read_properties(self, filename, txt_header):
if type_ is not None:
value = type_(value)
self[name] = value
# if channel_ids or s not in self then the filename is used
name = os.path.splitext(os.path.basename(filename))[0]
# convert channel ids
if "channel_ids" in self:
chid_entries = re.findall(r"\S+", self["channel_ids"])
self["channel_ids"] = [int(c) for c in chid_entries]
else:
self["channel_ids"] = ["unknown"]
# convert channel names
if "channel_names" in self:
name_entries = re.findall(r"\S+", self["channel_names"])
if len(name_entries) == 1:
self["channel_names"] = name_entries * len(self["channel_ids"])
assert len(self["channel_names"]) == len(
self["channel_ids"]
), "Number of channel ids does not match channel names."
else:
self["channel_names"] = ["unknown"] * len(self["channel_ids"])
# version and application name

def setInputRanges(self, numChidEntries):
    """
    Convert the "InputRange" property, if present, from text to a list of ints.

    A single value in the header is repeated numChidEntries times; several values
    are converted one for one. Nothing is changed when "InputRange" is absent.

    :param numChidEntries: number of channel id entries found in the header, as
        returned by convert_channel_ids_names()
    :raises AssertionError: if the resulting number of input ranges differs from
        numChidEntries
    """
    if "InputRange" not in self:
        return

    ir_entries = re.findall(r"\w+", self["InputRange"])
    if len(ir_entries) == 1:
        self["InputRange"] = [int(ir_entries[0])] * numChidEntries
    else:
        self["InputRange"] = [int(e) for e in ir_entries]

    # Raised explicitly rather than with an assert statement so the check is still
    # performed when Python runs with -O; the exception type is unchanged.
    if len(self["InputRange"]) != numChidEntries:
        raise AssertionError("Number of channel ids does not match input range values.")

def setBitToMicroVolt(self):
    """
    Convert the "bit_to_microVolt" property, if present, from text to a list of floats.

    Each whitespace separated entry is converted to float and multiplied by 1e6. A
    single entry is repeated once per channel id. Nothing is changed when the property
    is absent.

    "channel_ids" must already have been converted to a list (see
    convert_channel_ids_names()), since its length determines the expected number of
    conversion factors.

    :raises AssertionError: if the number of conversion factors differs from the
        number of channel ids
    """
    if "bit_to_microVolt" not in self:
        return

    btm_entries = re.findall(r"\S+", self["bit_to_microVolt"])
    if len(btm_entries) == 1:
        btm_entries = btm_entries * len(self["channel_ids"])
    self["bit_to_microVolt"] = [float(e) * 1e6 for e in btm_entries]

    # Raised explicitly rather than with an assert statement so the check is still
    # performed when Python runs with -O; the exception type is unchanged.
    if len(self["bit_to_microVolt"]) != len(self["channel_ids"]):
        raise AssertionError(
            "Number of channel ids does not match bit_to_microVolt conversion factors."
        )

def setApplicationAndVersion(self):
"""
Set "ApplicationName" property and app_version attribute based on existing properties
"""
# older Cheetah versions with CheetahRev property
if "CheetahRev" in self:
assert "ApplicationName" not in self
Expand All @@ -214,35 +165,111 @@ def read_properties(self, filename, txt_header):
assert len(match) == 1, "impossible to find application name and version"
self["ApplicationName"], app_version = match[0]
# BML Ncs files contain neither property, but do contain 'NLX_Base_Class_Type'
elif "NLX_Base_Class_Type" in txt_header:
elif "NLX_Base_Class_Type" in self:
self["ApplicationName"] = "BML"
app_version = "2.0"
# Neuraview Ncs file contained neither property nor
# NLX_Base_Class_Type information
# Neuraview Ncs file contained neither property nor NLX_Base_Class_Type information
else:
self["ApplicationName"] = "Neuraview"
app_version = "2"

if " Development" in app_version:
app_version = app_version.replace(" Development", ".dev0")

self["ApplicationVersion"] = Version(app_version)
# convert bit_to_microvolt
if "bit_to_microVolt" in self:
btm_entries = re.findall(r"\S+", self["bit_to_microVolt"])
if len(btm_entries) == 1:
btm_entries = btm_entries * len(self["channel_ids"])
self["bit_to_microVolt"] = [float(e) * 1e6 for e in btm_entries]
assert len(self["bit_to_microVolt"]) == len(

def convert_channel_ids_names(self, filename):
    """
    Convert channel ids and channel name properties, if present.

    "channel_ids" is converted from whitespace separated text to a list of ints, or set
    to ["unknown"] when absent. "channel_names" is converted to a list of names; a
    single name is repeated once per channel id, and ["unknown"] per channel id is used
    when the property is absent.

    :param filename: name of the file the header was read from. Currently unused; kept
        so existing callers continue to work.
    :return number of channel id entries found in the header (0 when "channel_ids" was
        not present, even though a placeholder entry is then stored)
    :raises AssertionError: if the number of channel names differs from the number of
        channel ids
    """
    # convert channel ids
    if "channel_ids" in self:
        chid_entries = re.findall(r"\S+", self["channel_ids"])
        self["channel_ids"] = [int(c) for c in chid_entries]
    else:
        self["channel_ids"] = ["unknown"]
        chid_entries = []

    # convert channel names
    if "channel_names" in self:
        name_entries = re.findall(r"\S+", self["channel_names"])
        if len(name_entries) == 1:
            name_entries = name_entries * len(self["channel_ids"])
        # Always store the split list: leaving the raw header text in place when several
        # names are given would make the length check below count characters.
        self["channel_names"] = name_entries
        # Raised explicitly rather than with an assert statement so the check is still
        # performed when Python runs with -O; the exception type is unchanged.
        if len(self["channel_names"]) != len(self["channel_ids"]):
            raise AssertionError("Number of channel ids does not match channel names.")
    else:
        self["channel_names"] = ["unknown"] * len(self["channel_ids"])

    return len(chid_entries)

# Filename and datetime may appear in header lines starting with # at
# beginning of header or in later versions as a property. The exact format
# used depends on the application name and its version as well as the
# -FileVersion property.
#
# Several styles are understood by this code and the patterns used for parsing
# the items within each are stored in a dictionary. Each dictionary is then
# stored in the main dictionary keyed by an abbreviation for the style.
header_pattern_dicts = {
# BML
"bml": dict(
datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%y %H:%M:%S.%f",
),
# Cheetah after version 1 and before version 5
"bv5": dict(
datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
# Cheetah version 5.4.0
"v5.4.0": dict(
datetime1_regex=r"## Time Opened \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
datetime2_regex=r"## Time Closed \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
# Cheetah version 5.6.0, some range of versions in between
"v5.6.0": dict(
datetime1_regex=r"## Time Opened: \(m/d/y\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
# Cheetah version 5 before and including v 5.6.4 as well as version 1
"bv5.6.4": dict(
datetime1_regex=r"## Time Opened \(m/d/y\): (?P<date>\S+)" r" \(h:m:s\.ms\) (?P<time>\S+)",
datetime2_regex=r"## Time Closed \(m/d/y\): (?P<date>\S+)" r" \(h:m:s\.ms\) (?P<time>\S+)",
filename_regex=r"## File Name (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S.%f",
),
"neuraview2": dict(
datetime1_regex=r"## Date Opened: \(mm/dd/yyy\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
datetime2_regex=r"## Date Closed: \(mm/dd/yyy\): (?P<date>\S+)" r" At Time: (?P<time>\S+)",
filename_regex=r"## File Name: (?P<filename>\S+)",
datetimeformat="%m/%d/%Y %H:%M:%S",
),
"peg": dict(
datetime1_regex=r"-TimeCreated (?P<date>\S+) (?P<time>\S+)",
datetime2_regex=r"-TimeClosed (?P<date>\S+) (?P<time>\S+)",
filename_regex=r'-OriginalFileName "?(?P<filename>\S+)"?',
datetimeformat=r"%Y/%m/%d %H:%M:%S",
datetime2format=r"%Y/%m/%d %H:%M:%S.%f",
),
# Cheetah after v 5.6.4 and default for others such as Pegasus
"def": dict(
datetime1_regex=r"-TimeCreated (?P<date>\S+) (?P<time>\S+)",
datetime2_regex=r"-TimeClosed (?P<date>\S+) (?P<time>\S+)",
filename_regex=r'-OriginalFileName "?(?P<filename>\S+)"?',
datetimeformat="%Y/%m/%d %H:%M:%S",
),
}

def readTimeDate(self, txt_header):
"""
Expand Down Expand Up @@ -320,51 +347,50 @@ def type_of_recording(self):
"""
Determines type of recording in Ncs file with this header.

RETURN:
one of 'PRE4','BML','DIGITALLYNX','DIGITALLYNXSX','UNKNOWN'
RETURN: member of the AcqType enum defined in the ncssections module
"""

if "NLX_Base_Class_Type" in self:

# older style standard neuralynx acquisition with rounded sampling frequency
if self["NLX_Base_Class_Type"] == "CscAcqEnt":
return "PRE4"
return AcqType.PRE4

# BML style with fractional frequency and microsPerSamp
elif self["NLX_Base_Class_Type"] == "BmlAcq":
return "BML"
return AcqType.BML

else:
return "UNKNOWN"
return AcqType.UNKNOWN

elif "HardwareSubSystemType" in self:

# DigitalLynx
if self["HardwareSubSystemType"] == "DigitalLynx":
return "DIGITALLYNX"
return AcqType.DIGITALLYNX

# DigitalLynxSX
elif self["HardwareSubSystemType"] == "DigitalLynxSX":
return "DIGITALLYNXSX"
return AcqType.DIGITALLYNXSX

# Cheetah64
elif self["HardwareSubSystemType"] == "Cheetah64":
return "CHEETAH64"
return AcqType.CHEETAH64

# RawDataFile
elif self["HardwareSubSystemType"] == "RawDataFile":
return "RAWDATAFILE"
return AcqType.RAWDATAFILE

else:
return "UNKNOWN"
return AcqType.UNKNOWN

elif "FileType" in self:

if "FileVersion" in self and self["FileVersion"] in ["3.2", "3.3", "3.4"]:
return self["AcquisitionSystem"].split()[1].upper()
return AcqType[self["AcquisitionSystem"].split()[1].upper()]

else:
return "CHEETAH560" # only known case of FileType without FileVersion
return AcqType.CHEETAH560 # only known case of FileType without FileVersion

else:
return "UNKNOWN"
return AcqType.UNKNOWN
Loading
Loading