Skip to content

Commit

Permalink
Merge pull request #17517 from astrovsky01/add_npy
Browse files Browse the repository at this point in the history
add npy datatype
  • Loading branch information
jdavcs authored Feb 26, 2024
2 parents c00a47a + 97f8782 commit d5bfe05
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 0 deletions.
2 changes: 2 additions & 0 deletions lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@
<datatype extension="ktab" type="galaxy.datatypes.binary:Binary" subclass="true" description="A table of canonical k‑mers and their counts for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
<datatype extension="hist" type="galaxy.datatypes.binary:Binary" subclass="true" description="A binary histogram file of kmers and frequencies for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
<datatype extension="prof" type="galaxy.datatypes.binary:Binary" subclass="true" description="Read profile file for the fastk toolkit." display_in_upload="true" description_url="https://github.com/thegenemyers/FASTK?tab=readme-ov-file#file-encodings"/>
<datatype extension="npy" type="galaxy.datatypes.binary:Numpy" description="Standard format for saving numpy arrays" display_in_upload="true" description_url="https://numpy.org/devdocs/reference/generated/numpy.lib.format.html"/>

<!-- ISA data types -->
<datatype extension="isa-tab" type="galaxy.datatypes.isa:IsaTab" mimetype="application/isa-tools" display_in_upload="true" description="ISA-Tab data type." description_url="https://isa-tools.org"/>
Expand Down Expand Up @@ -1075,6 +1076,7 @@
<sniffer type="galaxy.datatypes.binary:Edr"/>
<sniffer type="galaxy.datatypes.binary:Vel"/>
<sniffer type="galaxy.datatypes.binary:Xlsx"/>
<sniffer type="galaxy.datatypes.binary:Numpy"/>
<sniffer type="galaxy.datatypes.qiime2:QIIME2Metadata"/>
<sniffer type="galaxy.datatypes.qiime2:QIIME2Artifact"/>
<sniffer type="galaxy.datatypes.qiime2:QIIME2Visualization"/>
Expand Down
55 changes: 55 additions & 0 deletions lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -4538,3 +4538,58 @@ def display_peek(self, dataset: DatasetProtocol) -> str:
return dataset.peek
except Exception:
return f"Binary FITS file size ({nice_size(dataset.get_size())})"


@build_sniff_from_prefix
class Numpy(Binary):
    """
    Class defining a numpy data file

    Files are recognised by the 6-byte magic string ``\\x93NUMPY`` at offset 0.
    The two bytes that follow it are read as the major and minor version of
    the file format and stored in the ``version_str`` metadata element.

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('test.npy')
    >>> Numpy().sniff(fname)
    True
    """

    file_ext = "npy"

    MetadataElement(
        name="version_str",
        default="",
        param=MetadataParameter,
        desc="Version string for the numpy file format",
        readonly=True,
        visible=True,
        no_value=0,
        optional=True,
    )

    def _numpy_version_string(self, filename: str) -> str:
        """
        Return the file-format version of ``filename`` as ``"<major>.<minor>"``.

        Only the first 8 bytes are read: the 6-byte magic string followed by
        one byte each for the major and minor version numbers.

        :raises ValueError: if the file holds fewer than 8 bytes.
        :raises OSError: if the file cannot be opened or read.
        """
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(filename, "rb") as handle:
            header = handle.read(8)
        if len(header) < 8:
            raise ValueError(f"numpy header is truncated: expected 8 bytes, got {len(header)}")
        # Indexing a bytes object yields ints, so these format as decimal numbers.
        major, minor = header[6], header[7]
        return f"{major}.{minor}"

    def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
        """
        Populate ``dataset.metadata.version_str`` from the file header.

        Best effort: any failure is logged as a warning and the metadata
        element is left unchanged.
        """
        try:
            dataset.metadata.version_str = self._numpy_version_string(dataset.get_file_name())
        except Exception as e:
            log.warning("%s, set_meta Exception: %s", self, e)

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """Return True when the file prefix starts with the numpy magic string."""
        # The first 6 bytes of any numpy file are '\x93NUMPY', followed by bytes for the
        # file format version number and info about header data. The rest of the file
        # contains binary data.
        return file_prefix.startswith_bytes(b"\x93NUMPY")

    def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
        """Set the dataset's peek and blurb; report a purged file when the data is gone."""
        if not dataset.dataset.purged:
            dataset.peek = f"Binary numpy file version {dataset.metadata.version_str}"
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"

    def display_peek(self, dataset: DatasetProtocol) -> str:
        """Return the stored peek, falling back to a generic size description on error."""
        try:
            return dataset.peek
        except Exception:
            return f"Binary numpy file ({nice_size(dataset.get_size())})"
Binary file added lib/galaxy/datatypes/test/test.npy
Binary file not shown.

0 comments on commit d5bfe05

Please sign in to comment.