Replace file_name property with get_file_name function
SergeyYakubov authored and jdavcs committed Nov 8, 2023
1 parent cc6f437 commit c30a4c0
Showing 97 changed files with 517 additions and 480 deletions.
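The change is mechanical throughout: every read of the `file_name` property becomes a call to the `get_file_name()` method. A minimal sketch of the pattern, with hypothetical class and attribute names (the real definitions live on Galaxy's dataset model classes, and whether the old property survives as an alias is an assumption here):

```python
class Dataset:
    """Illustrative stand-in for Galaxy's dataset model (hypothetical)."""

    def __init__(self, path: str):
        self._path = path

    def get_file_name(self) -> str:
        # A method rather than a bare attribute leaves room for arguments
        # and indirection later (e.g. resolving through an object store).
        return self._path

    @property
    def file_name(self) -> str:
        # Assumed backward-compatible alias; the diff below migrates
        # callers off it and onto get_file_name().
        return self.get_file_name()


ds = Dataset("/data/dataset_1.dat")
assert ds.file_name == ds.get_file_name()
```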
2 changes: 1 addition & 1 deletion doc/source/admin/jobs.md
@@ -254,7 +254,7 @@ def ncbi_blastn_wrapper(job):
# Allocate extra time
inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
query_file = inp_data[ "query" ].file_name
query_file = inp_data[ "query" ].get_file_name()
query_size = os.path.getsize( query_file )
if query_size > 1024 * 1024:
walltime_str = "walltime=24:00:00/"
8 changes: 4 additions & 4 deletions doc/source/dev/data_types.md
@@ -282,7 +282,7 @@ we're setting metadata about the file.

```python
def set_meta(self, dataset, **kwd):
dataset.metadata.number_of_sequences = self._count_genbank_sequences(dataset.file_name)
dataset.metadata.number_of_sequences = self._count_genbank_sequences(dataset.get_file_name())
```

Now we'll need to make use of this in our `set_peek`
@@ -297,7 +297,7 @@ override:
else:
dataset.blurb = "%s sequences" % dataset.metadata.number_of_sequences
# Get standard text peek from dataset
dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
dataset.peek = data.get_file_peek(dataset.get_file_name(), is_multi_byte=is_multi_byte)
else:
dataset.peek = 'file does not exist'
dataset.blurb = 'file purged from disk'
@@ -332,7 +332,7 @@ class Genbank(data.Text):
else:
dataset.blurb = "%s sequences" % dataset.metadata.number_of_sequences
# Get
dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
dataset.peek = data.get_file_peek(dataset.get_file_name(), is_multi_byte=is_multi_byte)
else:
dataset.peek = 'file does not exist'
dataset.blurb = 'file purged from disk'
@@ -348,7 +348,7 @@ class Genbank(data.Text):
"""
Set the number of sequences in dataset.
"""
dataset.metadata.number_of_sequences = self._count_genbank_sequences(dataset.file_name)
dataset.metadata.number_of_sequences = self._count_genbank_sequences(dataset.get_file_name())

def _count_genbank_sequences(self, filename):
"""
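The excerpt above calls `_count_genbank_sequences` without showing its body. A plausible standalone sketch, assuming each GenBank record opens with a `LOCUS` line (illustrative only, not Galaxy's actual implementation):

```python
def count_genbank_sequences(filename: str) -> int:
    """Sketch of Genbank._count_genbank_sequences: count records by
    their LOCUS header lines (the LOCUS rule is an assumption)."""
    count = 0
    with open(filename) as fh:
        for line in fh:
            if line.startswith("LOCUS"):
                count += 1
    return count
```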
2 changes: 1 addition & 1 deletion lib/galaxy/celery/tasks.py
@@ -152,7 +152,7 @@ def change_datatype(
log.info(f"Changing datatype is not allowed for {model_class} {dataset_instance.id}")
return
if datatype == "auto":
path = dataset_instance.dataset.file_name
path = dataset_instance.dataset.get_file_name()
datatype = sniff.guess_ext(path, datatypes_registry.sniff_order)
datatypes_registry.change_datatype(dataset_instance, datatype)
with transaction(sa_session):
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/annotation.py
@@ -23,7 +23,7 @@ class SnapHmm(Text):

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name)
dataset.peek = get_file_peek(dataset.get_file_name())
dataset.blurb = "SNAP HMM model"
else:
dataset.peek = "file does not exist"
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/assembly.py
@@ -238,7 +238,7 @@ def regenerate_primary_file(self, dataset: DatasetProtocol) -> None:
else:
rval.append(f'<li><a href="{fn}" type="text/plain">{fn}</a>{opt_text}</li>')
rval.append("</ul></div></html>")
with open(dataset.file_name, "w") as f:
with open(dataset.get_file_name(), "w") as f:
f.write("\n".join(rval))
f.write("\n")

114 changes: 57 additions & 57 deletions lib/galaxy/datatypes/binary.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions lib/galaxy/datatypes/blast.py
@@ -70,7 +70,7 @@ class BlastXml(GenericXml):
def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
"""Set the peek and blurb text"""
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name)
dataset.peek = get_file_peek(dataset.get_file_name())
dataset.blurb = "NCBI Blast XML data"
else:
dataset.peek = "file does not exist"
@@ -246,7 +246,7 @@ def display_data(
msg = ""
try:
# Try to use any text recorded in the dummy index file:
with open(dataset.file_name, encoding="utf-8") as handle:
with open(dataset.get_file_name(), encoding="utf-8") as handle:
msg = handle.read().strip()
except Exception:
pass
4 changes: 2 additions & 2 deletions lib/galaxy/datatypes/chain.py
@@ -39,7 +39,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N
"""
data_lines = 0
chains = 0
with compression_utils.get_fileobj(dataset.file_name) as fh:
with compression_utils.get_fileobj(dataset.get_file_name()) as fh:
for line in fh:
line = line.strip()
if line and line.startswith("#"):
@@ -55,7 +55,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = data.get_file_peek(dataset.file_name)
dataset.peek = data.get_file_peek(dataset.get_file_name())
if dataset.metadata.chains:
dataset.blurb = f"{commaify(str(dataset.metadata.chains))} chains"
else:
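The chain.py hunks read through `compression_utils.get_fileobj`, so the same loop works on plain or gzip-compressed datasets. A reduced standalone sketch of the counting logic; the tail of the loop is truncated above, so the record rule here (one chain per line starting with `chain`) is an assumption:

```python
def count_chains(path: str) -> tuple:
    """Sketch of Chain.set_meta's counters for a UCSC chain file."""
    data_lines = 0
    chains = 0
    with open(path) as fh:  # the real code uses compression_utils.get_fileobj
        for line in fh:
            line = line.strip()
            if line.startswith("#"):  # skip comment lines
                continue
            if line.startswith("chain"):  # assumed: each record opens with a chain header
                chains += 1
            if line:
                data_lines += 1
    return chains, data_lines
```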
24 changes: 12 additions & 12 deletions lib/galaxy/datatypes/constructive_solid_geometry.py
@@ -106,7 +106,7 @@ def _is_ply_header(self, fh: "TextIOBase", subtype: str) -> bool:

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
if dataset.has_data():
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
for line in fh:
line = line.strip()
if not line:
@@ -129,7 +129,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name)
dataset.peek = get_file_peek(dataset.get_file_name())
dataset.blurb = f"Faces: {str(dataset.metadata.face)}, Vertices: {str(dataset.metadata.vertex)}"
else:
dataset.peek = "File does not exist"
@@ -292,7 +292,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N
field_components = {}
dataset_structure_complete = False
processing_field_section = False
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
for i, line in enumerate(fh):
line = line.strip()
if not line:
@@ -463,7 +463,7 @@ def get_blurb(self, dataset: HasMetadata) -> str:

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name)
dataset.peek = get_file_peek(dataset.get_file_name())
dataset.blurb = self.get_blurb(dataset)
else:
dataset.peek = "File does not exist"
@@ -556,7 +556,7 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool:

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
if dataset.has_data():
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
for i, line in enumerate(fh):
line = line.strip()
if not line or i > 6:
@@ -572,7 +572,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name, line_count=7)
dataset.peek = get_file_peek(dataset.get_file_name(), line_count=7)
dataset.blurb = f"format: {str(dataset.metadata.format)} dim: {str(dataset.metadata.dimension)} cells: {str(dataset.metadata.cells)}"
else:
dataset.peek = "File does not exist"
@@ -627,7 +627,7 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool:

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
if dataset.has_data():
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
field = ""
for i, line in enumerate(fh):
line = line.strip()
@@ -659,7 +659,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name, line_count=9)
dataset.peek = get_file_peek(dataset.get_file_name(), line_count=9)
dataset.blurb = f"format: {str(dataset.metadata.format)} dim: {str(dataset.metadata.dimension)} cells: {str(dataset.metadata.cells)}"
else:
dataset.peek = "File does not exist"
@@ -681,7 +681,7 @@ def __init__(self, **kwd):
def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
data.Text.set_meta(self, dataset, overwrite=overwrite, **kwd)
if dataset.has_data():
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
dataset.metadata.dimension = self._get_dimension(fh)

def _get_dimension(self, fh: "TextIOBase", maxlines: int = 100, sep: Optional[str] = None) -> Optional[float]:
@@ -723,7 +723,7 @@ def __init__(self, **kwd):
def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
Tabular.set_meta(self, dataset, overwrite=overwrite, **kwd)
if dataset.has_data():
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
dataset.metadata.dimension = self._get_dimension(fh)

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
@@ -768,7 +768,7 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool:

def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> None:
if dataset.has_data():
with open(dataset.file_name, errors="ignore") as fh:
with open(dataset.get_file_name(), errors="ignore") as fh:
for i, line in enumerate(fh):
line = line.strip()
if not line or i > 1:
@@ -784,7 +784,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N

def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:
if not dataset.dataset.purged:
dataset.peek = get_file_peek(dataset.file_name, line_count=3)
dataset.peek = get_file_peek(dataset.get_file_name(), line_count=3)
dataset.blurb = f"Gmsh verion: {str(dataset.metadata.version)} {str(dataset.metadata.format)}"
else:
dataset.peek = "File does not exist"
42 changes: 22 additions & 20 deletions lib/galaxy/datatypes/data.py
@@ -183,7 +183,7 @@ def _get_file_size(data):
if data.dataset.object_store:
file_size = data.dataset.object_store.size(data.dataset)
else:
file_size = os.stat(data.file_name).st_size
file_size = os.stat(data.get_file_name()).st_size
return file_size


@@ -389,7 +389,7 @@ def _archive_composite_dataset(
error = False
msg = ""
ext = data.extension
path = data.file_name
path = data.get_file_name()
efp = data.extra_files_path
# Add any central file to the archive,

@@ -424,7 +424,7 @@ def __archive_extra_files_path(self, extra_files_path: str) -> Generator[Tuple[s
def _serve_raw(
self, dataset: DatasetHasHidProtocol, to_ext: Optional[str], headers: Headers, **kwd
) -> Tuple[IO, Headers]:
headers["Content-Length"] = str(os.stat(dataset.file_name).st_size)
headers["Content-Length"] = str(os.stat(dataset.get_file_name()).st_size)
headers[
"content-type"
] = "application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
@@ -436,7 +436,7 @@ def _serve_raw(
filename_pattern=kwd.get("filename_pattern"),
)
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
return open(dataset.file_name, mode="rb"), headers
return open(dataset.get_file_name(), mode="rb"), headers

def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
"""
@@ -451,13 +451,13 @@ def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
if dataset.datatype.composite_type or dataset.extension.endswith("html"):
main_file = f"{name}.html"
rel_paths.append(main_file)
file_paths.append(dataset.file_name)
file_paths.append(dataset.get_file_name())
for fpath, rpath in self.__archive_extra_files_path(dataset.extra_files_path):
rel_paths.append(os.path.join(name, rpath))
file_paths.append(fpath)
else:
rel_paths.append(f"{name or dataset.file_name}.{dataset.extension}")
file_paths.append(dataset.file_name)
rel_paths.append(f"{name or dataset.get_file_name()}.{dataset.extension}")
file_paths.append(dataset.get_file_name())
return zip(file_paths, rel_paths)

def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
@@ -480,15 +480,15 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
"content-type"
] = "application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
return open(data.file_name, "rb"), headers
return open(data.get_file_name(), "rb"), headers

def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/binary_file.mako",
data=data,
file_contents=open(data.file_name, "rb").read(max_peek_size),
file_contents=open(data.get_file_name(), "rb").read(max_peek_size),
file_size=util.nice_size(file_size),
truncated=file_size > max_peek_size,
),
Expand All @@ -500,13 +500,15 @@ def _serve_file_contents(self, trans, data, headers, preview, file_size, max_pee

preview = util.string_as_bool(preview)
if not preview or isinstance(data.datatype, images.Image) or file_size < max_peek_size:
return self._yield_user_file_content(trans, data, data.file_name, headers), headers
return self._yield_user_file_content(trans, data, data.get_file_name(), headers), headers

# preview large text file
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/large_file.mako", truncated_data=open(data.file_name, "rb").read(max_peek_size), data=data
"/dataset/large_file.mako",
truncated_data=open(data.get_file_name(), "rb").read(max_peek_size),
data=data,
),
headers,
)
@@ -581,8 +583,8 @@ def display_data(
downloading = to_ext is not None
file_size = _get_file_size(dataset)

if not os.path.exists(dataset.file_name):
raise ObjectNotFound(f"File Not Found ({dataset.file_name}).")
if not os.path.exists(dataset.get_file_name()):
raise ObjectNotFound(f"File Not Found ({dataset.get_file_name()}).")

if downloading:
trans.log_event(f"Download dataset id: {str(dataset.id)}")
@@ -618,7 +620,7 @@ def display_as_markdown(self, dataset_instance: DatasetProtocol) -> str:
if self.is_binary:
result = "*cannot display binary content*\n"
else:
with open(dataset_instance.file_name) as f:
with open(dataset_instance.get_file_name()) as f:
contents = f.read(DEFAULT_MAX_PEEK_SIZE)
result = literal_via_fence(contents)
if len(contents) == DEFAULT_MAX_PEEK_SIZE:
@@ -1038,12 +1040,12 @@ def estimate_file_lines(self, dataset: DatasetProtocol) -> Optional[int]:
"""
sample_size = 1048576
try:
with compression_utils.get_fileobj(dataset.file_name) as dataset_fh:
with compression_utils.get_fileobj(dataset.get_file_name()) as dataset_fh:
dataset_read = dataset_fh.read(sample_size)
sample_lines = dataset_read.count("\n")
return int(sample_lines * (float(dataset.get_size()) / float(sample_size)))
except UnicodeDecodeError:
log.error(f"Unable to estimate lines in file {dataset.file_name}")
log.error(f"Unable to estimate lines in file {dataset.get_file_name()}")
return None
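`estimate_file_lines` counts newlines in a 1 MiB sample and scales by the ratio of total dataset size to sample size. A worked example with assumed figures:

```python
# Assumed figures, mirroring the arithmetic in estimate_file_lines above:
sample_size = 1048576            # 1 MiB sample, as in the code
sample_lines = 5000              # newlines counted in that sample
total_size = 104857600           # a hypothetical 100 MiB dataset
estimate = int(sample_lines * (float(total_size) / float(sample_size)))
assert estimate == 500000        # 5,000 lines/MiB scaled to 100 MiB
```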

def count_data_lines(self, dataset: HasFileName) -> Optional[int]:
Expand All @@ -1053,7 +1055,7 @@ def count_data_lines(self, dataset: HasFileName) -> Optional[int]:
"""
CHUNK_SIZE = 2**15 # 32Kb
data_lines = 0
with compression_utils.get_fileobj(dataset.file_name) as in_file:
with compression_utils.get_fileobj(dataset.get_file_name()) as in_file:
# FIXME: Potential encoding issue can prevent the ability to iterate over lines
# causing set_meta process to fail otherwise OK jobs. A better solution than
# a silent try/except is desirable.
Expand All @@ -1063,7 +1065,7 @@ def count_data_lines(self, dataset: HasFileName) -> Optional[int]:
if line and not line.startswith("#"):
data_lines += 1
except UnicodeDecodeError:
log.error(f"Unable to count lines in file {dataset.file_name}")
log.error(f"Unable to count lines in file {dataset.get_file_name()}")
return None
return data_lines

Expand All @@ -1078,7 +1080,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None:

if not dataset.dataset.purged:
# The file must exist on disk for the get_file_peek() method
dataset.peek = get_file_peek(dataset.file_name, width=width, skipchars=skipchars, line_wrap=line_wrap)
dataset.peek = get_file_peek(dataset.get_file_name(), width=width, skipchars=skipchars, line_wrap=line_wrap)
if line_count is None:
# See if line_count is stored in the metadata
if dataset.metadata.data_lines:
@@ -1117,7 +1119,7 @@ def split(cls, input_datasets: List, subdir_generator_function: Callable, split_

if len(input_datasets) > 1:
raise Exception("Text file splitting does not support multiple files")
input_files = [ds.file_name for ds in input_datasets]
input_files = [ds.get_file_name() for ds in input_datasets]

lines_per_file = None
chunk_size = None
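`Text.split` (truncated above) collects each input's `get_file_name()` and then divides the file by `lines_per_file` or `chunk_size`. A hedged sketch of the line-based case only; the helper name and output layout are invented for illustration:

```python
import os

def split_by_lines(input_file: str, lines_per_file: int, out_dir: str) -> list:
    """Write consecutive lines_per_file-sized pieces of input_file to out_dir."""
    parts, buf, part = [], [], 0

    def flush():
        nonlocal part
        path = os.path.join(out_dir, f"part_{part:04d}.txt")
        with open(path, "w") as out:
            out.writelines(buf)
        parts.append(path)
        part += 1
        buf.clear()

    with open(input_file) as fh:
        for line in fh:
            buf.append(line)
            if len(buf) == lines_per_file:
                flush()
    if buf:  # trailing partial chunk
        flush()
    return parts
```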