Skip to content

Commit

Permalink
continue work
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBelthle committed Jul 10, 2024
1 parent 4264041 commit 14fe33a
Showing 1 changed file with 38 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,23 @@ def build(self) -> TREE:
files = [d.stem for d in self.config.path.iterdir()]
children: TREE = {}
for file in files:
children[file] = OutputSynthesis(self.context, self.config.next_file(f"{file}.txt"))
synthesis_class = DigestSynthesis if file == "digest" else OutputSynthesis
children[file] = synthesis_class(self.context, self.config.next_file(f"{file}.txt"))
return children


class OutputSynthesis(LazyNode[JSON, bytes, bytes]):
def __init__(self, context: ContextServer, config: FileStudyTreeConfig):
    """Initialize the node by forwarding ``context`` and ``config`` to the parent constructor."""
    super().__init__(context, config)

def get_lazy_content(
    self,
    url: t.Optional[t.List[str]] = None,
    depth: int = -1,
    expanded: bool = False,
) -> str:
    """Return the lazy placeholder string for this node.

    The placeholder is ``matrix://`` followed by the file name (final path
    component) of ``self.config.path``.

    Args:
        url: Unused by this implementation.
        depth: Unused by this implementation.
        expanded: Unused by this implementation.

    Returns:
        A string of the form ``matrix://<file name>``.
    """
    file_name = self.config.path.name
    return f"matrix://{file_name}"

def load(
self,
url: t.Optional[t.List[str]] = None,
Expand All @@ -32,7 +41,9 @@ def load(
) -> JSON:
file_path = self.config.path
df = pd.read_csv(file_path, sep="\t")
return t.cast(JSON, df.to_dict(orient="split"))
output = df.to_dict(orient="split")
del output["index"]
return t.cast(JSON, output)

def dump(self, data: bytes, url: t.Optional[t.List[str]] = None) -> None:
self.config.path.parent.mkdir(exist_ok=True, parents=True)
Expand All @@ -51,3 +62,28 @@ def normalize(self) -> None:

def denormalize(self) -> None:
    """Do nothing: this node has no external store."""
    pass  # no external store in this node


class DigestSynthesis(OutputSynthesis):
    """Synthesis node whose file is parsed with a three-row column header.

    Differs from ``OutputSynthesis`` only in how ``load`` reads the file.
    """

    def __init__(self, context: ContextServer, config: FileStudyTreeConfig):
        """Initialize the node by forwarding both arguments to the parent constructor."""
        super().__init__(context, config)

    def load(
        self,
        url: t.Optional[t.List[str]] = None,
        depth: int = -1,
        expanded: bool = False,
        formatted: bool = True,
    ) -> JSON:
        """Read the file at ``self.config.path`` and return it in "split" form.

        The file is read as tab-separated text: the first four rows are
        skipped, the next three rows form the column header, ``"N/A"`` cells
        are treated as missing values, and floats are parsed with the
        ``"legacy"`` precision mode.

        Args:
            url: Unused by this implementation.
            depth: Unused by this implementation.
            expanded: Unused by this implementation.
            formatted: Unused by this implementation.

        Returns:
            The ``orient="split"`` dictionary of the parsed table with its
            ``"index"`` entry removed.
        """
        frame = pd.read_csv(
            self.config.path,
            sep="\t",
            skiprows=4,
            header=[0, 1, 2],
            na_values="N/A",
            float_precision="legacy",
        )
        split_content = frame.to_dict(orient="split")
        # The row index is dropped from the returned payload.
        split_content.pop("index")
        return t.cast(JSON, split_content)

0 comments on commit 14fe33a

Please sign in to comment.