Skip to content

Commit

Permalink
add support for rendering a Partition as RDF
Browse files Browse the repository at this point in the history
  • Loading branch information
ceteri committed Oct 2, 2022
1 parent bb7793b commit 888211f
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
10 changes: 10 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def cli_load_parq (
*,
load_parq: str = typer.Option(..., "--file", "-f", help="input Parquet file"),
save_csv: str = typer.Option(None, "--save-csv", help="output as CSV"),
save_rdf: str = typer.Option(None, "--save-rdf", help="output as RDF"),
rdf_format: str = typer.Option("ttl", "--format", help="RDF format: ttl, rdf, jsonld, etc."),
debug: bool = False,
) -> None:
"""
Expand All @@ -46,12 +48,17 @@ def cli_load_parq (
if save_csv is not None:
part.save_file_csv(cloudpathlib.AnyPath(save_csv))

if save_rdf is not None:
part.save_file_rdf(cloudpathlib.AnyPath(save_rdf), rdf_format)


@APP.command("load-csv")
def cli_load_csv (
*,
load_csv: str = typer.Option(..., "--file", "-f", help="input CSV file"),
save_parq: str = typer.Option(None, "--save-parq", help="output as Parquet"),
save_rdf: str = typer.Option(None, "--save-rdf", help="output as RDF"),
rdf_format: str = typer.Option("ttl", "--format", help="RDF format: ttl, rdf, jsonld, etc."),
debug: bool = False,
) -> None:
"""
Expand All @@ -71,6 +78,9 @@ def cli_load_csv (
if save_parq is not None:
part.save_file_parquet(cloudpathlib.AnyPath(save_parq))

if save_rdf is not None:
part.save_file_rdf(cloudpathlib.AnyPath(save_rdf), rdf_format)


@APP.command("load-rdf")
def cli_load_rdf (
Expand Down
30 changes: 30 additions & 0 deletions pynock/pynock.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import pyarrow as pa # type: ignore # pylint: disable=E0401
import pyarrow.lib # type: ignore # pylint: disable=E0401
import pyarrow.parquet as pq # type: ignore # pylint: disable=E0401
import rdflib


######################################################################
Expand Down Expand Up @@ -452,3 +453,32 @@ def save_file_csv (
"""
df = pd.DataFrame([row for row in self.iter_gen_rows()])
df.to_csv(save_csv.as_posix(), index=False)


def save_file_rdf (
    self,
    save_rdf: cloudpathlib.AnyPath,
    rdf_format: str,
    *,
    debug: bool = False,
    ) -> None:
    """
Serialize the RDF rows of this partition to a file.

Rows come from `self.iter_gen_rows()`; any row whose `is_rdf` value is
falsy is ignored. A row with a negative `edge_id` sets the current
subject from its `src_name`. Every other RDF row adds one triple of
(current subject, `rel_name`, `dst_name`) to a `kglab.KnowledgeGraph`,
with each term wrapped as an `rdflib` `URIRef`.

    save_rdf:
destination path, passed through to `KnowledgeGraph.save_rdf()`

    rdf_format:
serialization format name, forwarded as the `format` keyword

    debug:
when true, print each triple through `ic()` as it is added
    """
    graph = kglab.KnowledgeGraph()

    # NOTE(review): `subj` stays `None` until the first row with a
    # negative `edge_id` is seen; presumably the row generator always
    # emits such a row before its edge rows -- confirm against
    # `iter_gen_rows()`.
    subj = None

    for rec in self.iter_gen_rows():
        if not rec["is_rdf"]:
            continue

        if rec["edge_id"] < 0:
            # this row only switches the subject used by later triples
            subj = rdflib.term.URIRef(rec["src_name"])
            continue

        pred = rdflib.term.URIRef(rec["rel_name"])
        objt = rdflib.term.URIRef(rec["dst_name"])
        graph.add(subj, pred, objt)

        if debug:
            # the local names are kept as-is because `ic()` labels its
            # output with the argument expressions
            ic(subj, pred, objt)

    graph.save_rdf(save_rdf, format=rdf_format)

0 comments on commit 888211f

Please sign in to comment.