From 1d4e7ba30e774cf02febb3bdff77e8e417102698 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 15:19:05 +0100 Subject: [PATCH 01/12] Use flate2 crate to read gzipped graphml files --- Cargo.lock | 35 +++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/graphml.rs | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index acd36b0b7..008969089 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "ahash" version = "0.8.11" @@ -59,6 +65,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -102,6 +117,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flate2" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -220,6 +245,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "ndarray" version = "0.16.1" @@ -566,6 +600,7 @@ version = "0.16.0" dependencies = [ "ahash", "fixedbitset", + "flate2", "hashbrown 0.14.5", "indexmap", "ndarray", diff --git a/Cargo.toml b/Cargo.toml index bf4b4adda..9a3c84e4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = { version = "1.0", features = ["union"] } rustworkx-core = { path = "rustworkx-core", version = "=0.16.0" } +flate2 = "1.0.35" [dependencies.pyo3] version = "0.22.6" diff --git a/src/graphml.rs b/src/graphml.rs index 6211b25ce..c489319a3 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -13,11 +13,15 @@ #![allow(clippy::borrow_as_ptr)] use std::convert::From; +use std::fs::File; +use std::ffi::OsStr; +use std::io::{BufRead, BufReader}; use std::iter::FromIterator; use std::num::{ParseFloatError, ParseIntError}; use std::path::Path; use std::str::ParseBoolError; +use flate2::bufread::GzDecoder; use hashbrown::HashMap; use indexmap::IndexMap; @@ -524,19 +528,25 @@ impl GraphML { Ok(()) } + /// Open file compressed with gzip, using the GzDecoder + /// Returns a quick_xml Reader instance + fn open_file_gzip>( ath: P) -> Result>>>,quick_xml::Error>{ + let file = File::open(path)?; + let reader = BufReader::new(file); + let gzip_reader = BufReader::new(GzDecoder::new(reader)); + Ok(Reader::from_reader(gzip_reader)) + } - /// Parse a file written in GraphML format. + /// Parse a file written in GraphML format from a BufReader /// /// The implementation is based on a state machine in order to /// accept only valid GraphML syntax (e.g a `` element should /// be nested inside a `` element) where the internal state changes /// after handling each quick_xml event. - fn from_file>(path: P) -> Result { + fn read_graph_from_reader(mut reader: Reader)-> Result{ let mut graphml = GraphML::default(); - let mut buf = Vec::new(); - let mut reader = Reader::from_file(path)?; - + let mut buf: Vec<_> = Vec::new(); let mut state = State::Start; let mut domain_of_last_key = Domain::Node; let mut last_data_key = String::new(); @@ -677,6 +687,23 @@ impl GraphML { Ok(graphml) } + + /// Read a graph from a file in the GraphML format + /// If the the file extension is "graphmlz" or "gz", decompress it on the fly + fn from_file>(path: P) -> Result { + let extension = path.as_ref().extension().unwrap_or(OsStr::new("")); + let graph: Result; + + if extension.eq("graphmlz") || extension.eq("gz"){ + let reader = Self::open_file_gzip(path)?; + graph = Self::read_graph_from_reader(reader); + } else{ + let reader = Reader::from_file(path)?; + graph = Self::read_graph_from_reader(reader); + } + + graph + } } /// Read a list of graphs from a file in GraphML format. From d9dbceb6116a56464d8e4b2eee600b88ac2b36f4 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 15:24:57 +0100 Subject: [PATCH 02/12] fix typo --- src/graphml.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graphml.rs b/src/graphml.rs index c489319a3..0d0c4329d 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -530,7 +530,7 @@ impl GraphML { } /// Open file compressed with gzip, using the GzDecoder /// Returns a quick_xml Reader instance - fn open_file_gzip>( ath: P) -> Result>>>,quick_xml::Error>{ + fn open_file_gzip>(path: P) -> Result>>>,quick_xml::Error>{ let file = File::open(path)?; let reader = BufReader::new(file); let gzip_reader = BufReader::new(GzDecoder::new(reader)); @@ -546,7 +546,7 @@ impl GraphML { fn read_graph_from_reader(mut reader: Reader)-> Result{ let mut graphml = GraphML::default(); - let mut buf: Vec<_> = Vec::new(); + let mut buf = Vec::new(); let mut state = State::Start; let mut domain_of_last_key = Domain::Node; let mut last_data_key = String::new(); From 1996aa16be7a83260dbe0fe15e41c2e28c346c10 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 15:27:57 +0100 Subject: [PATCH 03/12] run rustfmt --- src/graphml.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/graphml.rs b/src/graphml.rs index 0d0c4329d..b98d5efe2 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -13,8 +13,8 @@ #![allow(clippy::borrow_as_ptr)] use std::convert::From; -use std::fs::File; use std::ffi::OsStr; +use std::fs::File; use std::io::{BufRead, BufReader}; use std::iter::FromIterator; use std::num::{ParseFloatError, ParseIntError}; @@ -530,7 +530,9 @@ impl GraphML { } /// Open file compressed with gzip, using the GzDecoder /// Returns a quick_xml Reader instance - fn open_file_gzip>(path: P) -> Result>>>,quick_xml::Error>{ + fn open_file_gzip>( + path: P, + ) -> Result>>>, quick_xml::Error> { let file = File::open(path)?; let reader = BufReader::new(file); let gzip_reader = BufReader::new(GzDecoder::new(reader)); @@ -543,7 +545,7 @@ impl GraphML { /// accept only valid GraphML syntax (e.g a `` element should /// be nested inside a `` element) where the internal state changes /// after handling each quick_xml event. - fn read_graph_from_reader(mut reader: Reader)-> Result{ + fn read_graph_from_reader(mut reader: Reader) -> Result { let mut graphml = GraphML::default(); let mut buf = Vec::new(); @@ -694,14 +696,14 @@ impl GraphML { let extension = path.as_ref().extension().unwrap_or(OsStr::new("")); let graph: Result; - if extension.eq("graphmlz") || extension.eq("gz"){ + if extension.eq("graphmlz") || extension.eq("gz") { let reader = Self::open_file_gzip(path)?; graph = Self::read_graph_from_reader(reader); - } else{ + } else { let reader = Reader::from_file(path)?; graph = Self::read_graph_from_reader(reader); } - + graph } } From 470454753537c6e9b65f9ffc215047e7a1c1ec52 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 15:31:28 +0100 Subject: [PATCH 04/12] apply suggestion from clippy --- src/graphml.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/graphml.rs b/src/graphml.rs index b98d5efe2..1fe74551c 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -694,15 +694,14 @@ impl GraphML { /// If the the file extension is "graphmlz" or "gz", decompress it on the fly fn from_file>(path: P) -> Result { let extension = path.as_ref().extension().unwrap_or(OsStr::new("")); - let graph: Result; - if extension.eq("graphmlz") || extension.eq("gz") { + let graph: Result = if extension.eq("graphmlz") || extension.eq("gz") { let reader = Self::open_file_gzip(path)?; - graph = Self::read_graph_from_reader(reader); + Self::read_graph_from_reader(reader) } else { let reader = Reader::from_file(path)?; - graph = Self::read_graph_from_reader(reader); - } + Self::read_graph_from_reader(reader) + }; graph } From 58777d4c32389c37326f96b1883fdf9112470173 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 15:52:33 +0100 Subject: [PATCH 05/12] add test for gzipped graphml --- tests/test_graphml.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tests/test_graphml.py b/tests/test_graphml.py index fee85da4a..50b127c85 100644 --- a/tests/test_graphml.py +++ b/tests/test_graphml.py @@ -12,6 +12,8 @@ import unittest import tempfile +import gzip + import numpy import rustworkx @@ -54,9 +56,9 @@ def assertGraphMLRaises(self, graph_xml): fd.flush() with self.assertRaises(Exception): rustworkx.read_graphml(fd.name) - - def test_simple(self): - graph_xml = self.HEADER.format( + + def graphml_xml_example(self): + return self.HEADER.format( """ yellow @@ -80,6 +82,8 @@ def test_simple(self): """ ) + def test_simple(self): + graph_xml = self.graphml_xml_example() with tempfile.NamedTemporaryFile("wt") as fd: fd.write(graph_xml) fd.flush() @@ -96,6 +100,30 @@ def test_simple(self): ] self.assertGraphEqual(graph, nodes, edges, directed=False) + def test_gzipped(self): + graph_xml = self.graphml_xml_example() + + ## Test reading a graphmlz + with tempfile.NamedTemporaryFile("w+b") as fd: + #fd.write(graph_xml) + fd.flush() + newname = fd.name+".gz" + with gzip.open(newname, "wt") as wf: + wf.write(graph_xml) + + graphml = rustworkx.read_graphml(newname) + graph = graphml[0] + nodes = [ + {"id": "n0", "color": "blue"}, + {"id": "n1", "color": "yellow"}, + {"id": "n2", "color": "green"}, + ] + edges = [ + ("n0", "n1", {"fidelity": 0.98}), + ("n0", "n2", {"fidelity": 0.95}), + ] + self.assertGraphEqual(graph, nodes, edges, directed=False) + def test_multiple_graphs_in_single_file(self): graph_xml = self.HEADER.format( """ From 2dba2529004f6cb7424eb27cbae319c12d6e4bf6 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 16:11:30 +0100 Subject: [PATCH 06/12] write separate function --- src/graphml.rs | 56 +++++++++++++++++++++++++++++++++---------- src/lib.rs | 1 + tests/test_graphml.py | 6 ++--- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/src/graphml.rs b/src/graphml.rs index 1fe74551c..776385137 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -13,7 +13,6 @@ #![allow(clippy::borrow_as_ptr)] use std::convert::From; -use std::ffi::OsStr; use std::fs::File; use std::io::{BufRead, BufReader}; use std::iter::FromIterator; @@ -691,19 +690,16 @@ impl GraphML { } /// Read a graph from a file in the GraphML format - /// If the the file extension is "graphmlz" or "gz", decompress it on the fly fn from_file>(path: P) -> Result { - let extension = path.as_ref().extension().unwrap_or(OsStr::new("")); - - let graph: Result = if extension.eq("graphmlz") || extension.eq("gz") { - let reader = Self::open_file_gzip(path)?; - Self::read_graph_from_reader(reader) - } else { - let reader = Reader::from_file(path)?; - Self::read_graph_from_reader(reader) - }; + let reader = Reader::from_file(path)?; + Self::read_graph_from_reader(reader) + } + + /// Read a graph from a file in the GraphML gzipped format + fn from_gzip_file>(path: P) -> Result { + let reader = Self::open_file_gzip(path)?; - graph + Self::read_graph_from_reader(reader) } } @@ -742,3 +738,39 @@ pub fn read_graphml(py: Python, path: &str) -> PyResult> { Ok(out) } + +/// Read a list of graphs from a file in compressed GraphML format (with DEFLATE compression). +/// +/// GraphML is a comprehensive and easy-to-use file format for graphs. It consists +/// of a language core to describe the structural properties of a graph and a flexible +/// extension mechanism to add application-specific data. +/// +/// For more information see: +/// http://graphml.graphdrawing.org/ +/// +/// .. note:: +/// +/// This implementation does not support mixed graphs (directed and unidirected edges together), +/// hyperedges, nested graphs, or ports. +/// +/// .. note:: +/// +/// GraphML attributes with `graph` domain are stored in :attr:`~.PyGraph.attrs` field. +/// +/// :param str path: The path of the input file to read. +/// +/// :return: A list of graphs parsed from GraphML file. +/// :rtype: list[Union[PyGraph, PyDiGraph]] +/// :raises RuntimeError: when an error is encountered while parsing the GraphML file. +#[pyfunction] +#[pyo3(text_signature = "(path, /)")] +pub fn read_graphmlz(py: Python, path: &str) -> PyResult> { + let graphml = GraphML::from_gzip_file(path)?; + + let mut out = Vec::new(); + for graph in graphml.graphs { + out.push(graph.into_py(py)) + } + + Ok(out) +} diff --git a/src/lib.rs b/src/lib.rs index 79f183462..e518dfd0a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -612,6 +612,7 @@ fn rustworkx(py: Python<'_>, m: &Bound) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(connected_subgraphs))?; m.add_wrapped(wrap_pyfunction!(is_planar))?; m.add_wrapped(wrap_pyfunction!(read_graphml))?; + m.add_wrapped(wrap_pyfunction!(read_graphmlz))?; m.add_wrapped(wrap_pyfunction!(digraph_node_link_json))?; m.add_wrapped(wrap_pyfunction!(graph_node_link_json))?; m.add_wrapped(wrap_pyfunction!(from_node_link_json_file))?; diff --git a/tests/test_graphml.py b/tests/test_graphml.py index 50b127c85..d3a687b70 100644 --- a/tests/test_graphml.py +++ b/tests/test_graphml.py @@ -105,13 +105,11 @@ def test_gzipped(self): ## Test reading a graphmlz with tempfile.NamedTemporaryFile("w+b") as fd: - #fd.write(graph_xml) fd.flush() - newname = fd.name+".gz" - with gzip.open(newname, "wt") as wf: + with gzip.open(fd.name, "wt") as wf: wf.write(graph_xml) - graphml = rustworkx.read_graphml(newname) + graphml = rustworkx.read_graphmlz(fd.name) graph = graphml[0] nodes = [ {"id": "n0", "color": "blue"}, From 3218be2c01e54a2b65296a814c63475051812906 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 16:12:47 +0100 Subject: [PATCH 07/12] add changelog --- .../notes/short-description-string-564c7e376b8e7304.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 releasenotes/notes/short-description-string-564c7e376b8e7304.yaml diff --git a/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml b/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml new file mode 100644 index 000000000..d55be2808 --- /dev/null +++ b/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + Added the ability to read GraphML files that are compressed using gzip, with function :func:`~rustworkx.read_graphmlz`. From b536938f9feaec3b116b5bc6709e927571d7e932 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 17:35:11 +0100 Subject: [PATCH 08/12] reformat --- tests/test_graphml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_graphml.py b/tests/test_graphml.py index d3a687b70..f5e41d2e1 100644 --- a/tests/test_graphml.py +++ b/tests/test_graphml.py @@ -56,7 +56,7 @@ def assertGraphMLRaises(self, graph_xml): fd.flush() with self.assertRaises(Exception): rustworkx.read_graphml(fd.name) - + def graphml_xml_example(self): return self.HEADER.format( """ @@ -108,7 +108,7 @@ def test_gzipped(self): fd.flush() with gzip.open(fd.name, "wt") as wf: wf.write(graph_xml) - + graphml = rustworkx.read_graphmlz(fd.name) graph = graphml[0] nodes = [ From 6aa9236ebf30e0a8555b7e8199baf625729a8f94 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 17:36:48 +0100 Subject: [PATCH 09/12] Revert "write separate function" This reverts commit 2dba2529004f6cb7424eb27cbae319c12d6e4bf6. --- src/graphml.rs | 56 ++++++++++--------------------------------- src/lib.rs | 1 - tests/test_graphml.py | 8 ++++--- 3 files changed, 17 insertions(+), 48 deletions(-) diff --git a/src/graphml.rs b/src/graphml.rs index 776385137..1fe74551c 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -13,6 +13,7 @@ #![allow(clippy::borrow_as_ptr)] use std::convert::From; +use std::ffi::OsStr; use std::fs::File; use std::io::{BufRead, BufReader}; use std::iter::FromIterator; @@ -690,16 +691,19 @@ impl GraphML { } /// Read a graph from a file in the GraphML format + /// If the the file extension is "graphmlz" or "gz", decompress it on the fly fn from_file>(path: P) -> Result { - let reader = Reader::from_file(path)?; - Self::read_graph_from_reader(reader) - } - - /// Read a graph from a file in the GraphML gzipped format - fn from_gzip_file>(path: P) -> Result { - let reader = Self::open_file_gzip(path)?; + let extension = path.as_ref().extension().unwrap_or(OsStr::new("")); + + let graph: Result = if extension.eq("graphmlz") || extension.eq("gz") { + let reader = Self::open_file_gzip(path)?; + Self::read_graph_from_reader(reader) + } else { + let reader = Reader::from_file(path)?; + Self::read_graph_from_reader(reader) + }; - Self::read_graph_from_reader(reader) + graph } } @@ -738,39 +742,3 @@ pub fn read_graphml(py: Python, path: &str) -> PyResult> { Ok(out) } - -/// Read a list of graphs from a file in compressed GraphML format (with DEFLATE compression). -/// -/// GraphML is a comprehensive and easy-to-use file format for graphs. It consists -/// of a language core to describe the structural properties of a graph and a flexible -/// extension mechanism to add application-specific data. -/// -/// For more information see: -/// http://graphml.graphdrawing.org/ -/// -/// .. note:: -/// -/// This implementation does not support mixed graphs (directed and unidirected edges together), -/// hyperedges, nested graphs, or ports. -/// -/// .. note:: -/// -/// GraphML attributes with `graph` domain are stored in :attr:`~.PyGraph.attrs` field. -/// -/// :param str path: The path of the input file to read. -/// -/// :return: A list of graphs parsed from GraphML file. -/// :rtype: list[Union[PyGraph, PyDiGraph]] -/// :raises RuntimeError: when an error is encountered while parsing the GraphML file. -#[pyfunction] -#[pyo3(text_signature = "(path, /)")] -pub fn read_graphmlz(py: Python, path: &str) -> PyResult> { - let graphml = GraphML::from_gzip_file(path)?; - - let mut out = Vec::new(); - for graph in graphml.graphs { - out.push(graph.into_py(py)) - } - - Ok(out) -} diff --git a/src/lib.rs b/src/lib.rs index e518dfd0a..79f183462 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -612,7 +612,6 @@ fn rustworkx(py: Python<'_>, m: &Bound) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(connected_subgraphs))?; m.add_wrapped(wrap_pyfunction!(is_planar))?; m.add_wrapped(wrap_pyfunction!(read_graphml))?; - m.add_wrapped(wrap_pyfunction!(read_graphmlz))?; m.add_wrapped(wrap_pyfunction!(digraph_node_link_json))?; m.add_wrapped(wrap_pyfunction!(graph_node_link_json))?; m.add_wrapped(wrap_pyfunction!(from_node_link_json_file))?; diff --git a/tests/test_graphml.py b/tests/test_graphml.py index f5e41d2e1..8e029e26a 100644 --- a/tests/test_graphml.py +++ b/tests/test_graphml.py @@ -105,11 +105,13 @@ def test_gzipped(self): ## Test reading a graphmlz with tempfile.NamedTemporaryFile("w+b") as fd: + #fd.write(graph_xml) fd.flush() - with gzip.open(fd.name, "wt") as wf: + newname = fd.name+".gz" + with gzip.open(newname, "wt") as wf: wf.write(graph_xml) - - graphml = rustworkx.read_graphmlz(fd.name) + + graphml = rustworkx.read_graphml(newname) graph = graphml[0] nodes = [ {"id": "n0", "color": "blue"}, From 09a8c665e8861d22c0b23a9235326cce024d4aa8 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 17:56:36 +0100 Subject: [PATCH 10/12] run with compression argument --- src/graphml.rs | 23 ++++++++++++----------- tests/test_graphml.py | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/graphml.rs b/src/graphml.rs index 1fe74551c..37afac2aa 100644 --- a/src/graphml.rs +++ b/src/graphml.rs @@ -692,16 +692,17 @@ impl GraphML { /// Read a graph from a file in the GraphML format /// If the the file extension is "graphmlz" or "gz", decompress it on the fly - fn from_file>(path: P) -> Result { + fn from_file>(path: P, compression: &str) -> Result { let extension = path.as_ref().extension().unwrap_or(OsStr::new("")); - let graph: Result = if extension.eq("graphmlz") || extension.eq("gz") { - let reader = Self::open_file_gzip(path)?; - Self::read_graph_from_reader(reader) - } else { - let reader = Reader::from_file(path)?; - Self::read_graph_from_reader(reader) - }; + let graph: Result = + if extension.eq("graphmlz") || extension.eq("gz") || compression.eq("gzip") { + let reader = Self::open_file_gzip(path)?; + Self::read_graph_from_reader(reader) + } else { + let reader = Reader::from_file(path)?; + Self::read_graph_from_reader(reader) + }; graph } @@ -731,9 +732,9 @@ impl GraphML { /// :rtype: list[Union[PyGraph, PyDiGraph]] /// :raises RuntimeError: when an error is encountered while parsing the GraphML file. #[pyfunction] -#[pyo3(text_signature = "(path, /)")] -pub fn read_graphml(py: Python, path: &str) -> PyResult> { - let graphml = GraphML::from_file(path)?; +#[pyo3(signature=(path, compression=""),text_signature = "(path, compression=\"\", /)")] +pub fn read_graphml(py: Python, path: &str, compression: &str) -> PyResult> { + let graphml = GraphML::from_file(path, compression)?; let mut out = Vec::new(); for graph in graphml.graphs { diff --git a/tests/test_graphml.py b/tests/test_graphml.py index 8e029e26a..d2a7c0c0b 100644 --- a/tests/test_graphml.py +++ b/tests/test_graphml.py @@ -124,6 +124,29 @@ def test_gzipped(self): ] self.assertGraphEqual(graph, nodes, edges, directed=False) + def test_gzipped_force(self): + graph_xml = self.graphml_xml_example() + + ## Test reading a graphmlz + with tempfile.NamedTemporaryFile("w+b") as fd: + #fd.write(graph_xml) + fd.flush() + with gzip.open(fd.name, "wt") as wf: + wf.write(graph_xml) + + graphml = rustworkx.read_graphml(fd.name, compression="gzip") + graph = graphml[0] + nodes = [ + {"id": "n0", "color": "blue"}, + {"id": "n1", "color": "yellow"}, + {"id": "n2", "color": "green"}, + ] + edges = [ + ("n0", "n1", {"fidelity": 0.98}), + ("n0", "n2", {"fidelity": 0.95}), + ] + self.assertGraphEqual(graph, nodes, edges, directed=False) + def test_multiple_graphs_in_single_file(self): graph_xml = self.HEADER.format( """ From 3050fad5fc1903cd8819946296983e793e977dbe Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 17:58:14 +0100 Subject: [PATCH 11/12] update contribution --- .../notes/short-description-string-564c7e376b8e7304.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml b/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml index d55be2808..5ddc62d18 100644 --- a/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml +++ b/releasenotes/notes/short-description-string-564c7e376b8e7304.yaml @@ -1,4 +1,5 @@ --- features: - | - Added the ability to read GraphML files that are compressed using gzip, with function :func:`~rustworkx.read_graphmlz`. + Added the ability to read GraphML files that are compressed using gzip, with function :func:`~rustworkx.read_graphml`. + The extensions `.graphmlz` and `.gz` are automatically recognised, but the gzip decompression can be forced with the "compression" optional argument. From 768282a757a59fd301ebf1d89e3ef36e805288f8 Mon Sep 17 00:00:00 2001 From: Fabio Mazza Date: Fri, 15 Nov 2024 17:59:19 +0100 Subject: [PATCH 12/12] lint python --- tests/test_graphml.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_graphml.py b/tests/test_graphml.py index d2a7c0c0b..6b9d7fa0a 100644 --- a/tests/test_graphml.py +++ b/tests/test_graphml.py @@ -105,12 +105,11 @@ def test_gzipped(self): ## Test reading a graphmlz with tempfile.NamedTemporaryFile("w+b") as fd: - #fd.write(graph_xml) fd.flush() - newname = fd.name+".gz" + newname = fd.name + ".gz" with gzip.open(newname, "wt") as wf: wf.write(graph_xml) - + graphml = rustworkx.read_graphml(newname) graph = graphml[0] nodes = [ @@ -129,11 +128,11 @@ def test_gzipped_force(self): ## Test reading a graphmlz with tempfile.NamedTemporaryFile("w+b") as fd: - #fd.write(graph_xml) + # fd.write(graph_xml) fd.flush() with gzip.open(fd.name, "wt") as wf: wf.write(graph_xml) - + graphml = rustworkx.read_graphml(fd.name, compression="gzip") graph = graphml[0] nodes = [