Merge branch 'master' into graphbolt_tests
drivanov authored Feb 1, 2024
2 parents 7a50e13 + 50eb101 commit ad71144
Showing 11 changed files with 538 additions and 142 deletions.
40 changes: 30 additions & 10 deletions python/dgl/distributed/dist_graph.py
@@ -60,18 +60,21 @@ class InitGraphRequest(rpc.Request):
     with shared memory.
     """

-    def __init__(self, graph_name):
+    def __init__(self, graph_name, use_graphbolt):
         self._graph_name = graph_name
+        self._use_graphbolt = use_graphbolt

     def __getstate__(self):
-        return self._graph_name
+        return self._graph_name, self._use_graphbolt

     def __setstate__(self, state):
-        self._graph_name = state
+        self._graph_name, self._use_graphbolt = state

     def process_request(self, server_state):
         if server_state.graph is None:
-            server_state.graph = _get_graph_from_shared_mem(self._graph_name)
+            server_state.graph = _get_graph_from_shared_mem(
+                self._graph_name, self._use_graphbolt
+            )
         return InitGraphResponse(self._graph_name)


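Note: the request object crosses the client/server boundary by pickling, so use_graphbolt must survive the __getstate__/__setstate__ round trip; otherwise backup servers would silently fall back to the DGL loading path. A minimal self-contained sketch of that round trip (the class below mirrors the diff for illustration and is not the dgl.distributed import):

import pickle


class InitGraphRequest:
    """Stand-in mirroring the diff above, for illustration only."""

    def __init__(self, graph_name, use_graphbolt):
        self._graph_name = graph_name
        self._use_graphbolt = use_graphbolt

    def __getstate__(self):
        # Both fields must be serialized; dropping one would reset it
        # to whatever the receiving side reconstructs by default.
        return self._graph_name, self._use_graphbolt

    def __setstate__(self, state):
        self._graph_name, self._use_graphbolt = state


req = pickle.loads(pickle.dumps(InitGraphRequest("my_graph", True)))
assert req._use_graphbolt is True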
@@ -153,13 +156,15 @@ def _exist_shared_mem_array(graph_name, name):
     return exist_shared_mem_array(_get_edata_path(graph_name, name))


-def _get_graph_from_shared_mem(graph_name):
+def _get_graph_from_shared_mem(graph_name, use_graphbolt):
     """Get the graph from the DistGraph server.
     The DistGraph server puts the graph structure of the local partition in the shared memory.
     The client can access the graph structure and some metadata on nodes and edges directly
     through shared memory to reduce the overhead of data access.
     """
+    if use_graphbolt:
+        return gb.load_from_shared_memory(graph_name)
     g, ntypes, etypes = heterograph_index.create_heterograph_from_shared_memory(
         graph_name
     )
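On the client, the new branch re-attaches a GraphBolt partition by name instead of rebuilding a heterograph index. A hedged usage sketch (the graph name is a placeholder; gb is the dgl.graphbolt alias this module imports):

import dgl.graphbolt as gb

# Hypothetical: the server has already registered the partition in shared
# memory under this name; the client attaches to it without copying.
graph = gb.load_from_shared_memory("my_graph")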
@@ -524,6 +529,8 @@ class DistGraph:
     part_config : str, optional
         The path of partition configuration file generated by
         :py:meth:`dgl.distributed.partition.partition_graph`. It's used in the standalone mode.
+    use_graphbolt : bool, optional
+        Whether to load GraphBolt partition. Default: False.

     Examples
     --------
@@ -557,9 +564,15 @@ class DistGraph:
     manually setting up servers and trainers. The setup is not fully tested yet.
     """

-    def __init__(self, graph_name, gpb=None, part_config=None):
+    def __init__(
+        self, graph_name, gpb=None, part_config=None, use_graphbolt=False
+    ):
         self.graph_name = graph_name
+        self._use_graphbolt = use_graphbolt
         if os.environ.get("DGL_DIST_MODE", "standalone") == "standalone":
+            assert (
+                use_graphbolt is False
+            ), "GraphBolt is not supported in standalone mode."
             assert (
                 part_config is not None
             ), "When running in the standalone model, the partition config file is required"
@@ -600,7 +613,9 @@ def __init__(self, graph_name, gpb=None, part_config=None):
             self._init(gpb)
             # Tell the backup servers to load the graph structure from shared memory.
             for server_id in range(self._client.num_servers):
-                rpc.send_request(server_id, InitGraphRequest(graph_name))
+                rpc.send_request(
+                    server_id, InitGraphRequest(graph_name, use_graphbolt)
+                )
             for server_id in range(self._client.num_servers):
                 rpc.recv_response()
             self._client.barrier()
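Trainer code opts in once at construction; the flag then drives both the local shared-memory attach and the InitGraphRequest broadcast above. A hedged sketch of distributed-mode usage (file and graph names are placeholders):

import dgl
from dgl.distributed import DistGraph

# Hypothetical trainer-side setup; requires partitions that were saved in
# GraphBolt format. Standalone mode rejects the flag via the assert above.
dgl.distributed.initialize("ip_config.txt")
g = DistGraph("my_graph", use_graphbolt=True)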
@@ -625,7 +640,9 @@ def _init(self, gpb):
         assert (
             self._client is not None
         ), "Distributed module is not initialized. Please call dgl.distributed.initialize."
-        self._g = _get_graph_from_shared_mem(self.graph_name)
+        self._g = _get_graph_from_shared_mem(
+            self.graph_name, self._use_graphbolt
+        )
         self._gpb = get_shared_mem_partition_book(self.graph_name)
         if self._gpb is None:
             self._gpb = gpb
@@ -682,10 +699,10 @@ def _init_edata_store(self):
                 self._edata_store[etype] = data

     def __getstate__(self):
-        return self.graph_name, self._gpb
+        return self.graph_name, self._gpb, self._use_graphbolt

     def __setstate__(self, state):
-        self.graph_name, gpb = state
+        self.graph_name, gpb, self._use_graphbolt = state
         self._init(gpb)

         self._init_ndata_store()
@@ -1230,6 +1247,9 @@ def find_edges(self, edges, etype=None):
         tensor
             The destination node ID array.
         """
+        assert (
+            self._use_graphbolt is False
+        ), "find_edges is not supported in GraphBolt."
         if etype is None:
             assert (
                 len(self.etypes) == 1
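The guard makes the limitation fail fast on the client rather than returning wrong results. A small behavioral sketch, assuming g is a DistGraph created with use_graphbolt=True (edge IDs are placeholders):

import torch

try:
    g.find_edges(torch.tensor([0, 1, 2]))
except AssertionError as err:
    print(err)  # find_edges is not supported in GraphBolt.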
15 changes: 14 additions & 1 deletion python/dgl/distributed/partition.py
@@ -638,6 +638,8 @@ def partition_graph(
     num_trainers_per_machine=1,
     objtype="cut",
     graph_formats=None,
+    use_graphbolt=False,
+    **kwargs,
 ):
     """Partition a graph for distributed training and store the partitions on files.
@@ -811,6 +813,10 @@ def partition_graph(
         ``csc`` and ``csr``. If not specified, save one format only according to what
         format is available. If multiple formats are available, selection priority
         from high to low is ``coo``, ``csc``, ``csr``.
+    use_graphbolt : bool, optional
+        Whether to save partitions in GraphBolt format. Default: False.
+    kwargs : dict
+        Other keyword arguments for converting DGL partitions to GraphBolt.

     Returns
     -------
@@ -1298,7 +1304,8 @@ def get_homogeneous(g, balance_ntypes):
             )
         )

-    _dump_part_config(f"{out_path}/{graph_name}.json", part_metadata)
+    part_config = os.path.join(out_path, graph_name + ".json")
+    _dump_part_config(part_config, part_metadata)

     num_cuts = sim_g.num_edges() - tot_num_inner_edges
     if num_parts == 1:
@@ -1309,6 +1316,12 @@
             )
         )

+    if use_graphbolt:
+        dgl_partition_to_graphbolt(
+            part_config,
+            **kwargs,
+        )
+
     if return_mapping:
         return orig_nids, orig_eids

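With the flag, conversion becomes the final step of partitioning itself, so callers no longer invoke dgl_partition_to_graphbolt by hand, and extra keyword arguments flow straight through to the converter. A hedged sketch (store_eids stands in for a forwarded converter option and is illustrative, not guaranteed by this diff):

import dgl
from dgl.distributed import partition_graph

g = dgl.rand_graph(1000, 5000)  # toy input graph
partition_graph(
    g,
    graph_name="my_graph",
    num_parts=2,
    out_path="/tmp/parts",
    use_graphbolt=True,  # convert each DGL partition to GraphBolt format
    store_eids=True,  # hypothetical kwarg, forwarded via **kwargs
)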
16 changes: 8 additions & 8 deletions python/dgl/graphbolt/dataloader.py
@@ -1,6 +1,6 @@
"""Graph Bolt DataLoaders"""

from queue import Queue
from collections import deque

import torch
import torch.utils.data
@@ -69,18 +69,18 @@ def __init__(self, datapipe, buffer_size=1):
             raise ValueError(
                 "'buffer_size' is required to be a positive integer."
             )
-        self.buffer = Queue(buffer_size)
+        self.buffer = deque(maxlen=buffer_size)

     def __iter__(self):
         for data in self.datapipe:
-            if not self.buffer.full():
-                self.buffer.put(data)
+            if len(self.buffer) < self.buffer.maxlen:
+                self.buffer.append(data)
             else:
-                return_data = self.buffer.get()
-                self.buffer.put(data)
+                return_data = self.buffer.popleft()
+                self.buffer.append(data)
                 yield return_data
-        while not self.buffer.empty():
-            yield self.buffer.get()
+        while len(self.buffer) > 0:
+            yield self.buffer.popleft()


class Awaiter(dp.iter.IterDataPipe):
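The switch from Queue to deque drops cross-thread locking and blocking semantics this single-threaded iterator never used, while deque(maxlen=...) keeps the same bounded buffer with O(1) appends and pops. A self-contained sketch of the pattern with generic names (not the DGL class):

from collections import deque


def buffered(iterable, buffer_size=1):
    # Keep up to buffer_size items in flight: fill the buffer first, then
    # emit the oldest item each time a new one arrives, and drain at the end.
    buffer = deque(maxlen=buffer_size)
    for data in iterable:
        if len(buffer) < buffer.maxlen:
            buffer.append(data)
        else:
            yield buffer.popleft()
            buffer.append(data)
    while buffer:
        yield buffer.popleft()


print(list(buffered(range(5), buffer_size=2)))  # [0, 1, 2, 3, 4]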
146 changes: 98 additions & 48 deletions python/dgl/graphbolt/impl/neighbor_sampler.py
@@ -1,9 +1,12 @@
"""Neighbor subgraph samplers for GraphBolt."""

from functools import partial

import torch
from torch.utils.data import functional_datapipe

from ..internal import compact_csc_format, unique_and_compact_csc_formats
from ..minibatch_transformer import MiniBatchTransformer

from ..subgraph_sampler import SubgraphSampler
from .sampled_subgraph_impl import SampledSubgraphImpl
@@ -12,8 +15,66 @@
 __all__ = ["NeighborSampler", "LayerNeighborSampler"]


+@functional_datapipe("sample_per_layer")
+class SamplePerLayer(MiniBatchTransformer):
+    """Sample neighbor edges from a graph for a single layer."""
+
+    def __init__(self, datapipe, sampler, fanout, replace, prob_name):
+        super().__init__(datapipe, self._sample_per_layer)
+        self.sampler = sampler
+        self.fanout = fanout
+        self.replace = replace
+        self.prob_name = prob_name
+
+    def _sample_per_layer(self, minibatch):
+        subgraph = self.sampler(
+            minibatch._seed_nodes, self.fanout, self.replace, self.prob_name
+        )
+        minibatch.sampled_subgraphs.insert(0, subgraph)
+        return minibatch
+
+
+@functional_datapipe("compact_per_layer")
+class CompactPerLayer(MiniBatchTransformer):
+    """Compact the sampled edges for a single layer."""
+
+    def __init__(self, datapipe, deduplicate):
+        super().__init__(datapipe, self._compact_per_layer)
+        self.deduplicate = deduplicate
+
+    def _compact_per_layer(self, minibatch):
+        subgraph = minibatch.sampled_subgraphs[0]
+        seeds = minibatch._seed_nodes
+        if self.deduplicate:
+            (
+                original_row_node_ids,
+                compacted_csc_format,
+            ) = unique_and_compact_csc_formats(subgraph.sampled_csc, seeds)
+            subgraph = SampledSubgraphImpl(
+                sampled_csc=compacted_csc_format,
+                original_column_node_ids=seeds,
+                original_row_node_ids=original_row_node_ids,
+                original_edge_ids=subgraph.original_edge_ids,
+            )
+        else:
+            (
+                original_row_node_ids,
+                compacted_csc_format,
+            ) = compact_csc_format(subgraph.sampled_csc, seeds)
+            subgraph = SampledSubgraphImpl(
+                sampled_csc=compacted_csc_format,
+                original_column_node_ids=seeds,
+                original_row_node_ids=original_row_node_ids,
+                original_edge_ids=subgraph.original_edge_ids,
+            )
+        minibatch._seed_nodes = original_row_node_ids
+        minibatch.sampled_subgraphs[0] = subgraph
+        return minibatch
+
+
 @functional_datapipe("sample_neighbor")
 class NeighborSampler(SubgraphSampler):
+    # pylint: disable=abstract-method
     """Sample neighbor edges from a graph and return a subgraph.
     Functional name: :obj:`sample_neighbor`.
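Because both new stages register themselves with @functional_datapipe, they chain fluently off any IterDataPipe, which is how sampling_stages below strings one sample/compact pair per layer. A self-contained toy showing the registration mechanism (Doubler and .double() are invented for illustration):

from torch.utils.data import IterDataPipe, functional_datapipe
from torch.utils.data.datapipes.iter import IterableWrapper


@functional_datapipe("double")
class Doubler(IterDataPipe):
    # Registration makes .double() available on every IterDataPipe.
    def __init__(self, datapipe):
        self.datapipe = datapipe

    def __iter__(self):
        for x in self.datapipe:
            yield 2 * x


pipe = IterableWrapper([1, 2, 3]).double().double()
print(list(pipe))  # [4, 8, 12]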
@@ -95,6 +156,7 @@ class NeighborSampler(SubgraphSampler):
         )]
     """

+    # pylint: disable=useless-super-delegation
     def __init__(
         self,
         datapipe,
@@ -103,26 +165,19 @@ def __init__(
         replace=False,
         prob_name=None,
         deduplicate=True,
+        sampler=None,
     ):
-        super().__init__(datapipe)
-        self.graph = graph
-        # Convert fanouts to a list of tensors.
-        self.fanouts = []
-        for fanout in fanouts:
-            if not isinstance(fanout, torch.Tensor):
-                fanout = torch.LongTensor([int(fanout)])
-            self.fanouts.insert(0, fanout)
-        self.replace = replace
-        self.prob_name = prob_name
-        self.deduplicate = deduplicate
-        self.sampler = graph.sample_neighbors
+        if sampler is None:
+            sampler = graph.sample_neighbors
+        super().__init__(
+            datapipe, graph, fanouts, replace, prob_name, deduplicate, sampler
+        )

-    def sample_subgraphs(self, seeds, seeds_timestamp):
-        subgraphs = []
-        num_layers = len(self.fanouts)
+    def _prepare(self, node_type_to_id, minibatch):
+        seeds = minibatch._seed_nodes
         # Enrich seeds with all node types.
         if isinstance(seeds, dict):
-            ntypes = list(self.graph.node_type_to_id.keys())
+            ntypes = list(node_type_to_id.keys())
             # Loop over different seeds to extract the device they are on.
             device = None
             dtype = None
@@ -134,42 +189,37 @@ def sample_subgraphs(self, seeds, seeds_timestamp):
             seeds = {
                 ntype: seeds.get(ntype, default_tensor) for ntype in ntypes
             }
-        for hop in range(num_layers):
-            subgraph = self.sampler(
-                seeds,
-                self.fanouts[hop],
-                self.replace,
-                self.prob_name,
+        minibatch._seed_nodes = seeds
+        minibatch.sampled_subgraphs = []
+        return minibatch
+
+    @staticmethod
+    def _set_input_nodes(minibatch):
+        minibatch.input_nodes = minibatch._seed_nodes
+        return minibatch
+
+    # pylint: disable=arguments-differ
+    def sampling_stages(
+        self, datapipe, graph, fanouts, replace, prob_name, deduplicate, sampler
+    ):
+        datapipe = datapipe.transform(
+            partial(self._prepare, graph.node_type_to_id)
+        )
+        for fanout in reversed(fanouts):
+            # Convert fanout to tensor.
+            if not isinstance(fanout, torch.Tensor):
+                fanout = torch.LongTensor([int(fanout)])
+            datapipe = datapipe.sample_per_layer(
+                sampler, fanout, replace, prob_name
             )
-            if self.deduplicate:
-                (
-                    original_row_node_ids,
-                    compacted_csc_format,
-                ) = unique_and_compact_csc_formats(subgraph.sampled_csc, seeds)
-                subgraph = SampledSubgraphImpl(
-                    sampled_csc=compacted_csc_format,
-                    original_column_node_ids=seeds,
-                    original_row_node_ids=original_row_node_ids,
-                    original_edge_ids=subgraph.original_edge_ids,
-                )
-            else:
-                (
-                    original_row_node_ids,
-                    compacted_csc_format,
-                ) = compact_csc_format(subgraph.sampled_csc, seeds)
-                subgraph = SampledSubgraphImpl(
-                    sampled_csc=compacted_csc_format,
-                    original_column_node_ids=seeds,
-                    original_row_node_ids=original_row_node_ids,
-                    original_edge_ids=subgraph.original_edge_ids,
-                )
-            subgraphs.insert(0, subgraph)
-            seeds = original_row_node_ids
-        return seeds, subgraphs
+            datapipe = datapipe.compact_per_layer(deduplicate)
+
+        return datapipe.transform(self._set_input_nodes)


@functional_datapipe("sample_layer_neighbor")
class LayerNeighborSampler(NeighborSampler):
# pylint: disable=abstract-method
"""Sample layer neighbor edges from a graph and return a subgraph.
Functional name: :obj:`sample_layer_neighbor`.
@@ -280,5 +330,5 @@ def __init__(
             replace,
             prob_name,
             deduplicate,
+            graph.sample_layer_neighbors,
         )
-        self.sampler = graph.sample_layer_neighbors
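Net effect of the refactor: the monolithic sample_subgraphs loop becomes a declarative chain of datapipes, and LayerNeighborSampler now differs from NeighborSampler only by the sampler function it injects. A hedged sketch of the pipeline sampling_stages builds for fanouts=[5, 10] (objects are placeholders; stage names come from this diff; layers are wired last-to-first because sampling starts from the seed nodes):

# Conceptual expansion inside sampling_stages:
datapipe = datapipe.transform(partial(self._prepare, graph.node_type_to_id))
datapipe = datapipe.sample_per_layer(sampler, torch.LongTensor([10]), replace, prob_name)
datapipe = datapipe.compact_per_layer(deduplicate)
datapipe = datapipe.sample_per_layer(sampler, torch.LongTensor([5]), replace, prob_name)
datapipe = datapipe.compact_per_layer(deduplicate)
datapipe = datapipe.transform(self._set_input_nodes)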