Bug fix in valid GIS names used to create water network models #452

Merged: 8 commits, Nov 12, 2024
42 changes: 21 additions & 21 deletions documentation/gis.rst
@@ -112,13 +112,13 @@ For example, the junctions GeoDataFrame contains the following information:
:skipif: gpd is None

>>> print(wn_gis.junctions.head())
-          node_type  elevation  initial_quality                   geometry
-name
-10         Junction    216.408        5.000e-04  POINT (20.00000 70.00000)
-11         Junction    216.408        5.000e-04  POINT (30.00000 70.00000)
-12         Junction    213.360        5.000e-04  POINT (50.00000 70.00000)
-13         Junction    211.836        5.000e-04  POINT (70.00000 70.00000)
-21         Junction    213.360        5.000e-04  POINT (30.00000 40.00000)
+      elevation  initial_quality                   geometry
+name
+10      216.408        5.000e-04  POINT (20.00000 70.00000)
+11      216.408        5.000e-04  POINT (30.00000 70.00000)
+12      213.360        5.000e-04  POINT (50.00000 70.00000)
+13      211.836        5.000e-04  POINT (70.00000 70.00000)
+21      213.360        5.000e-04  POINT (30.00000 40.00000)

Each GeoDataFrame contains attributes and geometry:

@@ -334,23 +334,23 @@ and then translates the GeoDataFrame's coordinates to EPSG:3857.

>>> wn_gis = wntr.network.to_gis(wn, crs='EPSG:4326')
>>> print(wn_gis.junctions.head())
-          node_type  elevation  initial_quality                   geometry
-name
-10         Junction    216.408        5.000e-04  POINT (20.00000 70.00000)
-11         Junction    216.408        5.000e-04  POINT (30.00000 70.00000)
-12         Junction    213.360        5.000e-04  POINT (50.00000 70.00000)
-13         Junction    211.836        5.000e-04  POINT (70.00000 70.00000)
-21         Junction    213.360        5.000e-04  POINT (30.00000 40.00000)
+      elevation  initial_quality                   geometry
+name
+10      216.408        5.000e-04  POINT (20.00000 70.00000)
+11      216.408        5.000e-04  POINT (30.00000 70.00000)
+12      213.360        5.000e-04  POINT (50.00000 70.00000)
+13      211.836        5.000e-04  POINT (70.00000 70.00000)
+21      213.360        5.000e-04  POINT (30.00000 40.00000)

>>> wn_gis.to_crs('EPSG:3857')
>>> print(wn_gis.junctions.head())
-          node_type  elevation  initial_quality                         geometry
-name
-10         Junction    216.408        5.000e-04  POINT (2226389.816 11068715.659)
-11         Junction    216.408        5.000e-04  POINT (3339584.724 11068715.659)
-12         Junction    213.360        5.000e-04  POINT (5565974.540 11068715.659)
-13         Junction    211.836        5.000e-04  POINT (7792364.356 11068715.659)
-21         Junction    213.360        5.000e-04  POINT (3339584.724 4865942.280)
+      elevation  initial_quality                         geometry
+name
+10      216.408        5.000e-04  POINT (2226389.816 11068715.659)
+11      216.408        5.000e-04  POINT (3339584.724 11068715.659)
+12      213.360        5.000e-04  POINT (5565974.540 11068715.659)
+13      211.836        5.000e-04  POINT (7792364.356 11068715.659)
+21      213.360        5.000e-04  POINT (3339584.724 4865942.280)

Snap point geometries to the nearest point or line
----------------------------------------------------
8 changes: 4 additions & 4 deletions documentation/model_io.rst
@@ -216,7 +216,7 @@ The following example returns valid base GeoJSON column names for junctions.

>>> geojson_column_names = wntr.network.io.valid_gis_names()
Collaborator:

I looked back at this portion of the documentation and it is a little confusing how we talk about the valid_gis_names function. It is probably easiest for me to explain by making my suggested updates in a PR to your branch.

Collaborator:

Updated discussion of the function in the user docs in kaklise#76

>>> print(geojson_column_names['junctions'])
-['name', 'elevation', 'coordinates', 'emitter_coefficient', 'initial_quality', 'minimum_pressure', 'required_pressure', 'pressure_exponent', 'tag']
+['name', 'elevation', 'geometry', 'emitter_coefficient', 'initial_quality', 'minimum_pressure', 'required_pressure', 'pressure_exponent', 'tag']

A minimal list of valid column names can also be obtained by setting ``complete_list`` to False.
Column names that are optional (i.e., ``initial_quality``) and not included in the GeoJSON file are defined using default values.
@@ -226,7 +226,7 @@

>>> geojson_column_names = wntr.network.io.valid_gis_names(complete_list=False)
>>> print(geojson_column_names['junctions'])
-['name', 'elevation', 'coordinates']
+['name', 'elevation', 'geometry']

Note that GeoJSON files can contain additional custom column names that are assigned to WaterNetworkModel objects.

@@ -311,7 +311,7 @@ To use Esri Shapefiles in WNTR, several formatting requirements are enforced:

>>> shapefile_field_names = wntr.network.io.valid_gis_names(truncate_names=10)
>>> print(shapefile_field_names['junctions'])
-['name', 'elevation', 'coordinate', 'emitter_co', 'initial_qu', 'minimum_pr', 'required_p', 'pressure_e', 'tag']
+['name', 'elevation', 'geometry', 'emitter_co', 'initial_qu', 'minimum_pr', 'required_p', 'pressure_e', 'tag']

A minimal list of valid field names can also be obtained by setting ``complete_list`` to False.
Field names that are optional (i.e., ``initial_quality``) and not included in the Shapefile are defined using default values.
@@ -322,7 +322,7 @@ To use Esri Shapefiles in WNTR, several formatting requirements are enforced:
>>> shapefile_field_names = wntr.network.io.valid_gis_names(complete_list=False,
... truncate_names=10)
>>> print(shapefile_field_names['junctions'])
-['name', 'elevation', 'coordinate']
+['name', 'elevation', 'geometry']

* Shapefiles can contain additional custom field names that are assigned to WaterNetworkModel objects.

64 changes: 50 additions & 14 deletions wntr/gis/network.py
@@ -99,14 +99,18 @@ def _create_gis(self, wn, crs: str = None, pumps_as_points: bool = False,
Represent valves as points (True) or lines (False), by default False
"""

-def _extract_geodataframe(df, crs=None, links_as_points=False):
-    # Drop any column with all NaN
+def _extract_geodataframe(df, crs=None, valid_base_names=[],
Collaborator:
Having mutable default arguments (valid_base_names=[]) can cause unexpected and hard-to-debug behavior in Python. I try to avoid this wherever I can. Instead, we can default to None, check for None in the body of the function, and then assign an empty list.

Collaborator:
Updated default argument in kaklise#76

+                           links_as_points=False):
+    # Drop any column with all NaN, this removes excess attributes
+    # Valid base attributes that have all None values are added back
+    # at the end of this routine
df = df.loc[:, ~df.isna().all()]

-    # Define geom and drop node_type/link_type
     if df.shape[0] > 0:
+        # Define geom
         if 'node_type' in df.columns:
             geom = [Point((x,y)) for x,y in df['coordinates']]
-            del df['node_type']
elif 'link_type' in df.columns:
geom = []
for link_name in df['name']:
@@ -120,24 +124,33 @@ def _extract_geodataframe(df, crs=None, links_as_points=False):
ls.append(v)
ls.append(link.end_node.coordinates)
geom.append(LineString(ls))
-            del df['link_type']

-    # Drop column if not a str, float, int, or bool
+    # Drop column if not a str, float, int, or bool (or np.bool_)
+    # This drops columns like coordinates, vertices
+    # This could be extended to keep additional data type (list,
+    # tuple, network elements like Patterns, Curves)
drop_cols = []
for col in df.columns:
-        if not isinstance(df.iloc[0][col], (str, float, int, bool)):
+        # Added np.bool_ to the following check
+        # Returned by df.to_dict('records') for some network models
+        if not isinstance(df.iloc[0][col], (str, float, int, bool, np.bool_)):
drop_cols.append(col)
df = df.drop(columns=drop_cols)

+    # Add back in valid base attributes that had all None values
+    cols = list(set(valid_base_names) - set(df.columns))
+    if len(cols) > 0:
+        df[cols] = None

# Set index
if len(df) > 0:
df.set_index('name', inplace=True)

df = gpd.GeoDataFrame(df, crs=crs, geometry=geom)
else:
df = gpd.GeoDataFrame()

return df

# Convert the WaterNetworkModel to a dictionary
@@ -146,29 +159,31 @@ def _extract_geodataframe(df, crs=None, links_as_points=False):
df_nodes = pd.DataFrame(wn_dict['nodes'])
df_links = pd.DataFrame(wn_dict['links'])

+valid_base_names = self._valid_names(complete_list=False, truncate_names=None)

# Junctions
df = df_nodes[df_nodes['node_type'] == 'Junction']
-self.junctions = _extract_geodataframe(df, crs)
+self.junctions = _extract_geodataframe(df, crs, valid_base_names['junctions'])

# Tanks
df = df_nodes[df_nodes['node_type'] == 'Tank']
-self.tanks = _extract_geodataframe(df, crs)
+self.tanks = _extract_geodataframe(df, crs, valid_base_names['tanks'])

# Reservoirs
df = df_nodes[df_nodes['node_type'] == 'Reservoir']
-self.reservoirs = _extract_geodataframe(df, crs)
+self.reservoirs = _extract_geodataframe(df, crs, valid_base_names['reservoirs'])

# Pipes
df = df_links[df_links['link_type'] == 'Pipe']
-self.pipes = _extract_geodataframe(df, crs, False)
+self.pipes = _extract_geodataframe(df, crs, valid_base_names['pipes'], False)

# Pumps
df = df_links[df_links['link_type'] == 'Pump']
-self.pumps = _extract_geodataframe(df, crs, pumps_as_points)
+self.pumps = _extract_geodataframe(df, crs, valid_base_names['pumps'], pumps_as_points)

# Valves
df = df_links[df_links['link_type'] == 'Valve']
-self.valves = _extract_geodataframe(df, crs, valves_as_points)
+self.valves = _extract_geodataframe(df, crs, valid_base_names['valves'], valves_as_points)

def _create_wn(self, append=None):
"""
@@ -187,22 +202,32 @@ def _create_wn(self, append=None):
wn_dict['nodes'] = []
wn_dict['links'] = []

-for element in [self.junctions, self.tanks, self.reservoirs]:
+# Modifications to create a WaterNetworkModel from a dict
+# Reset index
+# Create coordinates/vertices from geometry
+# Add node_type/link_type
+for node_type, element in [('Junction', self.junctions),
+                           ('Tank', self.tanks),
+                           ('Reservoir', self.reservoirs)]:
if element.shape[0] > 0:
assert (element['geometry'].geom_type).isin(['Point']).all()
df = element.reset_index(names="name")
df.rename(columns={'geometry':'coordinates'}, inplace=True)
df['coordinates'] = [[x,y] for x,y in zip(df['coordinates'].x,
df['coordinates'].y)]
+df['node_type'] = node_type
wn_dict['nodes'].extend(df.to_dict('records'))

for element in [self.pipes, self.pumps, self.valves]:
for link_type, element in [('Pipe', self.pipes),
('Pump', self.pumps),
('Valve', self.valves)]:
if element.shape[0] > 0:
assert 'start_node_name' in element.columns
assert 'end_node_name' in element.columns
df = element.reset_index(names="name")
df['vertices'] = df.apply(lambda row: list(row.geometry.coords)[1:-1], axis=1)
df.drop(columns=['geometry'], inplace=True)
+df['link_type'] = link_type
wn_dict['links'].extend(df.to_dict('records'))

# Create WaterNetworkModel from dictionary
@@ -470,6 +495,17 @@ def _valid_names(self, complete_list=True, truncate_names=None):
if truncate_names is not None and truncate_names > 0:
Collaborator:

I think it makes sense to change element_objects to have keys that match the syntax of the corresponding elements. Something like

        element_objects = {
            'Junction': wntr.network.elements.Junction,
            'Tank': wntr.network.elements.Tank,
            'Reservoir': wntr.network.elements.Reservoir,
            'Pipe': wntr.network.elements.Pipe,
            'Pump': wntr.network.elements.Pump,
            'Valve': wntr.network.elements.Valve}

Collaborator:

I looked into doing this and it is a bit of a lift since it is used all over the code base and hard to track down. Best to keep it as is...

for element, attributes in valid_names.items():
valid_names[element] = [attribute[:truncate_names] for attribute in attributes]

+for key, vals in valid_names.items():
+    # Remove coordinates and vertices (not used to create GeoDataFrame geometry)
+    if 'coordinates' in valid_names[key]:
+        valid_names[key].remove('coordinates')
+    if 'vertices' in valid_names[key]:
+        valid_names[key].remove('vertices')
+
+    # Add geometry
+    if 'geometry' not in valid_names[key]:
+        valid_names[key].append('geometry')

return valid_names

4 changes: 2 additions & 2 deletions wntr/network/elements.py
Collaborator:
From my understanding, _base_attributes are the attributes which are necessary to add the element to the model, and _optional_attributes can also be added to the element, but are not necessary for the code to run.

If that is the case, we should make sure all possible attributes for each element are included in _optional_attributes. For example, base_demand is not included for junctions, so when we do a GIS roundtrip we lose base_demand information.

Collaborator:

Looking into it more, base_demand is not yet implemented in the read functions. It looks like we always assign a default value. Strangely, we do mention base_demand in the Network IO documentation in the Shapefile section.

We should probably change that example if we aren't supporting base_demand as a valid column.
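The gap described in this thread can be checked mechanically. A minimal sketch with hypothetical attribute sets (the real lists live on the element classes as _base_attributes and _optional_attributes):

```python
# Hypothetical junction attribute lists, mirroring the
# _base_attributes / _optional_attributes pattern discussed above
base_attributes = ["name", "elevation", "coordinates"]
optional_attributes = ["emitter_coefficient", "initial_quality",
                       "minimum_pressure", "required_pressure",
                       "pressure_exponent", "tag"]

# Attributes a model object actually carries (illustrative only)
model_attributes = set(base_attributes + optional_attributes + ["base_demand"])

# Anything on the model that is in neither list is silently dropped
# on a GIS roundtrip
lost = model_attributes - set(base_attributes) - set(optional_attributes)
print(sorted(lost))  # ['base_demand']
```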

Collaborator:

Updated example in kaklise#76

@@ -394,7 +394,7 @@ class Tank(Node):
"min_level",
"max_level",
"diameter",
-"min_vol"
+"min_vol",
"vol_curve_name",
"overflow",
"coordinates"]
@@ -1041,7 +1041,7 @@ class Pump(Link):
"end_node_name",
"pump_type",
"pump_curve_name",
-"power"
+"power",
"base_speed",
"speed_pattern_name",
"initial_status"]
30 changes: 16 additions & 14 deletions wntr/tests/test_gis.py
@@ -125,12 +125,12 @@ def test_wn_to_gis(self):
#assert self.gis_data.valves.shape[0] == self.wn.num_valves

# Check minimal set of attributes
-assert set(['node_type', 'elevation', 'geometry']).issubset(self.gis_data.junctions.columns)
-assert set(['node_type', 'elevation', 'geometry']).issubset(self.gis_data.tanks.columns)
-assert set(['node_type', 'geometry']).issubset(self.gis_data.reservoirs.columns)
-assert set(['link_type', 'start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pipes.columns)
-assert set(['link_type', 'start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pumps.columns)
-#assert set(['link_type', 'start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.valves.columns) # Net1 has no valves
+assert set(['elevation', 'geometry']).issubset(self.gis_data.junctions.columns)
+assert set(['elevation', 'geometry']).issubset(self.gis_data.tanks.columns)
+assert set(['geometry']).issubset(self.gis_data.reservoirs.columns)
+assert set(['start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pipes.columns)
+assert set(['start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pumps.columns)
+#assert set(['start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.valves.columns) # Net1 has no valves

def test_gis_to_wn(self):

@@ -258,15 +258,17 @@ def test_set_crs_to_crs(self):

def test_add_attributes_and_write(self):

-self.gis_data.add_node_attributes(self.results.node['pressure'].loc[3600,:], 'Pressure_1hr')
-self.gis_data.add_link_attributes(self.results.link['flowrate'].loc[3600,:], 'Flowrate_1hr')
+gis_data = self.wn.to_gis()
+
+gis_data.add_node_attributes(self.results.node['pressure'].loc[3600,:], 'Pressure_1hr')
+gis_data.add_link_attributes(self.results.link['flowrate'].loc[3600,:], 'Flowrate_1hr')

-assert 'Pressure_1hr' in self.gis_data.junctions.columns
-assert 'Pressure_1hr' in self.gis_data.tanks.columns
-assert 'Pressure_1hr' in self.gis_data.reservoirs.columns
-assert 'Flowrate_1hr' in self.gis_data.pipes.columns
-assert 'Flowrate_1hr' in self.gis_data.pumps.columns
-assert 'Flowrate_1hr' not in self.gis_data.valves.columns # Net1 has no valves
+assert 'Pressure_1hr' in gis_data.junctions.columns
+assert 'Pressure_1hr' in gis_data.tanks.columns
+assert 'Pressure_1hr' in gis_data.reservoirs.columns
+assert 'Flowrate_1hr' in gis_data.pipes.columns
+assert 'Flowrate_1hr' in gis_data.pumps.columns
+assert 'Flowrate_1hr' not in gis_data.valves.columns # Net1 has no valves

def test_write_geojson(self):
prefix = 'temp_Net1'
26 changes: 25 additions & 1 deletion wntr/tests/test_network.py
@@ -1087,6 +1087,30 @@ def test_shapefile_roundtrip(self):
files['valves'] = 'temp_valves'
B = self.wntr.network.read_shapefile(files)
assert(wn._compare(B, level=0))


+def test_valid_gis_names(self):
+
+    required_names = wntr.network.io.valid_gis_names(complete_list=False, truncate_names=None)
+    valid_names = wntr.network.io.valid_gis_names(complete_list=True, truncate_names=None)
+
+    wn = self.wntr.network.WaterNetworkModel(join(ex_datadir, "Net6.inp"))
+    gis_data = wn.to_gis()
+
+    for component in required_names.keys():
+        required_columns = required_names[component]
+        valid_columns = valid_names[component]
+
+        data = getattr(gis_data, component)
+        data_columns = list(data.columns)
+        data_columns.append(data.index.name)
+
+        # Check that all data columns are valid
+        assert len(set(data_columns)-set(valid_columns)) == 0
+        # Check that all required columns are in the data
+        assert len(set(required_columns)-set(data_columns)) == 0
+        # Assert that node_type and link_type are not in data columns
+        assert 'node_type' not in data_columns
+        assert 'link_type' not in data_columns

if __name__ == "__main__":
unittest.main()