Skip to content

Commit

Permalink
Merge pull request #413 from Living-with-machines/issue-398
Browse files: browse the repository at this point in the history
Update saving for duplicate sheet names with different coords
  • Loading branch information
rwood-97 authored May 3, 2024
2 parents 565d8aa + 04248fc commit fe370ee
Show file tree
Hide file tree
Showing 4 changed files with 436 additions and 86 deletions.
99 changes: 73 additions & 26 deletions mapreader/download/sheet_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def _initialise_merger(self, path_save: str):
"""
self.merger = TileMerger(output_folder=f"{path_save}/")

def _check_map_sheet_exists(self, feature: dict) -> bool:
def _check_map_sheet_exists(self, feature: dict, metadata_fname) -> bool:
"""
Checks if a map sheet is already saved.
Expand All @@ -544,51 +544,87 @@ def _check_map_sheet_exists(self, feature: dict) -> bool:
Returns
-------
bool
True if file exists, False if not.
img_path if file exists, False if not.
"""
map_name = str("map_" + feature["properties"]["IMAGE"])
path_save = self.merger.output_folder
if os.path.exists(f"{path_save}{map_name}.png"):

try:
# get image id with same coords in metadata
existing_metadata_df = pd.read_csv(
f"{path_save}{metadata_fname}", sep=",", index_col=0
)
except FileNotFoundError:
return False

polygon = get_polygon_from_grid_bb(feature["grid_bb"])
if str(polygon.bounds) in existing_metadata_df["coordinates"].values:
image_id = existing_metadata_df[
existing_metadata_df["coordinates"] == str(polygon.bounds)
].iloc[0]["name"]
else:
return False # coordinates not in metadata means image doesn't exist

if os.path.exists(f"{path_save}{image_id}"):
try:
mpimg.imread(f"{path_save}{map_name}.png")
print(
f'[INFO] "{path_save}{map_name}.png" already exists. Skipping download.'
)
return True
# check image is valid
mpimg.imread(f"{path_save}{image_id}")
return image_id
except OSError:
return False
return False

def _download_map(self, feature: dict, download_in_parallel: bool = True) -> bool:
def _download_map(
self,
feature: dict,
existing_id: str | bool,
download_in_parallel: bool = True,
overwrite: bool = False,
) -> str | bool:
"""
Downloads a single map sheet and saves as png file.
Parameters
----------
feature : dict
The feature for which to download the map sheet.
existing_id : str | bool
The existing image id if the map sheet already exists.
download_in_parallel : bool, optional
Whether to download tiles in parallel, by default ``True``.
overwrite : bool, optional
Whether to overwrite existing maps, by default ``False``.
Returns
-------
bool
True if map was downloaded successfully, False if not.
str or bool
image path if map was downloaded successfully, False if not.
"""
map_name = str("map_" + feature["properties"]["IMAGE"])
self.downloader.download_tiles(
feature["grid_bb"], download_in_parallel=download_in_parallel
)
success = self.merger.merge(feature["grid_bb"], map_name)
if success:
print(f'[INFO] Downloaded "{map_name}.png"')

if existing_id is False:
map_name = f"map_{feature['properties']['IMAGE']}"
else:
map_name = existing_id[:-4] # remove file extension (assuming .png)

img_path = self.merger.merge(
feature["grid_bb"], file_name=map_name, overwrite=overwrite
)

if img_path is not False:
print(f'[INFO] Downloaded "{img_path}"')
else:
print(f'[WARNING] Download of "{map_name}.png" was unsuccessful.')
print(f'[WARNING] Download of "{img_path}" was unsuccessful.')

shutil.rmtree(DEFAULT_TEMP_FOLDER)
return success
return img_path

def _save_metadata(
self,
feature: dict,
out_filepath: str,
img_path: str,
metadata_to_save: dict | None = None,
**kwargs: dict | None,
) -> None:
Expand All @@ -602,6 +638,8 @@ def _save_metadata(
The feature for which to extract the metadata from
out_filepath : str
The path to save metadata csv.
img_path : str
The path to the downloaded map sheet.
metadata_to_save : dict, optional
A dictionary containing column names (str) and metadata keys (str or list) to save to metadata csv.
Multilayer keys should be passed as a list, i.e. ["key1","key2"] will search for ``self.features[i]["key1"]["key2"]``.
Expand Down Expand Up @@ -631,7 +669,7 @@ def _save_metadata(
metadata_dict = {col: None for col in metadata_cols}

# get default metadata
metadata_dict["name"] = str("map_" + feature["properties"]["IMAGE"] + ".png")
metadata_dict["name"] = os.path.basename(img_path)
metadata_dict["url"] = str(feature["properties"]["IMAGEURL"])
if not self.published_dates:
date_col = kwargs.get("date_col", None)
Expand Down Expand Up @@ -705,16 +743,25 @@ def _download_map_sheets(
"""

for feature in tqdm(features):
if not overwrite:
if self._check_map_sheet_exists(feature):
continue
success = self._download_map(
feature, download_in_parallel=download_in_parallel
existing_id = self._check_map_sheet_exists(feature, metadata_fname)
if (
not overwrite and existing_id is not False
): # if map already exists and overwrite is False then skip
print(f'[INFO] "{existing_id}" already exists. Skipping download.')
continue
img_path = self._download_map(
feature,
existing_id,
download_in_parallel=download_in_parallel,
overwrite=overwrite,
)
if success:
if img_path is not False:
metadata_path = f"{path_save}/{metadata_fname}"
self._save_metadata(
feature=feature, out_filepath=metadata_path, **kwargs
feature=feature,
out_filepath=metadata_path,
img_path=img_path,
**kwargs,
)

def download_all_map_sheets(
Expand Down
30 changes: 22 additions & 8 deletions mapreader/download/tile_merging.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,12 @@ def _load_tile_size(self, grid_bb: GridBoundingBox):
tile_size = img_size[0]
return tile_size

def merge(self, grid_bb: GridBoundingBox, file_name: str | None = None) -> bool:
def merge(
self,
grid_bb: GridBoundingBox,
file_name: str | None = None,
overwrite: bool = False,
) -> str | bool:
"""Merges cells contained within GridBoundingBox.
Parameters
Expand All @@ -151,11 +156,13 @@ def merge(self, grid_bb: GridBoundingBox, file_name: str | None = None) -> bool:
GridBoundingBox containing tiles to merge
file_name : Union[str, None], optional
Name to use when saving map
If None, default name will be used, by default None
overwrite : bool, optional
Whether or not to overwrite existing files, by default False
Returns
-------
bool
True if file has successfully downloaded, False if not.
str or bool
out path if file has successfully downloaded, False if not.
"""
os.makedirs(self.output_folder, exist_ok=True)

Expand Down Expand Up @@ -191,11 +198,18 @@ def merge(self, grid_bb: GridBoundingBox, file_name: str | None = None) -> bool:
file_name = self._get_output_name(grid_bb)

out_path = f"{self.output_folder}{file_name}.{self.img_output_format[0]}"
if not overwrite:
i = 1
while os.path.exists(out_path):
out_path = (
f"{self.output_folder}{file_name}_{i}.{self.img_output_format[0]}"
)
i += 1
merged_image.save(out_path, self.img_output_format[1])
success = True if os.path.exists(out_path) else False
if success:
logger.info(f"Merge successful! The image has been stored at '{out_path}'")
else:
success = out_path if os.path.exists(out_path) else False
if success is False:
logger.warning(f"Merge unsuccessful! '{out_path}' not saved.")
else:
logger.info(f"Merge successful! The image has been stored at '{out_path}'")

return success
Loading

0 comments on commit fe370ee

Please sign in to comment.