Skip to content

Commit

Permalink
feat: move concatenate_json_files and pull_neptune_data functions to …
Browse files Browse the repository at this point in the history
…json_utils.py
  • Loading branch information
RuanJohn committed Feb 26, 2024
1 parent b2669c0 commit b1a6a36
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 76 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,9 @@ Here `run_1` to `run_n` correspond to the number of independent runs in a given

[**JSON Logger**](marl_eval/json_tools/json_logger.py): `JsonLogger` handles logging data according to the structured format detailed [above](#exp_structure).

[**Neptune Data Pulling Script**](marl_eval/json_tools/pull_neptune_data.py): `pull_neptune_data` connects to a Neptune project, retrieves experiment data from a given list of tags and downloads it to a local directory. This function is particularly useful when there is a need to pull data from multiple experiments that were logged separately on Neptune.
[**Neptune Data Pulling Script**](marl_eval/json_tools/json_utils.py): `pull_neptune_data` connects to a Neptune project, retrieves experiment data from a given list of tags and downloads it to a local directory. This function is particularly useful when there is a need to pull data from multiple experiments that were logged separately on Neptune.

[**JSON File Merging Script**](marl_eval/json_tools/merge_json_files.py): `concatenate_json_files` reads multiple JSON files from a specified local directory and concatenates their contents into a single structured JSON file.
[**JSON File Merging Script**](marl_eval/json_tools/json_utils.py): `concatenate_json_files` reads multiple JSON files from a specified local directory and concatenates their contents into a single structured JSON file.

> 📌 Using `pull_neptune_data` followed by `concatenate_json_files` forms an effective workflow, where multiple JSON files from different experiment runs are first pulled from Neptune and then merged into a single file, ready for use in marl-eval.
Expand Down
3 changes: 1 addition & 2 deletions marl_eval/json_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@

"""JSON tools for data preprocessing."""
from .json_logger import JsonLogger
from .merge_json_files import concatenate_json_files
from .pull_neptune_data import pull_neptune_data
from .json_utils import concatenate_json_files, pull_neptune_data
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@

import json
import os
import zipfile
from collections import defaultdict
from typing import Dict, Tuple
from typing import Dict, List, Tuple

import neptune
from colorama import Fore, Style
from tqdm import tqdm


def _read_json_files(directory: str) -> list:
Expand Down Expand Up @@ -104,3 +107,52 @@ def concatenate_json_files(
+ f"{output_json_path}metrics.json successfully!{Style.RESET_ALL}"
)
return concatenated_data


def pull_neptune_data(
    project_name: str,
    tag: List,
    store_directory: str = "./downloaded_json_data",
    neptune_data_key: str = "metrics",
) -> None:
    """Pulls experiment json data from Neptune to a local directory.

    Args:
        project_name (str): Name of the Neptune project.
        tag (List): List of tags.
        store_directory (str, optional): Directory to store the data.
        neptune_data_key (str): Key in the neptune run where the json data is stored.
    """
    # Get the run ids for all inactive runs matching the given tags.
    project = neptune.init_project(project=project_name)
    runs_table_df = project.fetch_runs_table(state="inactive", tag=tag).to_pandas()
    run_ids = runs_table_df["sys/id"].values.tolist()

    # Ensure the download directory exists (no-op if already present).
    os.makedirs(store_directory, exist_ok=True)

    # Download and unzip the data
    for run_id in tqdm(run_ids, desc="Downloading Neptune Data"):
        run = neptune.init_run(project=project_name, with_id=run_id, mode="read-only")
        try:
            for data_key in run.get_structure()[neptune_data_key].keys():
                file_path = f"{store_directory}/{data_key}"
                run[f"{neptune_data_key}/{data_key}"].download(destination=file_path)
                # Try to unzip the file else continue to the next file
                try:
                    with zipfile.ZipFile(file_path, "r") as zip_ref:
                        # Create a directory to store the unzipped data.
                        os.makedirs(f"{file_path}_unzip", exist_ok=True)
                        # Unzip the data
                        zip_ref.extractall(f"{file_path}_unzip")
                    # Remove the zip file only after the archive is closed
                    # (deleting an open file fails on Windows).
                    os.remove(file_path)
                except zipfile.BadZipFile:
                    # If the file is not zipped continue to the next file
                    # as it is already downloaded.
                    continue
                except Exception as e:
                    print(
                        f"An error occurred while unzipping or storing {file_path}: {e}"
                    )
        finally:
            # Always close the Neptune connection, even if an unexpected
            # error escapes the loop body above.
            run.stop()

    print(f"{Fore.CYAN}{Style.BRIGHT}Data downloaded successfully!{Style.RESET_ALL}")
71 changes: 0 additions & 71 deletions marl_eval/json_tools/pull_neptune_data.py

This file was deleted.

0 comments on commit b1a6a36

Please sign in to comment.