From bee9701212b5a0cafe40cd32f3a2d2b9cab44bfd Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 19:34:17 -0700 Subject: [PATCH 1/3] reset branch to origin From f91c3a2306c091100cc37ba3bad498a87a31167f Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 19:50:45 -0700 Subject: [PATCH 2/3] Create initial outline of Python templating approach to constructing assets.js and tree.js --- assets/python/assets_template.j2 | 25 +++++++++++++++++++++++++ assets/python/build_assets.py | 4 ++++ assets/python/tree_template.j2 | 25 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 assets/python/assets_template.j2 create mode 100644 assets/python/build_assets.py create mode 100644 assets/python/tree_template.j2 diff --git a/assets/python/assets_template.j2 b/assets/python/assets_template.j2 new file mode 100644 index 00000000..552c0a27 --- /dev/null +++ b/assets/python/assets_template.j2 @@ -0,0 +1,25 @@ +import {makeObjectsFromContextProviderNames, readAssetFile} from "../utils"; + +const applicationName = "python"; +const executableName = "python"; + +// Here, we're returning a delayed-evaluation lambda, to avoid loading the asset files in scenarios where they're not +// available, like on the client. 
+export default () => { + const allAssets = [ + /* + * PYTHON + */ + {% for config in configs %} + { + "content": readAssetFile(applicationName, {{ config.assetFileName | quotedstrings | safe }}), + "name": {{ config.name | quotedstrings | safe }}, + "contextProviders": {{ config.contextProviders | quotedstrings | safe }}, + "applicationName": applicationName, + "executableName": executableName, + }, + {% endfor %} + ]; + + return allAssets.map(a => makeObjectsFromContextProviderNames(a)); +}; diff --git a/assets/python/build_assets.py b/assets/python/build_assets.py new file mode 100644 index 00000000..117079f5 --- /dev/null +++ b/assets/python/build_assets.py @@ -0,0 +1,4 @@ +import os + +js_src_path = "../../src/js/python/" + diff --git a/assets/python/tree_template.j2 b/assets/python/tree_template.j2 new file mode 100644 index 00000000..3d78134e --- /dev/null +++ b/assets/python/tree_template.j2 @@ -0,0 +1,25 @@ +import monitors from "../allowed_monitors"; + +export default { + "python": { + "monitors": [ + monitors.standard_output + ], + "results": [], + "flavors": { + {% for config in configs %} + {{ longName | quotedstrings | safe }}: { + "input": [ + {% for input in config.inputs %} + { + "name": {{ input.name }}, + "templateName": {{ input.templateName }}, + }, + {% endfor %} + ], + "monitors": [monitors.standard_output], + }, + {% endfor %} + } + } +} From 6ed1cf4543d59646df8049012b03d79cef8d64a6 Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Tue, 27 Apr 2021 17:19:51 -0700 Subject: [PATCH 3/3] Add various YAML files and template generators --- assets/python/assets_template.j2 | 4 +- assets/python/build_tree.py | 130 ++++++++++++++++++++++++++ assets/python/manifest.yaml | 13 +++ assets/python/manifest_util.yaml | 12 +++ assets/python/ml/data_input.yaml | 8 ++ assets/python/ml/misc.yaml | 16 ++++ assets/python/ml/post_processing.yaml | 12 +++ assets/python/ml/pre_processing.yaml | 17 ++++ 
assets/python/tree_template.j2 | 12 ++- build-pythonML.sh | 10 ++ 10 files changed, 228 insertions(+), 6 deletions(-) create mode 100755 assets/python/build_tree.py create mode 100644 assets/python/manifest.yaml create mode 100644 assets/python/manifest_util.yaml create mode 100644 assets/python/ml/data_input.yaml create mode 100644 assets/python/ml/misc.yaml create mode 100644 assets/python/ml/post_processing.yaml create mode 100644 assets/python/ml/pre_processing.yaml create mode 100644 build-pythonML.sh diff --git a/assets/python/assets_template.j2 b/assets/python/assets_template.j2 index 552c0a27..390a59ea 100644 --- a/assets/python/assets_template.j2 +++ b/assets/python/assets_template.j2 @@ -12,8 +12,8 @@ export default () => { */ {% for config in configs %} { - "content": readAssetFile(applicationName, {{ config.assetFileName | quotedstrings | safe }}), - "name": {{ config.name | quotedstrings | safe }}, + "content": readAssetFile({{ config.applicationDirname }}, {{ config.sourceFilename | quotedstrings | safe }}), + "name": {{ (config.flavorListDisplayName + config.trueExtension) | quotedstrings | safe }}, "contextProviders": {{ config.contextProviders | quotedstrings | safe }}, "applicationName": applicationName, "executableName": executableName, diff --git a/assets/python/build_tree.py b/assets/python/build_tree.py new file mode 100755 index 00000000..b9827e43 --- /dev/null +++ b/assets/python/build_tree.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python +""" +Build script to generate an assets.js and tree.js file for Python sources +to the path given on the command line. +""" +import os, sys, subprocess +from typing import List, Dict +import argparse +import yaml + +# Set up argparse for the script. Currently just takes in a single argument, for the output script. +# Todo: Should we move these build tools to /src, and have the resultant Python/JS packages exist in /build instead?
+parser = argparse.ArgumentParser( + description="Configuration script un by `build-pythonML.sh` in the parent dir. Don't run this directly." +) +parser.add_argument('relative_sources_path', type=str, nargs=1, + help="Path to the Python folder of the JS package") +parser.add_argument('-b', '--base_dir', metavar="base_dir", type=str, nargs=1, + help="Base directory of the package") +args = parser.parse_args() +relative_sources_path: str = args.relative_sources_path.pop() +base_dir: str = args.base_dir.pop() + +# Determine where to output the build files +output_path = os.path.join(base_dir, relative_sources_path) +current_dir = os.path.dirname(__file__) + +# Read in the main manifest +with open(os.path.join(current_dir, "manifest.yaml"), "r") as inp: + main_manifest = yaml.safe_load(inp) + + +class Config(object): + def __init__(self, flavor_list_display_name: str, true_extension: str, source_filename: str, + application_dirname: str, + context_providers: List[str] = None, flavor_list_template_name: str = None, + inputs: List[Dict[str, str]] = None): + self.flavorListDisplayName = flavor_list_display_name + self.trueExtension = true_extension + self.sourceFilename = source_filename + self.application_dirname = application_dirname + + if flavor_list_template_name is None: + self.flavorListTemplateName = flavor_list_display_name + true_extension + else: + self.flavorListTemplateName = flavor_list_template_name + + # Using None as a sentinel value to avoid setting the empty list as a default arg, since we don't want the same + # list instance shared by all members of this class. + if context_providers is None: + self.contextProviders = [] + else: + self.contextProviders = context_providers + + if inputs is None: + self.inputs = [] + else: + self.inputs = inputs + + @classmethod + def from_config_and_manifest(cls, path: str, flavor: Dict[str, str]): + result = None + + if path.startswith("."): + # General python scripts are found in this section.
Many of them are a bit old, + # so the naming convention between the name in assets/tree isn't entirely consistent + # ToDo: Rename the names to allow us to remove this branching behavior + flavor_list_display_name = flavor["flavor_list_display_name"] + true_extension = flavor["true_extension"] + source_filename = flavor["source_filename"] + application_dirname = "python" + + if "extra_inputs" in flavor: + inputs = [{ + "name": flavor['extra_inputs'], + "templateName": flavor['extra_inputs'] + }] + else: + inputs = None + + result = cls(flavor_list_display_name, + true_extension, + source_filename, + application_dirname, + inputs=inputs) + + elif path.startswith("ml"): + # Machine learning scripts here. + flavor_list_display_name = "" + true_extension = "" + source_filename = "" + application_dirname = "python/ml" + + if "extra_inputs" in flavor: + inputs = [{ + "name": flavor['extra_inputs'], + "templateName": flavor['extra_inputs'] + }] + else: + inputs = None + + result = cls(flavor_list_display_name, + true_extension, + source_filename, + application_dirname, + inputs=inputs) + + + if result is None: + raise NotImplementedError(f"The path {path} has not been configured for automatic configuration yet.") + + return result + + +# Figure out where our other manifests are +manifest_paths = [] +for directory, manifests in main_manifest.items(): + if directory == "refactor_into_its_own_directory": + directory = "."
+ for manifest in manifests: + path_to_manifest = os.path.join(directory, manifest) + manifest_paths.append(path_to_manifest) + +# Write the assets files +configs = [] +for path in manifest_paths: + with open(path, "r") as inp: + flavors = list(yaml.safe_load_all(inp)) + for flavor in flavors: + Config.from_config_and_manifest(path, flavor) diff --git a/assets/python/manifest.yaml b/assets/python/manifest.yaml new file mode 100644 index 00000000..c8116778 --- /dev/null +++ b/assets/python/manifest.yaml @@ -0,0 +1,13 @@ +refactor_into_its_own_directory: + - manifest_util.yaml +ml: + - "data_input.yaml" + - "model.yaml" + - "misc.yaml" + - "pre_processing.yaml" + - "post_processing.yaml" + + + + + diff --git a/assets/python/manifest_util.yaml b/assets/python/manifest_util.yaml new file mode 100644 index 00000000..2c4a12ce --- /dev/null +++ b/assets/python/manifest_util.yaml @@ -0,0 +1,12 @@ +flavor_list_display_name: "hello_world" +true_extension: ".py" +source_filename: "hello_world.pyi" +extra_inputs: "requirements.txt" +--- +flavor_list_display_name: "espresso_xml_get_qpt_irr" +true_extension: ".py" +source_filename: "espresso_xml_get_qpt_irr.pyi" +--- +flavor_list_display_name: "requirements" +true_extension: ".txt" +source_filename: "requirements.j2.txt" diff --git a/assets/python/ml/data_input.yaml b/assets/python/ml/data_input.yaml new file mode 100644 index 00000000..3cd4d942 --- /dev/null +++ b/assets/python/ml/data_input.yaml @@ -0,0 +1,8 @@ + +name: "read_csv" +provider: "pandas" + +--- + +name: "train_test_split" +provider: "sklearn" diff --git a/assets/python/ml/misc.yaml b/assets/python/ml/misc.yaml new file mode 100644 index 00000000..1012dc8d --- /dev/null +++ b/assets/python/ml/misc.yaml @@ -0,0 +1,16 @@ +flavor_list_display_name: "pyml:custom" +true_extension: "py" +source_filename: "pyml:custom.pyi" + +--- + +flavor_list_display_name: "pyml:setup_variables_packages" +true_extension: "py" +filename_when_the_job_is_running: "settings.py" 
+source_filename: "pyml:setup_variables_packages.pyi" +extra_inputs: "requirements.txt" + +--- + +flavor_list_display_name: "requirements" +true_extension: "txt" diff --git a/assets/python/ml/post_processing.yaml b/assets/python/ml/post_processing.yaml new file mode 100644 index 00000000..8b242b27 --- /dev/null +++ b/assets/python/ml/post_processing.yaml @@ -0,0 +1,12 @@ +name: "parity_plot" +provider: "matplotlib" + +--- + +name: "pca_2d_clusters" +provider: "matplotlib" + +--- + +name: "roc_curve" +provider: "sklearn" diff --git a/assets/python/ml/pre_processing.yaml b/assets/python/ml/pre_processing.yaml new file mode 100644 index 00000000..84768f76 --- /dev/null +++ b/assets/python/ml/pre_processing.yaml @@ -0,0 +1,17 @@ +name: "min_max_scaler" +provider: "sklearn" + +--- + +name: "remove_duplicates" +provider: "pandas" + +--- + +name: "remove_missing" +provider: "pandas" + +--- + +name: "standardization" +provider: "sklearn" diff --git a/assets/python/tree_template.j2 b/assets/python/tree_template.j2 index 3d78134e..5f1e9076 100644 --- a/assets/python/tree_template.j2 +++ b/assets/python/tree_template.j2 @@ -8,12 +8,16 @@ export default { "results": [], "flavors": { {% for config in configs %} - {{ longName | quotedstrings | safe }}: { + {{ config.flavorListDisplayName | quotedstrings | safe }}: { "input": [ - {% for input in config.inputs %} { - "name": {{ input.name }}, - "templateName": {{ input.templateName }}, + "name": {{ (config.flavorListDisplayName + config.trueExtension) | quotedstrings | safe}}, + "templateName": {{ config.flavorListTemplateName | quotedstrings | safe}}, + }, + {% for input in config.additionalInputs %} + { + "name": {{ input.name | quotedstrings | safe }}, + "templateName": {{ input.templateName | quotedstrings | safe }}, }, {% endfor %} ], diff --git a/build-pythonML.sh b/build-pythonML.sh new file mode 100644 index 00000000..a80a7d24 --- /dev/null +++ b/build-pythonML.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +CURRENT_DIR=$( pwd ) 
+BASE_DIR=$(dirname "$0") +PYTHON_ASSETS_PATH="assets/python" +PATH_TO_PYTHON_TREE="src/js/python" + +cd $PYTHON_ASSETS_PATH +python "build_tree.py" $PATH_TO_PYTHON_TREE -b $BASE_DIR +cd $CURRENT_DIR