From 79a550c29ce702a7d7696899360e5137b1e6c359 Mon Sep 17 00:00:00 2001 From: Forthoney Date: Sat, 16 Dec 2023 14:49:05 +0900 Subject: [PATCH 1/2] make separate cli module for all argparsers --- compiler/cli.py | 266 ++++++++++++++++++++++++++ compiler/config.py | 164 ---------------- compiler/pash.py | 78 +------- compiler/pash_compilation_server.py | 10 +- compiler/pash_compiler.py | 26 +-- compiler/preprocessor/preprocessor.py | 33 +--- 6 files changed, 279 insertions(+), 298 deletions(-) create mode 100644 compiler/cli.py diff --git a/compiler/cli.py b/compiler/cli.py new file mode 100644 index 000000000..48863de31 --- /dev/null +++ b/compiler/cli.py @@ -0,0 +1,266 @@ +import argparse +import os + + +class BaseParser(argparse.ArgumentParser): + """ + Base class for all Argument Parsers used by PaSh. It has two configurable flags + by default: debug and log_file. + + Other flags are available by classes which inherit BaseParser + """ + + @staticmethod + def _get_width(): + cpus = os.cpu_count() + assert cpus is not None + return cpus // 8 if cpus >= 16 else 2 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_argument( + "-d", + "--debug", + type=int, + help="configure debug level; defaults to 0", + default=0, + ) + self.add_argument( + "--log_file", + help="configure where to write the log; defaults to stderr.", + default="", + ) + + def add_pash_args(self): + self.add_argument( + "-w", + "--width", + type=int, + default=self._get_width(), + help="set data-parallelism factor", + ) + self.add_argument( + "--no_optimize", + help="not apply transformations over the DFG", + action="store_true", + ) + self.add_argument( + "--dry_run_compiler", + help="not execute the compiled script, even if the compiler succeeded", + action="store_true", + ) + self.add_argument( + "--assert_compiler_success", + help="assert that the compiler succeeded (used to make tests more robust)", + action="store_true", + ) + self.add_argument( + "--avoid_pash_runtime_completion", + help="avoid the pash_runtime execution completion (only relevant when --debug > 0)", + action="store_true", + ) + self.add_argument( + "-p", + "--output_optimized", # FIXME: --print + help="output the parallel shell script for inspection", + action="store_true", + ) + self.add_argument( + "--graphviz", + help="generates graphical representations of the dataflow graphs. The option argument corresponds to the format. PaSh stores them in a timestamped directory in the argument of --graphviz_dir", + choices=["no", "dot", "svg", "pdf", "png"], + default="no", + ) + ## TODO: To discuss: Do we maybe want to have graphviz to always be included + ## in the temp directory (under a graphviz subdirectory) instead of in its own? + ## kk: I think that ideally we want a log-directory where we can put logs, graphviz, + ## and other observability and monitoring info (instead of putting them in the temp). + self.add_argument( + "--graphviz_dir", + help="the directory in which to store graphical representations", + default="/tmp", + ) + self.add_argument( + "--parallel_pipelines", + help="Run multiple pipelines in parallel if they are safe to run", + action="store_true", + default=False, + ) + self.add_argument( + "--r_split_batch_size", + type=int, + help="configure the batch size of r_split (default: 1MB)", + default=1000000, + ) + self.add_argument( + "--config_path", + help="determines the config file path. 
By default it is 'PASH_TOP/compiler/config.yaml'.", + default="", + ) + self.add_argument( + "--version", + action="version", + version="%(prog)s {version}".format( + version="0.12.2" + ), # What does this version mean? + ) + + self.add_experimental_args() + + def add_experimental_args(self): + self.add_argument( + "--no_eager", + help="(experimental) disable eager nodes before merging nodes", + action="store_true", + ) + self.add_argument( + "--profile_driven", + help="(experimental) use profiling information when optimizing", + action="store_true", + ) + self.add_argument( + "--speculative", + help="(experimental) use the speculative execution preprocessing and runtime (NOTE: this has nothing to do with --speculation, which is actually misnamed, and should be named concurrent compilation/execution and is now obsolete)", + action="store_true", + default=False, + ) + self.add_argument( + "--termination", + help="(experimental) determine the termination behavior of the DFG. Defaults to cleanup after the last process dies, but can drain all streams until depletion", + choices=["clean_up_graph", "drain_stream"], + default="clean_up_graph", + ) + self.add_argument( + "--daemon_communicates_through_unix_pipes", + help="(experimental) the daemon communicates through unix pipes instead of sockets", + action="store_true", + ) + self.add_argument( + "--distributed_exec", + help="(experimental) execute the script in a distributed environment. Remote machines should be configured and ready", + action="store_true", + default=False, + ) + + +class RunnerParser(BaseParser): + """ + Parser for the PaSh Runner in compiler/pash.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_pash_args() + + self.add_argument( + "input", + nargs="*", + help="the script to be compiled and executed (followed by any command-line arguments", + ) + self.add_argument( + "--preprocess_only", + help="only preprocess the input script and not execute it", + action="store_true", + ) + self.add_argument( + "--output_preprocessed", + help=" output the preprocessed script", + action="store_true", + ) + self.add_argument( + "--interactive", + help="Executes the script using an interactive internal shell session (experimental)", + action="store_true", + ) + self.add_argument( + "-c", + "--command", + help="Evaluate the following as a script, rather than a file", + default=None, + ) + ## This is not the correct way to parse these, because more than one option can be given together, e.g., -ae + self.add_argument( + "-a", + help="Enabling the `allexport` shell option", + action="store_true", + default=False, + ) + self.add_argument( + "+a", + help="Disabling the `allexport` shell option", + action="store_false", + default=False, + ) + ## These two are here for compatibility with respect to bash + self.add_argument( + "-v", + help="(experimental) prints shell input lines as they are read", + action="store_true", + ) + self.add_argument( + "-x", + help="(experimental) prints commands and their arguments as they execute", + action="store_true", + ) + self.set_defaults(preprocess_mode="pash") + + +class CompilerParser(BaseParser): + """ + Parser for the PaSh compiler in compiler/pash_compiler.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_pash_args() + + self.add_argument( + "compiled_script_file", + help="the file in which to output the compiled script", + ) + self.add_argument( + "input_ir", + help="the file containing the dataflow graph to be 
optimized and executed", + ) + self.add_argument( + "--var_file", + help="determines the path of a file containing all shell variables.", + default=None, + ) + + +class PreprocessorParser(BaseParser): + """ + Parser for the preprocessor in compiler/preprocessor/preprocessor.py + Generates two subparsers + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + subparser = self.add_subparsers(help="sub-command help") + self.add_pash_subparser(subparser) + self.add_spec_subparser(subparser) + + @staticmethod + def add_pash_subparser(subparser): + parser_pash = subparser.add_parser( + "pash", help="Preprocess the script so that it can be run with PaSh" + ) + parser_pash.add_pash_args() + parser_pash.add_argument("input", help="the script to be preprocessed") + parser_pash.set_defaults(preprocess_mode="pash") + + @staticmethod + def add_spec_subparser(subparser): + # create the parser for the "b" command + parser_spec = subparser.add_parser( + "spec", help="Preprocess the script so that it can be run with speculation" + ) + parser_spec.add_argument("input", help="the script to be preprocessed") + + ## TODO: When we better integrate, this should be automatically set. + parser_spec.add_argument( + "partial_order_file", + help="the file to store the partial order (currently just a sequence)", + ) + parser_spec.set_defaults(preprocess_mode="spec") diff --git a/compiler/config.py b/compiler/config.py index e8276bd9a..0eaa87186 100644 --- a/compiler/config.py +++ b/compiler/config.py @@ -2,7 +2,6 @@ import logging import os import subprocess -import math from util import * @@ -61,7 +60,6 @@ def set_config_globals_from_pash_args(given_pash_args): global pash_args, OUTPUT_TIME, DEBUG_LEVEL, LOG_FILE pash_args = given_pash_args - OUTPUT_TIME = pash_args.output_time DEBUG_LEVEL = pash_args.debug LOG_FILE = pash_args.log_file @@ -112,166 +110,6 @@ def load_config(config_file_path=""): config = pash_config -def getWidth(): - cpus = os.cpu_count() - return math.floor(cpus / 8) if cpus >= 16 else 2 - - -def add_general_config_arguments(parser): - ## TODO: Delete that at some point, or make it have a different use (e.g., outputting time even without -d 1). 
- parser.add_argument( - "-t", - "--output_time", # FIXME: --time - help="(obsolete, time is always logged now) output the time it took for every step", - action="store_true", - ) - parser.add_argument( - "-d", - "--debug", - type=int, - help="configure debug level; defaults to 0", - default=0, - ) - parser.add_argument( - "--log_file", - help="configure where to write the log; defaults to stderr.", - default="", - ) - - -## These are arguments that are common to pash.py and pash_compiler.py -def add_common_arguments(parser): - add_general_config_arguments(parser) - - parser.add_argument( - "-w", - "--width", - type=int, - default=getWidth(), - help="set data-parallelism factor", - ) - parser.add_argument( - "--no_optimize", - help="not apply transformations over the DFG", - action="store_true", - ) - parser.add_argument( - "--dry_run_compiler", - help="not execute the compiled script, even if the compiler succeeded", - action="store_true", - ) - parser.add_argument( - "--assert_compiler_success", - help="assert that the compiler succeeded (used to make tests more robust)", - action="store_true", - ) - parser.add_argument( - "--avoid_pash_runtime_completion", - help="avoid the pash_runtime execution completion (only relevant when --debug > 0)", - action="store_true", - ) - parser.add_argument( - "--profile_driven", - help="(experimental) use profiling information when optimizing", - action="store_true", - ) - parser.add_argument( - "-p", - "--output_optimized", # FIXME: --print - help="output the parallel shell script for inspection", - action="store_true", - ) - parser.add_argument( - "--graphviz", - help="generates graphical representations of the dataflow graphs. The option argument corresponds to the format. PaSh stores them in a timestamped directory in the argument of --graphviz_dir", - choices=["no", "dot", "svg", "pdf", "png"], - default="no", - ) - ## TODO: To discuss: Do we maybe want to have graphviz to always be included - ## in the temp directory (under a graphviz subdirectory) instead of in its own? - ## kk: I think that ideally we want a log-directory where we can put logs, graphviz, - ## and other observability and monitoring info (instead of putting them in the temp). 
- parser.add_argument( - "--graphviz_dir", - help="the directory in which to store graphical representations", - default="/tmp", - ) - parser.add_argument( - "--no_eager", - help="(experimental) disable eager nodes before merging nodes", - action="store_true", - ) - parser.add_argument( - "--no_daemon", - help="(obsolete) does nothing -- Run the compiler everytime we need a compilation instead of using the daemon", - action="store_true", - default=False, - ) - parser.add_argument( - "--parallel_pipelines", - help="Run multiple pipelines in parallel if they are safe to run", - action="store_true", - default=False, - ) - parser.add_argument( - "--r_split_batch_size", - type=int, - help="configure the batch size of r_split (default: 1MB)", - default=1000000, - ) - parser.add_argument( - "--r_split", - help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", - action="store_true", - ) - parser.add_argument( - "--dgsh_tee", - help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", - action="store_true", - ) - parser.add_argument( - "--speculative", - help="(experimental) use the speculative execution preprocessing and runtime (NOTE: this has nothing to do with --speculation, which is actually misnamed, and should be named concurrent compilation/execution and is now obsolete)", - action="store_true", - default=False, - ) - ## This is misnamed, it should be named concurrent compilation/execution - parser.add_argument( - "--speculation", - help="(obsolete) does nothing -- run the original script during compilation; if compilation succeeds, abort the original and run only the parallel (quick_abort) (Default: no_spec)", - choices=["no_spec", "quick_abort"], - default="no_spec", - ) - parser.add_argument( - "--termination", - help="(experimental) determine the termination behavior of the DFG. Defaults to cleanup after the last process dies, but can drain all streams until depletion", - choices=["clean_up_graph", "drain_stream"], - default="clean_up_graph", - ) - parser.add_argument( - "--daemon_communicates_through_unix_pipes", - help="(experimental) the daemon communicates through unix pipes instead of sockets", - action="store_true", - ) - parser.add_argument( - "--distributed_exec", - help="(experimental) execute the script in a distributed environment. Remote machines should be configured and ready", - action="store_true", - default=False, - ) - parser.add_argument( - "--config_path", - help="determines the config file path. 
By default it is 'PASH_TOP/compiler/config.yaml'.", - default="", - ) - parser.add_argument( - "--version", - action="version", - version="%(prog)s {version}".format(version=__version__), - ) - return - - def pass_common_arguments(pash_arguments): arguments = [] if pash_arguments.no_optimize: @@ -284,8 +122,6 @@ def pass_common_arguments(pash_arguments): arguments.append("--avoid_pash_runtime_completion") if pash_arguments.profile_driven: arguments.append("--profile_driven") - if pash_arguments.output_time: - arguments.append("--output_time") if pash_arguments.output_optimized: arguments.append("--output_optimized") arguments.append("--graphviz") diff --git a/compiler/pash.py b/compiler/pash.py index 627da39af..6554bcc1b 100755 --- a/compiler/pash.py +++ b/compiler/pash.py @@ -1,19 +1,14 @@ import sys import os import subprocess -import argparse -from datetime import datetime - -from shell_ast import ast_to_ast from ir import * -from parse import parse_shell_to_asts_interactive from pash_graphviz import maybe_init_graphviz_dir from preprocessor.preprocessor import preprocess from speculative import util_spec from util import * import config -import shutil +from cli import RunnerParser LOGGING_PREFIX = "PaSh: " @@ -72,69 +67,7 @@ def parse_args(): if "PASH_FROM_SH" in os.environ: prog_name = os.environ["PASH_FROM_SH"] ## We need to set `+` as a prefix char too - parser = argparse.ArgumentParser(prog_name, prefix_chars="-+") - parser.add_argument( - "input", - nargs="*", - help="the script to be compiled and executed (followed by any command-line arguments", - ) - parser.add_argument( - "--preprocess_only", - help="only preprocess the input script and not execute it", - action="store_true", - ) - parser.add_argument( - "--output_preprocessed", - help=" output the preprocessed script", - action="store_true", - ) - parser.add_argument( - "--interactive", - help="Executes the script using an interactive internal shell session (experimental)", - action="store_true", - ) - parser.add_argument( - "-c", - "--command", - help="Evaluate the following as a script, rather than a file", - default=None, - ) - ## This is not the correct way to parse these, because more than one option can be given together, e.g., -ae - parser.add_argument( - "-a", - help="Enabling the `allexport` shell option", - action="store_true", - default=False, - ) - parser.add_argument( - "+a", - help="Disabling the `allexport` shell option", - action="store_false", - default=False, - ) - ## These two are here for compatibility with respect to bash - parser.add_argument( - "-v", - help="(experimental) prints shell input lines as they are read", - action="store_true", - ) - parser.add_argument( - "-x", - help="(experimental) prints commands and their arguments as they execute", - action="store_true", - ) - ## Deprecated argument... 
keeping here just to output the message - ## TODO: Do that with a custom argparse Action (KK: I tried and failed) - parser.add_argument( - "--expand_using_bash_mirror", - help="DEPRECATED: instead of expanding using the internal expansion code, expand using a bash mirror process (slow)", - action="store_true", - ) - - ## Set the preprocessing mode to PaSh - parser.set_defaults(preprocess_mode="pash") - - config.add_common_arguments(parser) + parser = RunnerParser(prog_name, prefix_chars="-+") args = parser.parse_args() config.set_config_globals_from_pash_args(args) @@ -159,13 +92,6 @@ def parse_args(): log(arg_name, arg_val) log("-" * 40) - ## Print the deprecated argument - if args.expand_using_bash_mirror: - log( - "WARNING: Option --expand_using_bash_mirror is deprecated and is *ignored*.", - level=0, - ) - ## TODO: We might need to have a better default (like $0 of pa.sh) shell_name = "pash" diff --git a/compiler/pash_compilation_server.py b/compiler/pash_compilation_server.py index 47e352867..537bceb8a 100644 --- a/compiler/pash_compilation_server.py +++ b/compiler/pash_compilation_server.py @@ -1,6 +1,4 @@ -import argparse import signal -import traceback from threading import Thread from datetime import datetime, timedelta @@ -15,6 +13,8 @@ from dspash.worker_manager import WorkersManager import server_util +from cli import BaseParser + ## ## A Daemon (not with the strict Unix sense) ## that responds to requests for compilation @@ -30,9 +30,9 @@ def handler(signum, frame): def parse_args(): - parser = argparse.ArgumentParser(add_help=False) - config.add_common_arguments(parser) - args, unknown_args = parser.parse_known_args() + parser = BaseParser(add_help=False) + parser.add_pash_args() + args, _ = parser.parse_known_args() return args diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 6b4e6829a..c4fc7282e 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -1,13 +1,8 @@ -import argparse import sys import pickle import traceback from datetime import datetime -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorKind import ( - AggregatorKindEnum, -) - from sh_expand import env_vars_util import config @@ -19,11 +14,9 @@ from definitions.ir.aggregator_node import * -from definitions.ir.dfg_node import DFGNode from definitions.ir.nodes.eager import * from definitions.ir.nodes.pash_split import * -import definitions.ir.nodes.r_merge as r_merge import definitions.ir.nodes.r_split as r_split import definitions.ir.nodes.r_unwrap as r_unwrap import definitions.ir.nodes.dgsh_tee as dgsh_tee @@ -32,6 +25,8 @@ # Distirbuted Exec import dspash.hdfs_utils as hdfs_utils +from cli import CompilerParser + runtime_config = {} @@ -74,21 +69,8 @@ def main_body(): def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "compiled_script_file", help="the file in which to output the compiled script" - ) - parser.add_argument( - "input_ir", - help="the file containing the dataflow graph to be optimized and executed", - ) - parser.add_argument( - "--var_file", - help="determines the path of a file containing all shell variables.", - default=None, - ) - config.add_common_arguments(parser) - args, unknown_args = parser.parse_known_args() + parser = CompilerParser() + args, _ = parser.parse_known_args() return args diff --git a/compiler/preprocessor/preprocessor.py b/compiler/preprocessor/preprocessor.py index 11139e17b..817aeaf84 100644 --- a/compiler/preprocessor/preprocessor.py +++ 
b/compiler/preprocessor/preprocessor.py @@ -1,14 +1,13 @@ -import argparse from datetime import datetime import os import config from shell_ast import transformation_options, ast_to_ast -from ir import FileIdGen from parse import parse_shell_to_asts, from_ast_objects_to_shell from util import * import server_util from speculative import util_spec +from cli import PreprocessorParser LOGGING_PREFIX = "PaSh Preprocessor: " @@ -82,36 +81,8 @@ def preprocess_asts(ast_objects, args): return preprocessed_asts -## -## This is the command line interface for the preprocessor -## def main(): - parser = argparse.ArgumentParser() - config.add_general_config_arguments(parser) - - subparsers = parser.add_subparsers(help="sub-command help") - - # create the parser for the "a" command - parser_pash = subparsers.add_parser( - "pash", help="Preprocess the script so that it can be run with PaSh" - ) - config.add_common_arguments(parser_pash) - parser_pash.add_argument("input", help="the script to be preprocessed") - parser_pash.set_defaults(preprocess_mode="pash") - - # create the parser for the "b" command - parser_spec = subparsers.add_parser( - "spec", help="Preprocess the script so that it can be run with speculation" - ) - parser_spec.add_argument("input", help="the script to be preprocessed") - - ## TODO: When we better integrate, this should be automatically set. - parser_spec.add_argument( - "partial_order_file", - help="the file to store the partial order (currently just a sequence)", - ) - parser_spec.set_defaults(preprocess_mode="spec") - + parser = PreprocessorParser() args = parser.parse_args() config.set_config_globals_from_pash_args(args) From 0f046c589b00b1118628b3bd91f241d15e42cdad Mon Sep 17 00:00:00 2001 From: Forthoney Date: Sun, 17 Dec 2023 10:06:40 +0900 Subject: [PATCH 2/2] add obsolete arguments back --- compiler/cli.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/compiler/cli.py b/compiler/cli.py index 2650c711c..7f9a3db6b 100644 --- a/compiler/cli.py +++ b/compiler/cli.py @@ -18,6 +18,12 @@ def _get_width(): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.add_argument( + "-t", + "--output_time", # FIXME: --time + help="(obsolete, time is always logged now) output the time it took for every step", + action="store_true", + ) self.add_argument( "-d", "--debug", @@ -112,6 +118,37 @@ def add_pash_args(self): ) self.add_experimental_args() + self.add_obsolete_args() + + def add_obsolete_args(self): + self.add_argument( + "--no_daemon", + help="(obsolete) does nothing -- Run the compiler everytime we need a compilation instead of using the daemon", + action="store_true", + default=False, + ) + self.add_argument( + "--parallel_pipelines", + help="(obsolete) Run multiple pipelines in parallel if they are safe to run. Now true by default. 
See --no_parallel_pipelines.", + action="store_true", + default=True, + ) + self.add_argument( + "--r_split", + help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", + action="store_true", + ) + self.add_argument( + "--dgsh_tee", + help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", + action="store_true", + ) + self.add_argument( + "--speculation", + help="(obsolete) does nothing -- run the original script during compilation; if compilation succeeds, abort the original and run only the parallel (quick_abort) (Default: no_spec)", + choices=["no_spec", "quick_abort"], + default="no_spec", + ) def add_experimental_args(self): self.add_argument(
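Usage sketch (not part of the patch): a minimal example of how the refactored parsers in compiler/cli.py are meant to be constructed, mirroring the call sites the patch rewrites in pash.py, pash_compilation_server.py, and pash_compiler.py. It assumes PASH_TOP/compiler is on sys.path, as it is for those entry points; the argv values below are purely illustrative and not taken from the patch.

# Minimal sketch, assuming the cli module is importable; argv values are
# illustrative only.
from cli import BaseParser, RunnerParser, CompilerParser

# compiler/pash.py: "+" must be an extra prefix char so "+a" parses alongside "-a".
runner = RunnerParser("pash", prefix_chars="-+")
runner_args = runner.parse_args(["--width", "2", "-p", "script.sh", "arg1"])
print(runner_args.width, runner_args.input)   # 2 ['script.sh', 'arg1']

# compiler/pash_compilation_server.py: a bare BaseParser plus the shared PaSh
# flags, tolerating unknown options via parse_known_args.
daemon = BaseParser(add_help=False)
daemon.add_pash_args()
daemon_args, _ = daemon.parse_known_args(["-d", "1"])

# compiler/pash_compiler.py: two positionals (compiled script, dataflow graph)
# and an optional --var_file.
compiler_args, _ = CompilerParser().parse_known_args(
    ["compiled.sh", "dfg.ir", "--var_file", "/tmp/shell_vars"]
)

Centralizing the shared flags in BaseParser.add_pash_args() is what lets these three entry points drop their hand-maintained copies of config.add_common_arguments() while keeping an identical flag surface.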