diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index f8f6bb0e6..fba31c8d1 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -81,7 +81,9 @@ jobs: runs-on: ubuntu-latest if: github.event.pull_request.draft == false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} - uses: ludeeus/action-shellcheck@master env: diff --git a/compiler/annotations_utils/util_cmd_invocations.py b/compiler/annotations_utils/util_cmd_invocations.py index f012b0dd8..26dd8cb6b 100644 --- a/compiler/annotations_utils/util_cmd_invocations.py +++ b/compiler/annotations_utils/util_cmd_invocations.py @@ -96,8 +96,8 @@ def construct_property_container_from_list_of_properties(list_properties): # this function is needed to wrap a node in `r_wrap` def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping(cmd_inv, edges): # we already expand here - whole_cmd = Arg(string_to_argument("\'")) - arg_cmd_name = Arg(string_to_argument(cmd_inv.cmd_name)) + whole_cmd = Arg.string_to_arg("\'") + arg_cmd_name = Arg.string_to_arg(cmd_inv.cmd_name) arg_flagoptions = [] for flagoption in cmd_inv.flag_option_list: arg_flagoptions += to_arg_flagoption(flagoption, edges) @@ -107,14 +107,14 @@ def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wra all_cmd_parts_arg.extend(arg_operands) for part in all_cmd_parts_arg: whole_cmd.concatenate(part) - whole_cmd.concatenate(Arg(string_to_argument("\'"))) + whole_cmd.concatenate(Arg.string_to_arg("\'")) return whole_cmd def to_arg_flagoption(flagoption, edges): if isinstance(flagoption, Flag): - return [Arg(string_to_argument(flagoption.get_name()))] + return [Arg.string_to_arg(flagoption.get_name())] elif isinstance(flagoption, OptionWithIO): - opt_name_arg = Arg(string_to_argument(flagoption.get_name())) + opt_name_arg = Arg.string_to_arg(flagoption.get_name()) opt_arg_arg = translate_io_var_to_arg_if_applicable(flagoption.get_arg(), edges) return [opt_name_arg, opt_arg_arg] diff --git a/compiler/ast_to_ir.py b/compiler/ast_to_ir.py index b5ce38624..2fda09d92 100644 --- a/compiler/ast_to_ir.py +++ b/compiler/ast_to_ir.py @@ -1,12 +1,12 @@ +import subprocess + +from shasta.ast_node import * +from sh_expand.expand import expand_command, ExpansionState + from shell_ast.ast_util import * from ir import * -from shell_ast.ast_node import * -from shell_ast.ast_node_c import * from util import * from parse import from_ast_objects_to_shell -from shell_ast.expand import * -import subprocess -import config ## TODO: Separate the ir stuff to the bare minimum and ## try to move this to the shell_ast folder. @@ -40,22 +40,25 @@ lambda ast_node: compile_node_redir_subshell(ast_node, fileIdGen, config)), "Background": (lambda fileIdGen, config: lambda ast_node: compile_node_background(ast_node, fileIdGen, config)), - "Defun": (lambda fileIdGen, config: - lambda ast_node: compile_node_defun(ast_node, fileIdGen, config)), "For": (lambda fileIdGen, config: lambda ast_node: compile_node_for(ast_node, fileIdGen, config)) } -def compile_asts(ast_objects, fileIdGen, config): +def compile_asts(ast_objects: "list[AstNode]", fileIdGen, config): compiled_asts = [] acc_ir = None for i, ast_object in enumerate(ast_objects): # log("Compiling AST {}".format(i)) # log(ast_object) + assert(isinstance(ast_object, AstNode)) ## Compile subtrees of the AST to out intermediate representation - expanded_ast = expand_command(ast_object, config) + ## KK 2023-05-25: Would we ever want to pass this state to the expansion + ## of the next object? I don't think so. + exp_state = ExpansionState(config['shell_variables']) + expanded_ast = expand_command(ast_object, exp_state) + # log("Expanded:", expanded_ast) compiled_ast = compile_node(expanded_ast, fileIdGen, config) # log("Compiled AST:") @@ -112,14 +115,12 @@ def compile_node_pipe(ast_node, fileIdGen, config): ## pipeline) the compiled_pipe_nodes should always ## be one IR compiled_ir = compiled_pipe_nodes[0] - ## Note: Save the old ast for the end-to-end prototype - old_ast_node = make_kv(ast_node.construct.value, [ast_node.is_background, ast_node.items]) - compiled_ir.set_ast(old_ast_node) + ## Save the old ast for the end-to-end prototype + old_untyped_ast_node = ast_node.json() + compiled_ir.set_ast(old_untyped_ast_node) ## Set the IR background so that it can be parallelized with ## the next command if the pipeline was run in background compiled_ir.set_background(ast_node.is_background) - ## TODO: If the pipeline is in background, I also have to - ## redirect its stdin, stdout compiled_ast = compiled_ir return compiled_ast @@ -145,70 +146,39 @@ def combine_pipe(ast_nodes): return [combined_nodes] def compile_node_command(ast_node, fileIdGen, config): - construct_str = ast_node.construct.value - old_ast_node = make_kv(construct_str, [ast_node.line_number, - ast_node.assignments, ast_node.arguments, ast_node.redir_list]) - - ## TODO: Do we need the line number? - ## Compile assignments and redirection list compiled_assignments = compile_assignments(ast_node.assignments, fileIdGen, config) compiled_redirections = compile_redirections(ast_node.redir_list, fileIdGen, config) - ## If there are no arguments, the command is just an - ## assignment - ## - ## TODO: The if-branch of this conditional should never be possible since the preprocessor - ## wouldn't replace a call without arguments (simple assignment). - ## - ## Also the return is not in the correct indentation so probably it never gets called - ## in our tests. - ## - ## We should remove it and add the following assert: - ## assert len(ast_node.arguments) > 0 - if(len(ast_node.arguments) == 0): - ## Just compile the assignments. Specifically compile the - ## assigned values, because they might have command - ## substitutions etc.. - compiled_ast = make_kv(construct_str, [ast_node.line_number] + - [compiled_assignments] + [ast_node.arguments, compiled_redirections]) - else: - arguments = ast_node.arguments - command_name = arguments[0] - options = compile_command_arguments(arguments[1:], fileIdGen, config) + ## This should never be possible since the preprocessor + ## wouldn't replace a call without arguments (simple assignment). + assert len(ast_node.arguments) > 0 + + arguments = ast_node.arguments + command_name = arguments[0] + options = compile_command_arguments(arguments[1:], fileIdGen, config) + + try: + ## If the command is not compileable to a DFG the following call will fail + ir = compile_command_to_DFG(fileIdGen, + command_name, + options, + redirections=compiled_redirections) + compiled_ast = ir + except ValueError as err: + log("Command not compiled to DFG:", err) + ## TODO: Maybe we want to fail here instead of waiting for later? + ## Is there any case where a non-compiled command is fine? + # log(traceback.format_exc()) + compiled_arguments = compile_command_arguments(arguments, fileIdGen, config) + compiled_ast = make_kv(type(ast_node).NodeName, + [ast_node.line_number, compiled_assignments, + compiled_arguments, compiled_redirections]) - ## Question: Should we return the command in an IR if one of - ## its arguments is a command substitution? Meaning that we - ## will have to wait for its command to execute first? - ## - ## ANSWER: Kind of. If a command has a command substitution or - ## anything that evaluates we should add it to the IR, but we - ## should also make sure that its category is set to the most - ## general one. That means that it can be executed - ## concurrently with other commands, but it cannot be - ## parallelized. - try: - ## If the command is not compileable to a DFG the following call will fail - ir = compile_command_to_DFG(fileIdGen, - command_name, - options, - redirections=compiled_redirections) - compiled_ast = ir - except ValueError as err: - ## TODO: Delete this log from here - log(err) - ## TODO: Maybe we want to fail here instead of waiting for later? - ## Is there any case where a non-compiled command is fine? - # log(traceback.format_exc()) - compiled_arguments = compile_command_arguments(arguments, fileIdGen, config) - compiled_ast = make_kv(construct_str, - [ast_node.line_number, compiled_assignments, - compiled_arguments, compiled_redirections]) - - return compiled_ast + return compiled_ast def compile_node_and_or_semi(ast_node, fileIdGen, config): - compiled_ast = make_kv(ast_node.construct.value, + compiled_ast = make_kv(type(ast_node).NodeName, [compile_node(ast_node.left_operand, fileIdGen, config), compile_node(ast_node.right_operand, fileIdGen, config)]) return compiled_ast @@ -221,7 +191,7 @@ def compile_node_redir_subshell(ast_node, fileIdGen, config): ## the IR accordingly compiled_ast = compiled_node else: - compiled_ast = make_kv(ast_node.construct.value, [ast_node.line_number, + compiled_ast = make_kv(type(ast_node).NodeName, [ast_node.line_number, compiled_node, ast_node.redir_list]) return compiled_ast @@ -248,24 +218,10 @@ def compile_node_background(ast_node, fileIdGen, config): return compiled_ast -def compile_node_defun(ast_node, fileIdGen, config): - ## It is not clear how we should handle functions. - ## - ## - Should we transform their body to IR? - ## - Should we handle calls to the functions as commands? - ## - ## It seems that we should do both. But we have to think if - ## this introduces any possible problem. - - ## TODO: Investigate whether it is fine to just compile the - ## body of functions. - compiled_body = compile_node(ast_node.body, fileIdGen, config) - return make_kv(construct, [ast_node.line_number, ast_node.name, compiled_body]) - def compile_node_for(ast_node, fileIdGen, config): ## TODO: Investigate what kind of check could we do to make a for ## loop parallel - compiled_ast = make_kv(ast_node.construct.value, + compiled_ast = make_kv(type(ast_node).NodeName, [ast_node.line_number, compile_command_argument(ast_node.argument, fileIdGen, config), compile_node(ast_node.body, fileIdGen, config), @@ -352,25 +308,25 @@ def expand_command_argument(argument, config): ## This function compiles an arg char by recursing if it contains quotes or command substitution. ## ## It is currently being extended to also expand any arguments that are safe to expand. -def compile_arg_char(arg_char, fileIdGen, config): +def compile_arg_char(arg_char: ArgChar, fileIdGen, config): ## Compile the arg char - key, val = get_kv(arg_char) - if (key in ['C', # Single character - 'E']): # Escape + if isinstance(arg_char, CArgChar) \ + or isinstance(arg_char, EArgChar): + # Single character or escape return arg_char - elif (key == 'B'): + elif isinstance(arg_char, BArgChar): ## TODO: I probably have to redirect the input of the compiled ## node (IR) to be closed, and the output to be ## redirected to some file that we will use to write to ## the command argument to complete the command ## substitution. - compiled_node = compile_node(val, fileIdGen, config) - return [key, compiled_node] - elif (key == 'Q'): - compiled_val = compile_command_argument(val, fileIdGen, config) - return [key, compiled_val] + arg_char.node = compile_node(arg_char.node, fileIdGen, config) + return arg_char + elif isinstance(arg_char, QArgChar): + arg_char.arg = compile_command_argument(arg_char.arg, fileIdGen, config) + return arg_char else: - log("Unknown arg_char:", arg_char) + log(f'Unknown arg_char: {arg_char}') ## TODO: Complete this return arg_char @@ -390,11 +346,9 @@ def compile_assignments(assignments, fileIdGen, config): return compiled_assignments def compile_redirection(redirection, fileIdGen, config): - redir_type = redirection[0] - redir_subtype = redirection[1][0] - stream_id = redirection[1][1] - file_arg = compile_command_argument(redirection[1][2], fileIdGen, config) - return [redir_type, [redir_subtype, stream_id, file_arg]] + file_arg = compile_command_argument(redirection.arg, fileIdGen, config) + redirection.arg = file_arg + return redirection def compile_redirections(redirections, fileIdGen, config): compiled_redirections = [compile_redirection(redirection, fileIdGen, config) diff --git a/compiler/config.py b/compiler/config.py index 4d0f2f6fb..c6a9c662b 100644 --- a/compiler/config.py +++ b/compiler/config.py @@ -1,10 +1,8 @@ import json +import logging import os import subprocess import math -import shlex - -from datetime import datetime from util import * @@ -20,11 +18,15 @@ PLANNER_EXECUTABLE = os.path.join(PASH_TOP, "compiler/pash_compiler.py") RUNTIME_EXECUTABLE = os.path.join(PASH_TOP, "compiler/pash_runtime.sh") SAVE_ARGS_EXECUTABLE = os.path.join(PASH_TOP, "runtime/save_args.sh") +SAVE_SHELL_STATE_EXECUTABLE = os.path.join(PASH_TOP, "compiler/orchestrator_runtime/save_shell_state.sh") ## Ensure that PASH_TMP_PREFIX is set by pa.sh assert(not os.getenv('PASH_TMP_PREFIX') is None) PASH_TMP_PREFIX = os.getenv('PASH_TMP_PREFIX') +SOCKET_BUF_SIZE = 8192 + + ## ## Global configuration used by all pash components ## @@ -51,6 +53,22 @@ def set_config_globals_from_pash_args(given_pash_args): DEBUG_LEVEL = pash_args.debug LOG_FILE = pash_args.log_file + ## Also set logging here + # Format logging + # ref: https://docs.python.org/3/library/logging.html#formatter-objects + ## TODO: When we add more logging levels bring back the levelname+time + if given_pash_args.log_file == "": + logging.basicConfig(format="%(message)s") + else: + logging.basicConfig(format="%(message)s", + filename=f"{os.path.abspath(given_pash_args.log_file)}", + filemode="w") + + # Set debug level + if given_pash_args.debug == 1: + logging.getLogger().setLevel(logging.INFO) + elif given_pash_args.debug >= 2: + logging.getLogger().setLevel(logging.DEBUG) ## Increase the recursion limit (it seems that the parser/unparser needs it for bigger graphs) sys.setrecursionlimit(10000) @@ -229,179 +247,11 @@ def init_log_file(): pass -def is_array_variable(token): - return ('a' in token) - -## Based on the following: -## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting -def ansi_c_expand(string): - return bytes(string, "utf-8").decode("unicode_escape") - -## This finds the end of this variable/function -def find_next_delimiter(tokens, i): - if (tokens[i] == "declare"): - return i + 3 - else: - ## TODO: When is this case actually useful? - j = i + 1 - while j < len(tokens) and (tokens[j] != "declare"): - j += 1 - return j - -def parse_array_variable(tokens, i): - ## The `declare` keyword - _declare = tokens[i] - ## The type - declare_type = tokens[i+1] - assert(is_array_variable(declare_type)) - - ## The variable name and first argument - ## TODO: Test with empty array and single value array - name_and_start=tokens[i+2] - first_equal_index = name_and_start.find('=') - - ## If it doesn't contain any = then it is empty - if first_equal_index == -1: - ## Then the name is the whole token, - ## the type is None (TODO) - ## and the value is empty - return name_and_start, None, "", i+3 - - var_name = name_and_start[:first_equal_index] - array_start = name_and_start[first_equal_index+1:] - - var_values = [] - if array_start == "()": - next_i = i+3 - else: - ## Remove the opening parenthesis - array_item = array_start[1:] - - ## Set the index that points to array items - curr_i = i+2 - - done = False - while not done: - ## TODO: Is this check adequate? Or could it miss the end - ## (or be misleaded into an earlier end by the item value?) - if array_item.endswith(")"): - done = True - array_item = array_item[:-1] - - first_equal_index = array_item.find('=') - ## Find the index and value of the array item - item_index_raw = array_item[:first_equal_index] - item_value = array_item[first_equal_index+1:] - - ## Sometimes the value starts with a dollar mark, see Bash ANSI-C quoting: - ## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting - if item_value.startswith("$"): - ## TODO: Figure out if this is adequate - item_value = ansi_c_expand(item_value[1:]) - - item_index = int(item_index_raw[1:-1]) - - ## Add None values if the index is larger than the next item (see Bash sparse arrays) - ## TODO: Keep bash array values as maps to avoid sparse costs - var_values += [None] * (item_index - len(var_values)) - ## Set the next item - var_values.append(item_value) - - - - ## Get next array_item - curr_i += 1 - array_item = tokens[curr_i] - - next_i = curr_i - - ## TODO: Michael? - var_type = None - - return var_name, var_type, var_values, next_i - ## -## Read a shell variables file +## Set the shell variables ## -def read_vars_file(var_file_path): - global config - - log("Reading variables from:", var_file_path) - - - config['shell_variables'] = None +def set_vars_file(var_file_path: str, var_dict: dict): + global config + config['shell_variables'] = var_dict config['shell_variables_file_path'] = var_file_path - if(not var_file_path is None): - vars_dict = {} - # with open(var_file_path) as f: - # lines = [line.rstrip() for line in f.readlines()] - - with open(var_file_path) as f: - variable_reading_start_time = datetime.now() - data = f.read() - variable_reading_end_time = datetime.now() - print_time_delta("Variable Reading", variable_reading_start_time, variable_reading_end_time) - - variable_tokenizing_start_time = datetime.now() - ## TODO: Can we replace this tokenizing process with our own code? This is very slow :'( - ## It takes about 15ms on deathstar. - tokens = shlex.split(data) - variable_tokenizing_end_time = datetime.now() - print_time_delta("Variable Tokenizing", variable_tokenizing_start_time, variable_tokenizing_end_time) - # log("Tokens:", tokens) - - # MMG 2021-03-09 definitively breaking on newlines (e.g., IFS) and function outputs (i.e., `declare -f`) - # KK 2021-10-26 no longer breaking on newlines (probably) - - ## At the start of each iteration token_i should point to a 'declare' - token_i = 0 - while token_i < len(tokens): - # FIXME is this assignment needed? - export_or_typeset = tokens[token_i] - - ## Array variables require special parsing treatment - if (export_or_typeset == "declare" and is_array_variable(tokens[token_i+1])): - var_name, var_type, var_value, new_token_i = parse_array_variable(tokens, token_i) - vars_dict[var_name] = (var_type, var_value) - token_i = new_token_i - continue - - new_token_i = find_next_delimiter(tokens, token_i) - rest = " ".join(tokens[(token_i+1):new_token_i]) - token_i = new_token_i - - space_index = rest.find(' ') - eq_index = rest.find('=') - var_type = None - - ## Declared but unset? - if eq_index == -1: - if space_index != -1: - var_name = rest[(space_index+1):] - var_type = rest[:space_index] - else: - var_name = rest - var_value = "" - ## Set, with type - elif(space_index < eq_index and not space_index == -1): - var_type = rest[:space_index] - - if var_type == "--": - var_type = None - - var_name = rest[(space_index+1):eq_index] - var_value = rest[(eq_index+1):] - ## Set, without type - else: - var_name = rest[:eq_index] - var_value = rest[(eq_index+1):] - - ## Strip quotes - if var_value is not None and len(var_value) >= 2 and \ - var_value[0] == "\"" and var_value[-1] == "\"": - var_value = var_value[1:-1] - - vars_dict[var_name] = (var_type, var_value) - - config['shell_variables'] = vars_dict diff --git a/compiler/definitions/ir/aggregator_node.py b/compiler/definitions/ir/aggregator_node.py index be607d7b9..125ce46db 100644 --- a/compiler/definitions/ir/aggregator_node.py +++ b/compiler/definitions/ir/aggregator_node.py @@ -10,7 +10,7 @@ class MapperAggregatorNode(DFGNode): def __init__(self, old_node, input_ids, output_ids, name_string, new_options, flag_option_list): ## The name of the aggregator command - name = Arg(string_to_argument(name_string)) + name = Arg.string_to_arg(name_string) ## TODO: The category should also be acquired through annotations (and maybe should be asserted to be at most pure) com_category="pure" diff --git a/compiler/definitions/ir/arg.py b/compiler/definitions/ir/arg.py index b75627862..41fcafc6a 100644 --- a/compiler/definitions/ir/arg.py +++ b/compiler/definitions/ir/arg.py @@ -1,15 +1,16 @@ from __future__ import annotations +from shasta.ast_node import * from shell_ast.ast_util import * from util import * class Arg: - def __init__(self, arg_char_list): - if(isinstance(arg_char_list, Arg)): - ## TODO: Make sure that this does not happen using an assertion. - ## TODO: We might need to copy here? - self.arg_char_list = arg_char_list.arg_char_list - else: - self.arg_char_list = arg_char_list + arg_char_list: "list[ArgChar]" + + def __init__(self, arg_char_list: "list[ArgChar]"): + assert(not isinstance(arg_char_list, Arg)) + for arg_char in arg_char_list: + assert(isinstance(arg_char, ArgChar)) + self.arg_char_list = arg_char_list def __repr__(self): return format_arg_chars(self.arg_char_list) @@ -27,12 +28,12 @@ def to_ast(self): return self.arg_char_list def concatenate(self, other): - space = [['C', 32]] + space = [CArgChar(32)] # space self.arg_char_list.extend(space) self.arg_char_list.extend(other.arg_char_list) @staticmethod - def string_to_arg(string) -> Arg: - return Arg(string_to_argument(string)) + def string_to_arg(string: str) -> Arg: + return Arg(string_to_carg_char_list(string)) diff --git a/compiler/definitions/ir/nodes/dfs_split_reader.py b/compiler/definitions/ir/nodes/dfs_split_reader.py index 04c69e81b..63855e325 100644 --- a/compiler/definitions/ir/nodes/dfs_split_reader.py +++ b/compiler/definitions/ir/nodes/dfs_split_reader.py @@ -11,15 +11,15 @@ def __init__(self, inputs, outputs, com_name, com_category, com_assignments=com_assignments) def set_server_address(self, addr): # ex addr: 127.0.0.1:50051 - self.com_options.append((3, Arg(string_to_argument(f"--addr {addr}")))) + self.com_options.append((3, Arg.string_to_arg(f"--addr {addr}"))) def make_dfs_split_reader_node(inputs, output, split_num, prefix): split_reader_bin = os.path.join(config.PASH_TOP, config.config['runtime']['dfs_split_reader_binary']) - com_name = Arg(string_to_argument(split_reader_bin)) + com_name = Arg.string_to_arg(split_reader_bin) com_category = "pure" options = [] - options.append((1, Arg(string_to_argument(f"--prefix '{prefix}'")))) - options.append((2, Arg(string_to_argument(f"--split {split_num}")))) + options.append((1, Arg.string_to_arg(f"--prefix '{prefix}'"))) + options.append((2, Arg.string_to_arg(f"--split {split_num}"))) return DFSSplitReader(inputs, [output], diff --git a/compiler/definitions/ir/nodes/dgsh_tee.py b/compiler/definitions/ir/nodes/dgsh_tee.py index 381d96f0d..16bd5efff 100644 --- a/compiler/definitions/ir/nodes/dgsh_tee.py +++ b/compiler/definitions/ir/nodes/dgsh_tee.py @@ -26,7 +26,7 @@ def make_dgsh_tee_node(input_id, output_id): OptionWithIO("-o", output_id), Flag("-I"), Flag("-f"), - OptionWithIO("-b", ArgStringType(Arg(string_to_argument(str(config.config['runtime']['dgsh_buffer_size'])))))] + OptionWithIO("-b", ArgStringType(Arg.string_to_arg(str(config.config['runtime']['dgsh_buffer_size']))))] cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=dgsh_tee_bin, diff --git a/compiler/definitions/ir/nodes/r_split.py b/compiler/definitions/ir/nodes/r_split.py index ac2462b49..aefce4b7c 100644 --- a/compiler/definitions/ir/nodes/r_split.py +++ b/compiler/definitions/ir/nodes/r_split.py @@ -31,7 +31,7 @@ def add_r_flag(self): def make_r_split(input_id, out_ids, r_split_batch_size): r_split_bin = os.path.join(config.PASH_TOP, config.config['runtime']['r_split_binary']) operand_list = [input_id, - Operand(Arg(string_to_argument(str(r_split_batch_size))))] + Operand(Arg.string_to_arg(str(r_split_batch_size)))] operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} access_map[input_id] = make_stream_input() diff --git a/compiler/definitions/ir/nodes/r_wrap.py b/compiler/definitions/ir/nodes/r_wrap.py index 1d045a838..2a5f79ee9 100644 --- a/compiler/definitions/ir/nodes/r_wrap.py +++ b/compiler/definitions/ir/nodes/r_wrap.py @@ -55,7 +55,7 @@ def wrap_node(node: DFGNode, edges): # any non-streaming inputs or outputs are converted here already! cmd = to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping(cmd_inv_with_io_vars, edges) - bash_command_arg = [Arg(string_to_argument("bash -c"))] + bash_command_arg = [Arg.string_to_arg("bash -c")] operand_list = bash_command_arg + [cmd] cmd_inv_with_io_vars = CommandInvocationWithIOVars( diff --git a/compiler/definitions/ir/nodes/remote_pipe.py b/compiler/definitions/ir/nodes/remote_pipe.py index fd052ea09..7e35faf32 100644 --- a/compiler/definitions/ir/nodes/remote_pipe.py +++ b/compiler/definitions/ir/nodes/remote_pipe.py @@ -18,10 +18,10 @@ def make_remote_pipe(inputs, outputs, host_ip, port, is_remote_read, id): else: remote_pipe_bin = os.path.join(config.PASH_TOP, config.config['runtime']['remote_write_binary']) - com_name = Arg(string_to_argument(remote_pipe_bin)) + com_name = Arg.string_to_arg(remote_pipe_bin) - options.append((opt_count, Arg(string_to_argument(f"--addr {host_ip}:{port}")))) - options.append((opt_count + 1, Arg(string_to_argument(f"--id {id}")))) + options.append((opt_count, Arg.string_to_arg(f"--addr {host_ip}:{port}"))) + options.append((opt_count + 1, Arg.string_to_arg(f"--id {id}"))) return RemotePipe(inputs, outputs, diff --git a/compiler/definitions/ir/redirection.py b/compiler/definitions/ir/redirection.py index 2203a5a6e..5a4a745df 100644 --- a/compiler/definitions/ir/redirection.py +++ b/compiler/definitions/ir/redirection.py @@ -2,16 +2,17 @@ from shell_ast.ast_util import * class Redirection(): - def __init__(self, redirection): - ## Handle initialization from an existing Redirection object - if(isinstance(redirection, Redirection)): - redirection = redirection.to_ast() - assert(len(redirection) == 2) - self.redir_type = redirection[0] - assert(len(redirection[1]) == 3) - self.redir_subtype = redirection[1][0] - self.stream_id = redirection[1][1] - self.file_arg = Arg(redirection[1][2]) + def __init__(self, redirection: RedirectionNode): + if isinstance(redirection, FileRedirNode): + self.redir_type = FileRedirNode.NodeName + elif isinstance(redirection, DupRedirNode): + self.redir_type = DupRedirNode.NodeName + elif isinstance(redirection, HeredocRedirNode): + self.redir_type = HeredocRedirNode.NodeName + + self.redir_subtype = redirection.redir_type + self.stream_id = redirection.fd + self.file_arg = Arg(redirection.arg) # log(redirection) ## TODO: Support all redirections diff --git a/compiler/definitions/no_match_exception.py b/compiler/definitions/no_match_exception.py deleted file mode 100644 index 4e027906f..000000000 --- a/compiler/definitions/no_match_exception.py +++ /dev/null @@ -1,4 +0,0 @@ -class NoMatchException(Exception): - def __init__(self, message, errors): - super().__init__(message) - self.errors = errors diff --git a/compiler/dspash/ir_helper.py b/compiler/dspash/ir_helper.py index dc1d8b198..7ce37d80e 100644 --- a/compiler/dspash/ir_helper.py +++ b/compiler/dspash/ir_helper.py @@ -11,7 +11,6 @@ import config from ir import * from ast_to_ir import compile_asts -from json_ast import * from ir_to_ast import to_shell from util import * from dspash.hdfs_utils import HDFSFileConfig diff --git a/compiler/env_var_names.py b/compiler/env_var_names.py new file mode 100644 index 000000000..81c45b289 --- /dev/null +++ b/compiler/env_var_names.py @@ -0,0 +1,10 @@ + +## +## Variable names used in the pash runtime +## + +def loop_iters_var() -> str: + return 'pash_loop_iters' + +def loop_iter_var(loop_id: int) -> str: + return f'pash_loop_{loop_id}_iter' \ No newline at end of file diff --git a/compiler/env_vars_util.py b/compiler/env_vars_util.py new file mode 100644 index 000000000..6a7ec62b0 --- /dev/null +++ b/compiler/env_vars_util.py @@ -0,0 +1,232 @@ +import shlex +from datetime import datetime + +from util import log, print_time_delta + +def read_vars_file(var_file_path): + log("Reading variables from:", var_file_path) + + if(not var_file_path is None): + vars_dict = {} + # with open(var_file_path) as f: + # lines = [line.rstrip() for line in f.readlines()] + + with open(var_file_path) as f: + variable_reading_start_time = datetime.now() + data = f.read() + variable_reading_end_time = datetime.now() + print_time_delta("Variable Reading", variable_reading_start_time, variable_reading_end_time) + + variable_tokenizing_start_time = datetime.now() + ## TODO: Can we replace this tokenizing process with our own code? This is very slow :'( + ## It takes about 15ms on deathstar. + tokens = shlex.split(data) + variable_tokenizing_end_time = datetime.now() + print_time_delta("Variable Tokenizing", variable_tokenizing_start_time, variable_tokenizing_end_time) + # log("Tokens:", tokens) + + # MMG 2021-03-09 definitively breaking on newlines (e.g., IFS) and function outputs (i.e., `declare -f`) + # KK 2021-10-26 no longer breaking on newlines (probably) + + ## At the start of each iteration token_i should point to a 'declare' + token_i = 0 + while token_i < len(tokens): + # FIXME is this assignment needed? + export_or_typeset = tokens[token_i] + + ## Array variables require special parsing treatment + if (export_or_typeset == "declare" and is_array_variable(tokens[token_i+1])): + var_name, var_type, var_value, new_token_i = parse_array_variable(tokens, token_i) + vars_dict[var_name] = (var_type, var_value) + token_i = new_token_i + continue + + new_token_i = find_next_delimiter(tokens, token_i) + rest = " ".join(tokens[(token_i+1):new_token_i]) + token_i = new_token_i + + space_index = rest.find(' ') + eq_index = rest.find('=') + var_type = None + + ## Declared but unset? + if eq_index == -1: + if space_index != -1: + var_name = rest[(space_index+1):] + var_type = rest[:space_index] + else: + var_name = rest + var_value = "" + ## Set, with type + elif(space_index < eq_index and not space_index == -1): + var_type = rest[:space_index] + + if var_type == "--": + var_type = None + + var_name = rest[(space_index+1):eq_index] + var_value = rest[(eq_index+1):] + ## Set, without type + else: + var_name = rest[:eq_index] + var_value = rest[(eq_index+1):] + + ## Strip quotes + if var_value is not None and len(var_value) >= 2 and \ + var_value[0] == "\"" and var_value[-1] == "\"": + var_value = var_value[1:-1] + + vars_dict[var_name] = (var_type, var_value) + + final_vars_dict = set_special_parameters(vars_dict) + return final_vars_dict + + +## This sets the values of the special shell parameters correctly +## +## TODO KK PR#246 Do we need to split using IFS or is it always spaces? +## +## TODO MMG this isn't quite adequate: if pash_input_args contains +## spaces, we'll miscount. KK and I wrote a test +## evaluation/tests/interface_tests that's disabled as of PR#246. +## +## the right solution here is: +## +## - positional arguments get their own field in the +## exp_state---they're not store with ordinary shell +## variables +## +## - we save those separately, probably in a separate file +## +## ``` +## echo pash_argc=$# >pash_positional_args +## for i in $(seq 0 $#) +## do +## echo "pash_arg$i=\"$i\"" >pash_positional_args +## done +## ``` +## +## - we load these separately. pretty annoying; here's a sketch +## +## ``` +## cmd="set --" +## for i in $(seq 0 $pash_argc) +## do +## cmd="$cmd \"\$pash_arg$i\"" +## done +## eval "$cmd" +def set_special_parameters(variables: dict): + new_vars = variables.copy() + + ia_t, input_args = get_var(variables, 'pash_input_args') + es_t, exit_status = get_var(variables, 'pash_previous_exit_status') + ss_t, set_status = get_var(variables, 'pash_previous_set_status') + sn_t, shell_name = get_var(variables, 'pash_shell_name') + + ## TODO: Set the types of variables correctly + new_vars['@'] = ia_t, " ".join(input_args) + new_vars['?'] = es_t, exit_status + new_vars['-'] = ss_t, set_status + new_vars['0'] = sn_t, shell_name + new_vars['#'] = ia_t, str(len(input_args)) + + for i, arg in enumerate(input_args): + index = i + 1 + new_vars[str(index)] = input_args[i] + + return new_vars + +def get_var(variables: dict, varname: str): + type, value = variables.get(varname, [None, None]) + return type, value + +def is_array_variable(token): + return ('a' in token) + +## Based on the following: +## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting +def ansi_c_expand(string): + return bytes(string, "utf-8").decode("unicode_escape") + +## This finds the end of this variable/function +def find_next_delimiter(tokens, i): + if (tokens[i] == "declare"): + return i + 3 + else: + ## TODO: When is this case actually useful? + j = i + 1 + while j < len(tokens) and (tokens[j] != "declare"): + j += 1 + return j + +def parse_array_variable(tokens, i): + ## The `declare` keyword + _declare = tokens[i] + ## The type + declare_type = tokens[i+1] + assert(is_array_variable(declare_type)) + + ## The variable name and first argument + ## TODO: Test with empty array and single value array + name_and_start=tokens[i+2] + first_equal_index = name_and_start.find('=') + + ## If it doesn't contain any = then it is empty + if first_equal_index == -1: + ## Then the name is the whole token, + ## the type is None (TODO) + ## and the value is empty + return name_and_start, None, "", i+3 + + var_name = name_and_start[:first_equal_index] + array_start = name_and_start[first_equal_index+1:] + + var_values = [] + if array_start == "()": + next_i = i+3 + else: + ## Remove the opening parenthesis + array_item = array_start[1:] + + ## Set the index that points to array items + curr_i = i+2 + + done = False + while not done: + ## TODO: Is this check adequate? Or could it miss the end + ## (or be misleaded into an earlier end by the item value?) + if array_item.endswith(")"): + done = True + array_item = array_item[:-1] + + first_equal_index = array_item.find('=') + ## Find the index and value of the array item + item_index_raw = array_item[:first_equal_index] + item_value = array_item[first_equal_index+1:] + + ## Sometimes the value starts with a dollar mark, see Bash ANSI-C quoting: + ## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting + if item_value.startswith("$"): + ## TODO: Figure out if this is adequate + item_value = ansi_c_expand(item_value[1:]) + + item_index = int(item_index_raw[1:-1]) + + ## Add None values if the index is larger than the next item (see Bash sparse arrays) + ## TODO: Keep bash array values as maps to avoid sparse costs + var_values += [None] * (item_index - len(var_values)) + ## Set the next item + var_values.append(item_value) + + + + ## Get next array_item + curr_i += 1 + array_item = tokens[curr_i] + + next_i = curr_i + + ## TODO: Michael? + var_type = None + + return var_name, var_type, var_values, next_i diff --git a/compiler/ir.py b/compiler/ir.py index 5d9de8675..211d1242b 100644 --- a/compiler/ir.py +++ b/compiler/ir.py @@ -219,7 +219,7 @@ def compile_command_to_DFG(fileIdGen, command, options, parallelizer_list=parallelizer_list, cmd_related_properties=cmd_related_properties ) - + # log(f'Dfg node: {dfg_node}') node_id = dfg_node.get_id() ## Assign the from, to node in edges @@ -235,6 +235,7 @@ def compile_command_to_DFG(fileIdGen, command, options, dfg_nodes = {node_id : dfg_node} dfg = IR(dfg_nodes, dfg_edges) + # log(f'IR: {dfg}') return dfg @@ -243,7 +244,7 @@ def compile_command_to_DFG(fileIdGen, command, options, ## def make_tee(input, outputs): - com_name = Arg(string_to_argument("tee")) + com_name = Arg.string_to_arg("tee") com_category = "pure" return DFGNode([input], outputs, @@ -380,7 +381,7 @@ def serialize(self): return output - def to_ast(self, drain_streams): + def to_ast(self, drain_streams) -> "list[AstNode]": asts = [] ## Initialize the pids_to_kill variable @@ -441,7 +442,9 @@ def to_ast(self, drain_streams): assignment = self.collect_pid_assignment() asts.append(assignment) - return asts + ## TODO: Ideally we would like to make them as typed nodes already + class_asts = [to_ast_node(ast_node_to_untyped_deep(ast)) for ast in asts] + return class_asts def collect_pid_assignment(self): ## Creates: @@ -1145,7 +1148,7 @@ def create_reduce_tree(self, init_func, input_ids, fileIdGen): drain_fids = [fileIdGen.next_file_id() for final_auxiliary_output in final_auxiliary_outputs] for drain_fid in drain_fids: - drain_fid.set_resource(FileResource(Arg(string_to_argument('/dev/null')))) + drain_fid.set_resource(FileResource(Arg.string_to_arg('/dev/null'))) new_edges.append(drain_fid) drain_ids = [fid.get_ident() for fid in drain_fids] diff --git a/compiler/ir_to_ast.py b/compiler/ir_to_ast.py index ed75852b2..033eb34d6 100644 --- a/compiler/ir_to_ast.py +++ b/compiler/ir_to_ast.py @@ -76,7 +76,7 @@ def make_rms_f_prologue_epilogue(ephemeral_fids): asts.append(command) return asts -def make_ir_prologue(ephemeral_fids): +def make_ir_prologue(ephemeral_fids) -> "list[AstNode]": asts = [] ## Create an `rm -f` for each ephemeral fid rm_asts = make_rms_f_prologue_epilogue(ephemeral_fids) @@ -99,9 +99,10 @@ def make_ir_prologue(ephemeral_fids): call_mkfifos = make_command([string_to_argument(MKFIFO_PASH_FIFOS_NAME)]) asts.append(call_mkfifos) - return asts + class_asts = [to_ast_node(ast) for ast in asts] + return class_asts -def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file): +def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file) -> "list[AstNode]": asts = [] if (clean_up_graph): ## TODO: Wait for all output nodes not just one @@ -128,7 +129,9 @@ def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file): # (exit $internal_exec_status) exit_ec_ast = make_exit_ec_ast() asts.append(exit_ec_ast) - return asts + + class_asts = [to_ast_node(ast) for ast in asts] + return class_asts def make_exit_ec_ast(): command = make_command([string_to_argument("exit"), diff --git a/compiler/json_ast.py b/compiler/json_ast.py deleted file mode 100644 index 0342b5797..000000000 --- a/compiler/json_ast.py +++ /dev/null @@ -1,60 +0,0 @@ -import json -import config -from shell_ast.ast_node import CustomJSONEncoder -from subprocess import run, PIPE - -from util import * - -### --- From JSON --- ### - -## Returns the ast as a object -def parse_json_line(json_line): - ast_object = json.loads(json_line) - return ast_object - -def parse_json_ast_string(json_string): - ## Solve a bug where an empty string leads to a json decoder error - if(json_string == ""): - log("Warning: Empty json string was given for decoding.") - return [] - stripped_json_string = json_string.strip() - lines = stripped_json_string.split("\n") - ast_objects = [parse_json_line(line) for line in lines] - return ast_objects - -## Returns a list of AST objects -def parse_json_ast(json_filename): - with open(json_filename) as json_file: - file_string = json_file.read() - return parse_json_ast_string(file_string) - -### --- To JSON --- ### - -def save_asts_json(asts, json_filename): - json_string = serialize_asts_to_json(asts) - with open(json_filename, 'w') as json_file: - json_file.write(json_string) - -def serialize_asts_to_json(asts): - serialized_asts = [serialize_ast_json(ast) for ast in asts] - return "\n".join(serialized_asts) - -def serialize_ast_json(ast): - standard_json = json.dumps(ast, cls=CustomJSONEncoder) - return standard_json - -### --- AST to Shell --- ### - - -def json_to_shell(json_string): - subproc = run([config.PRINTER_BINARY], stdout=PIPE, input=json_string, - encoding='ascii', check=True) - return subproc.stdout - -def ast_to_shell(ast, verbose=True): - ast_json = serialize_ast_json(ast) - if verbose: - print(ast_json) - shell_string = json_to_shell(ast_json) - return shell_string - diff --git a/compiler/orchestrator_runtime/pash_init_setup.sh b/compiler/orchestrator_runtime/pash_init_setup.sh index 42cf9b02e..06e953481 100644 --- a/compiler/orchestrator_runtime/pash_init_setup.sh +++ b/compiler/orchestrator_runtime/pash_init_setup.sh @@ -2,6 +2,7 @@ [ -f ~/.pash_init ] && source ~/.pash_init ## File directory export RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") +export WRAPPER_LIB_DIR="$RUNTIME_DIR/../wrapper_library/" ## TODO: Is there a better way to do this? export RUNTIME_LIBRARY_DIR="$RUNTIME_DIR/../../runtime/" export PASH_REDIR="&2" diff --git a/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh b/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh new file mode 100644 index 000000000..c05faf681 --- /dev/null +++ b/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +## OUTPUT: When it completes it sets "$pash_script_to_execute" + +## Only needed for expansion +export pash_input_args=( "$@" ) + +## Move some pash env variables to local so that tests pass +tmp="$pash_disable_parallel_pipelines" +unset pash_disable_parallel_pipelines +pash_disable_parallel_pipelines="$tmp" + +tmp="$pash_input_ir_file" +unset pash_input_ir_file +pash_input_ir_file="$tmp" + +tmp="$pash_sequential_script_file" +unset pash_sequential_script_file +pash_sequential_script_file="$tmp" + +## Save the shell variables to a file (necessary for expansion) +export pash_runtime_shell_variables_file="${PASH_TMP_PREFIX}/variables_$RANDOM$RANDOM$RANDOM" +source "$RUNTIME_DIR/pash_declare_vars.sh" "$pash_runtime_shell_variables_file" +pash_redir_output echo "$$: (1) Bash variables saved in: $pash_runtime_shell_variables_file" + +## The parallel script will be saved in the following file if compilation is successful. +pash_compiled_script_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" + +## TODO: Have a more proper communication protocol +## TODO: Make a proper client for the daemon +pash_redir_output echo "$$: (2) Before asking the daemon for compilation..." +## Send and receive from daemon +msg="Compile:${pash_compiled_script_file}| Variable File:${pash_runtime_shell_variables_file}| Input IR File:${pash_input_ir_file}" +daemon_response=$(pash_communicate_daemon "$msg") # Blocking step, daemon will not send response until it's safe to continue + +if [[ "$daemon_response" == *"OK:"* ]]; then + pash_runtime_return_code=0 +elif [ -z "$daemon_response" ]; then + ## Trouble... Daemon crashed, rip + pash_redir_output echo "$$: ERROR: (2) Daemon crashed!" + exit 1 +else + pash_runtime_return_code=1 +fi + +# Get assigned process id +# We need to split the daemon response into elements of an array by +# shell's field splitting. +# shellcheck disable=SC2206 +response_args=($daemon_response) +process_id=${response_args[1]} + +pash_redir_output echo "$$: (2) Compiler exited with code: $pash_runtime_return_code" +if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_assert_compiler_success_flag" -eq 1 ]; then + pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code: $pash_runtime_return_code while assert_compiler_success was enabled! Exiting PaSh..." + exit 1 +fi + +# store functions for distributed execution +if [ "$distributed_exec" -eq 1 ]; then + declared_functions="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" + declare -f > "$declared_functions" + export declared_functions +fi + +## If the compiler failed or if we dry_run the compiler, we have to run the sequential +if [ "$pash_runtime_return_code" -ne 0 ] || [ "$pash_dry_run_compiler_flag" -eq 1 ]; then + export pash_script_to_execute="${pash_sequential_script_file}" +else + export pash_script_to_execute="${pash_compiled_script_file}" +fi + +## Let daemon know that this region is done +function inform_daemon_exit () { + ## Send to daemon + msg="Exit:${process_id}" + daemon_response=$(pash_communicate_daemon_just_send "$msg") +} + diff --git a/compiler/orchestrator_runtime/pash_restore_state_and_execute.sh b/compiler/orchestrator_runtime/pash_restore_state_and_execute.sh new file mode 100755 index 000000000..5ef81e3f2 --- /dev/null +++ b/compiler/orchestrator_runtime/pash_restore_state_and_execute.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +## File directory +RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") + +## INPUT: Expects SCRIPT_TO_EXECUTE to be set + +#ONLY WAY OUT IS TO TREAT EXEC in special way + +## Recover the `set` state of the previous shell +# pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status" +# pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-" +export pash_current_set_state=$- +source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status" +pash_redir_output echo "$$: (3) Reverted to BaSh set state: $-" + +## Execute the script +pash_redir_output echo "$$: (4) Restoring previous exit code: ${pash_previous_exit_status}" +pash_redir_output echo "$$: (4) Will execute script in ${SCRIPT_TO_EXECUTE}:" +pash_redir_output cat "${SCRIPT_TO_EXECUTE}" + +## Note: We run the `exit` in a checked position so that we don't simply exit when we are in `set -e`. +if (exit "$pash_previous_exit_status") +then +{ + ## This works w.r.t. arguments because source does not change them if there are no arguments + ## being given. + source "${SCRIPT_TO_EXECUTE}" +} +else +{ + source "${SCRIPT_TO_EXECUTE}" +} +fi diff --git a/compiler/orchestrator_runtime/pash_runtime_complete_execution.sh b/compiler/orchestrator_runtime/pash_runtime_complete_execution.sh deleted file mode 100644 index 881a84bb2..000000000 --- a/compiler/orchestrator_runtime/pash_runtime_complete_execution.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -## -## Completes execution by measuring and logging execution times and restoring state -## - -## -## (6) -## - -pash_exec_time_end=$(date +"%s%N") - -## TODO: Maybe remove the temp file after execution - -## We want the execution time in milliseconds -if [ "$pash_output_time_flag" -eq 1 ]; then - pash_exec_time_ms=$(echo "scale = 3; ($pash_exec_time_end-$pash_exec_time_start)/1000000" | bc) - pash_redir_output echo "Execution time: $pash_exec_time_ms ms" -fi - -## Source back the output variables of the compiled script. -## In all cases we should have executed a script -pash_redir_output echo "$$: (7) Recovering BaSh variables from: $pash_output_var_file" -source "$RUNTIME_DIR/pash_source_declare_vars.sh" "$pash_output_var_file" - -## Save the previous `set` state to a variable -pash_redir_output echo "$$: (7) Reading current BaSh set state from: ${pash_output_set_file}" - -pash_redir_output echo "$$: (7) Current BaSh set state: $(cat "$pash_output_set_file")" -## WARNING: This has to happen after sourcing the variables so that it overwrites it -pash_previous_set_status=$(cat "$pash_output_set_file") - -export pash_input_args -pash_redir_output echo "$$: (7) Arguments (might) have been updated to be: ${pash_input_args[@]}" - -## Propagate the `set` state after running the script to the outer script -## TODO: Maybe move this to the end to avoid spurious failures -pash_redir_output echo "$$: (7) Current PaSh set state: $-" -source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$(cat "$pash_output_set_file")" -pash_redir_output echo "$$: (7) Reverted to BaSh set state before exiting: $-" - -pash_redir_output echo "$$: (7) Reverting last BaSh exit code: $pash_runtime_final_status" -(exit "$pash_runtime_final_status") diff --git a/compiler/orchestrator_runtime/pash_runtime_shell_to_pash.sh b/compiler/orchestrator_runtime/pash_runtime_shell_to_pash.sh deleted file mode 100644 index 7780a9c68..000000000 --- a/compiler/orchestrator_runtime/pash_runtime_shell_to_pash.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -## -## This currently performs (5), i.e., reverting bash state to get back to pash mode. -## - -## TODO: Use that for (1) too - -output_vars_file=${1?Output var file not given} -output_set_file=${2?Output set file not given} - -pash_exec_status=${internal_exec_status} -pash_redir_output echo "$$: (5) BaSh script exited with ec: $pash_exec_status" - -## Save the current set options to a file so that they can be recovered -pash_final_set_vars=$- -pash_redir_output echo "$$: (5) Writing current BaSh set state to: $output_set_file" -pash_redir_output echo "$$: (5) Current BaSh shell: $-" -echo "$pash_final_set_vars" > "$output_set_file" - -## Revert to the old set state to avoid spurious fails -source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$pash_current_set_state" -pash_redir_output echo "$$: (5) Reverted to PaSh set state to: $-" - - -## Save the current variables -source "$RUNTIME_DIR/pash_declare_vars.sh" "$output_vars_file" -# pash_redir_output echo "$$: (5) Exiting from BaSh with BaSh status: $pash_exec_status" -# (exit "$pash_exec_status") diff --git a/compiler/orchestrator_runtime/pash_source_declare_vars.sh b/compiler/orchestrator_runtime/pash_source_declare_vars.sh index 06d654f50..0723abd08 100755 --- a/compiler/orchestrator_runtime/pash_source_declare_vars.sh +++ b/compiler/orchestrator_runtime/pash_source_declare_vars.sh @@ -11,7 +11,9 @@ filter_vars_file() { - cat "$1" | grep -v "^declare -\([A-Za-z]\|-\)* \(pash\|BASH\|LINENO\|EUID\|GROUPS\)" + cat "$1" | grep -v "^declare -\([A-Za-z]\|-\)* \(pash\|BASH\|LINENO\|EUID\|GROUPS\)" + # The extension below is done for the speculative pash + # | grep -v "LS_COLORS" } ## TODO: Error handling if the argument is empty? diff --git a/compiler/orchestrator_runtime/pash_wrap_vars.sh b/compiler/orchestrator_runtime/pash_wrap_vars.sh deleted file mode 100755 index 47c6c3790..000000000 --- a/compiler/orchestrator_runtime/pash_wrap_vars.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -## File directory -RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") - -script_source="$1" - -#ONLY WAY OUT IS TO TREAT EXEC in special way - -## Recover the `set` state of the previous shell -# pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status" -# pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-" -export pash_current_set_state=$- -source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status" -pash_redir_output echo "$$: (3) Reverted to BaSh set state: $-" - -## Recover the input arguments of the previous script -## Note: We don't need to care about wrap_vars arguments because we have stored all of them already. -# -# shellcheck disable=SC2086 -pash_redir_output echo "$$: (3) Array: ${pash_input_args[@]}" -pash_redir_output echo "$$: (3) Number of arguments: ${#pash_input_args[@]}" -eval "set -- \"\${pash_input_args[@]}\"" -pash_redir_output echo "$$: (3) Reverted to BaSh input arguments: $@" -pash_redir_output echo "$$: (3) Number of arguments: $#" - -## Execute the script -pash_redir_output echo "$$: (4) Restoring previous exit code: ${pash_previous_exit_status}" -pash_redir_output echo "$$: (4) Will execute script in ${script_source}:" -pash_redir_output cat "${script_source}" - -## Note: We run the `exit` in a checked position so that we don't simply exit when we are in `set -e`. -if (exit "$pash_previous_exit_status") -then -{ - source "${script_source}" - internal_exec_status=$? - ## Make sure that any input argument changes are propagated outside - export pash_input_args=( "$@" ) - (exit "$internal_exec_status") -} -else -{ - source "${script_source}" - internal_exec_status=$? - ## Make sure that any input argument changes are propagated outside - export pash_input_args=( "$@" ) - (exit "$internal_exec_status") -} -fi diff --git a/compiler/orchestrator_runtime/save_shell_state.sh b/compiler/orchestrator_runtime/save_shell_state.sh new file mode 100644 index 000000000..7fe15fa3b --- /dev/null +++ b/compiler/orchestrator_runtime/save_shell_state.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +## +## Works for bash +## + +## Configuration: +## DEFAULT_SET_STATE: Set this variable to determine the safe set state "huB" +## + +## +## Necessary for bash: +## - Last exit code $? +## - set state $- +## + + +## Save the previous exit code +export PREVIOUS_SHELL_EC="$?" + +## Store the current `set` status +export PREVIOUS_SET_STATUS=$- +source "$RUNTIME_DIR/pash_set_from_to.sh" "$PREVIOUS_SET_STATUS" "${DEFAULT_SET_STATE:-huB}" diff --git a/compiler/orchestrator_runtime/speculative/speculative_runtime.sh b/compiler/orchestrator_runtime/speculative/speculative_runtime.sh index 4e1193299..b9a188484 100644 --- a/compiler/orchestrator_runtime/speculative/speculative_runtime.sh +++ b/compiler/orchestrator_runtime/speculative/speculative_runtime.sh @@ -1,17 +1,57 @@ #!/bin/bash -## TODO: Ask the scheduler to let us know when a command has been committed and what is its exit code. ## TODO: Define the client in pash_spec_init_setup (which should be sourced by pash_init_setup) -## TODO: Then we need to extend the scheduler to also support this protocol (unix sockets only) and -## Respond when the command is actually done. +pash_redir_output echo "$$: (2) Before asking the scheduler for cmd: ${pash_speculative_command_id} exit code..." -export pash_speculative_command_id=$1 +## TODO: Correctly save variables +## Save the shell variables to a file (necessary for expansion) +export pash_runtime_shell_variables_file="${PASH_TMP_PREFIX}/variables_$RANDOM$RANDOM$RANDOM" +source "$RUNTIME_DIR/pash_declare_vars.sh" "$pash_runtime_shell_variables_file" +pash_redir_output echo "$$: (1) Bash variables saved in: $pash_runtime_shell_variables_file" -echo "STUB: This would call the scheduler for command with id: ${pash_speculative_command_id}" +## TODO: We want to send the environment to the scheduler. +## Once the scheduler determines if there are environment changes, it can then +## decide to rerun or not the speculated commands with the new environment. -## TODO: Set this based on what the scheduler returns -pash_runtime_final_status=$? + +## Determine all current loop iterations and send them to the scheduler +pash_loop_iter_counters=${pash_loop_iters:-None} +pash_redir_output echo "$$: Loop node iteration counters: $pash_loop_iter_counters" + +## Send and receive from daemon +msg="Wait:${pash_speculative_command_id}|Loop iters:${pash_loop_iter_counters}" +daemon_response=$(pash_spec_communicate_scheduler "$msg") # Blocking step, daemon will not send response until it's safe to continue + +## Receive an exit code +if [[ "$daemon_response" == *"OK:"* ]]; then + # shellcheck disable=SC2206 + response_args=($daemon_response) + pash_redir_output echo "$$: (2) Scheduler responded: $daemon_response" + cmd_exit_code=${response_args[1]} + output_variable_file=${response_args[2]} + stdout_file=${response_args[3]} +elif [ -z "$daemon_response" ]; then + ## Trouble... Daemon crashed, rip + pash_redir_output echo "$$: ERROR: (2) Scheduler crashed!" + exit 1 +else + pash_redir_output echo "$$: ERROR: (2) Scheduler responded garbage ${daemon_response}!" + exit 1 +fi + + +pash_redir_output echo "$$: (2) Scheduler returned exit code: ${cmd_exit_code} for cmd with id: ${pash_speculative_command_id}." + + +pash_runtime_final_status=${cmd_exit_code} + +## TODO: Restore the variables (doesn't work currently because variables are printed using `env`) +pash_redir_output echo "$$: (2) Recovering script variables from: $output_variable_file" +# source "$RUNTIME_DIR/pash_source_declare_vars.sh" "$output_variable_file" + +pash_redir_output echo "$$: (2) Recovering stdout from: $stdout_file" +cat "${stdout_file}" ## TODO: Also need to use wrap_vars maybe to `set` properly etc diff --git a/compiler/parse.py b/compiler/parse.py index 652f7c4e5..3d2bfc01b 100644 --- a/compiler/parse.py +++ b/compiler/parse.py @@ -4,19 +4,27 @@ import sys from shell_ast.ast_util import UnparsedScript -from shell_ast.ast_node import AstNode, ast_node_to_untyped_deep +from shasta.ast_node import ast_node_to_untyped_deep +from shasta.json_to_ast import to_ast_node +from shasta.ast_node import string_of_arg from util import * import libdash.parser -import libdash.printer ## Parses straight a shell script to an AST ## through python without calling it as an executable def parse_shell_to_asts(input_script_path): try: new_ast_objects = libdash.parser.parse(input_script_path) - return list(new_ast_objects) + + ## Transform the untyped ast objects to typed ones + typed_ast_objects = [] + for untyped_ast, original_text, linno_before, linno_after, in new_ast_objects: + typed_ast = to_ast_node(untyped_ast) + typed_ast_objects.append((typed_ast, original_text, linno_before, linno_after)) + + return typed_ast_objects except libdash.parser.ParsingException as e: log("Parsing error!", e) sys.exit(1) @@ -27,21 +35,11 @@ def parse_shell_to_asts_interactive(input_script_path: str): def from_ast_objects_to_shell(asts): shell_list = [] for ast in asts: + # log("Ast:", ast) if(isinstance(ast, UnparsedScript)): shell_list.append(ast.text) else: - ## We are working with two different abstractions for ASTs, one is the class and the other - ## is its JSON object form. Due to Python's _disgusting_ lack of types (and our bad code) - ## you can sometimes end up here with both. - ## - ## TODO: At some point this should be fixed and we should only work with the AstNode abstraction - ## and only serialize at the end. There is more info on that in ast_node.py - if(isinstance(ast, AstNode)): - serialized_ast = ast_node_to_untyped_deep(ast) - else: - serialized_ast = ast - - shell_list.append(libdash.printer.to_string(serialized_ast)) + shell_list.append(ast.pretty()) return "\n".join(shell_list) + "\n" def from_ast_objects_to_shell_file(asts, new_shell_filename): @@ -49,25 +47,6 @@ def from_ast_objects_to_shell_file(asts, new_shell_filename): with open(new_shell_filename, 'w') as new_shell_file: new_shell_file.write(script) -def parse_shell(input_script_path): - if(not os.path.isfile(input_script_path)): - log("Error! File:", input_script_path, "does not exist.", level=0) - sys.exit(1) - parser_output = subprocess.run([config.PARSER_BINARY, input_script_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - if (not parser_output.returncode == 0): - log(parser_output.stderr) - parser_output.check_returncode() - return parser_output.stdout - - -## Simply wraps the ceda string_of_arg +## Simply wraps the string_of_arg def pash_string_of_arg(arg, quoted=False): - return libdash.printer.string_of_arg(arg, quoted) - -### Legacy - -def from_ir_to_shell_legacy(ir_filename): - printer_output = subprocess.run([config.PRINTER_BINARY, ir_filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - printer_output.check_returncode() - preprocessed_script = printer_output.stdout - return preprocessed_script + return string_of_arg(arg, quoted) diff --git a/compiler/pash.py b/compiler/pash.py index 981b3799a..c8fee1391 100755 --- a/compiler/pash.py +++ b/compiler/pash.py @@ -10,6 +10,7 @@ from parse import parse_shell_to_asts_interactive from pash_graphviz import maybe_init_graphviz_dir from preprocessor.preprocessor import preprocess +from speculative import util_spec from util import * import config import shutil @@ -26,8 +27,6 @@ def main(): if(len(args.input) == 0 or args.interactive): log("ERROR: --interactive option is not supported!", level=0) assert(False) - ## This should never be used! - interactive(args, shell_name) else: input_script_path = args.input[0] input_script_arguments = args.input[1:] @@ -60,59 +59,6 @@ def preprocess_and_execute_asts(input_script_path, args, input_script_arguments, return return_code -## TODO: Create an interactive pash -def interactive(args, shell_name): - ## This means that we are interactive - assert(len(args.input) == 0 or args.interactive) - - if len(args.input) == 0: - ## Read from stdin - input_script_path = "-" - script_args = [] - else: - input_script_path = args.input[0] - script_args = args.input[1:] - - ## Spawn a bash shell in interactive mode - new_env = shell_env(shell_name) - subprocess_args = bash_prefix_args() - ## Add this option to read code from stdin - subprocess_args.append("-s") - ## Add the script arguments here - subprocess_args += script_args - with subprocess.Popen(subprocess_args, - env=new_env, - stdin=subprocess.PIPE, - universal_newlines=True, - close_fds=False) as shell_proc: - ## TODO: Do we need to pipe stdout/stderror - - ## First send an exec so that we change the name of the shell - ## - ## TODO: Can this be done in a less ad-hoc way? - command = bash_exec_string(shell_name) - shell_proc.stdin.write(command) - - ## For each parsed AST: - ## 1. Preprocess it - ## 2. Translate it to shell syntax - ## 3. Send it to the interactive bash - ast_objects = parse_shell_to_asts_interactive(input_script_path) - for ast_object in ast_objects: - ## Preprocess each ast object and produce a preprocessed shell script fragment - preprocessed_shell_script = preprocess([ast_object], args) - log("Sending script to shell process...") - ## Send the preprocessed script fragment to the shell process - shell_proc.stdin.write(preprocessed_shell_script) - shell_proc.stdin.flush() - - ## Close the input and wait for the internal process to finish - shell_proc.stdin.close() - shell_proc.wait() - - log("-" * 40) #log end marker - ## Return the exit code of the executed script - sys.exit(shell_proc.returncode) def parse_args(): @@ -167,7 +113,7 @@ def parse_args(): if args.speculative: log("PaSh is running in speculative mode...") args.__dict__["preprocess_mode"] = "spec" - args.__dict__["partial_order_file"] = ptempfile() + args.__dict__["partial_order_file"] = util_spec.partial_order_file_path() log(" -- Its partial order file will be stored in:", args.partial_order_file) ## Initialize the log file diff --git a/compiler/pash_compilation_server.py b/compiler/pash_compilation_server.py index 94c39d899..3cbdf1a4b 100644 --- a/compiler/pash_compilation_server.py +++ b/compiler/pash_compilation_server.py @@ -2,10 +2,11 @@ import signal import traceback from threading import Thread -from datetime import datetime +from datetime import datetime, timedelta # import queue import config +import env_vars_util from pash_graphviz import maybe_generate_graphviz import pash_compiler from util import * @@ -55,15 +56,22 @@ def init(): ## This class holds information for each process id ## class ProcIdInfo: - def __init__(self, input_ir, compiler_config, exec_time=None): + def __init__(self, input_ir, compiler_config, exec_time=None, start_exec_time=None): self.input_ir = input_ir self.compiler_config = compiler_config self.exec_time = exec_time + self.start_exec_time = start_exec_time ## TODO: Extend it with other info from scheduler, like dependencies def set_exec_time(self, exec_time): self.exec_time = exec_time + def set_start_exec_time(self, start_exec_time): + self.start_exec_time = start_exec_time + + def get_start_exec_time(self): + return self.start_exec_time + def __repr__(self): return f'ProcIdInfo(InputIR:{self.input_ir}, CompConfig:{self.compiler_config}, ExecTime:{self.exec_time})' @@ -233,7 +241,8 @@ def compile_and_add(self, compiled_script_file, var_file, input_ir_file): variable_reading_start_time = datetime.now() # Read any shell variables files if present - config.read_vars_file(var_file) + vars_dict = env_vars_util.read_vars_file(var_file) + config.set_vars_file(var_file, vars_dict) variable_reading_end_time = datetime.now() print_time_delta("Variable Loading", variable_reading_start_time, variable_reading_end_time) @@ -250,7 +259,6 @@ def compile_and_add(self, compiled_script_file, var_file, input_ir_file): daemon_compile_end_time = datetime.now() print_time_delta("Daemon Compile", daemon_compile_start_time, daemon_compile_end_time) - self.wait_unsafe() if ast_or_ir != None: compile_success = True @@ -287,6 +295,10 @@ def compile_and_add(self, compiled_script_file, var_file, input_ir_file): pass else: self.running_procs += 1 + + ## Get the time before we start executing (roughly) to determine how much time this command execution will take + command_exec_start_time = datetime.now() + self.process_id_input_ir_map[process_id].set_start_exec_time(command_exec_start_time) return response def remove_process(self, process_id): @@ -319,13 +331,12 @@ def wait_for_all(self): def handle_exit(self, input_cmd): assert(input_cmd.startswith("Exit:")) - exit_part, time_part = input_cmd.split("|") - process_id = int(exit_part.split(":")[1]) - log("Time part is:", time_part) - try: - exec_time = float(time_part.split(":")[1]) - except: - exec_time = None + process_id = int(input_cmd.split(":")[1]) + + ## Get the execution time + command_finish_exec_time = datetime.now() + command_start_exec_time = self.process_id_input_ir_map[process_id].get_start_exec_time() + exec_time = (command_finish_exec_time - command_start_exec_time) / timedelta(milliseconds=1) log("Process:", process_id, "exited. Exec time was:", exec_time) self.handle_time_measurement(process_id, exec_time) self.remove_process(process_id) diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index eb29794f6..3d6995471 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -7,9 +7,9 @@ from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorKind import AggregatorKindEnum import config +import env_vars_util from ir import * from ast_to_ir import compile_asts -from json_ast import * from ir_to_ast import to_shell from pash_graphviz import maybe_generate_graphviz from util import * @@ -53,7 +53,8 @@ def main_body(): runtime_config = config.config['distr_planner'] ## Read any shell variables files if present - config.read_vars_file(args.var_file) + vars_dict = env_vars_util.read_vars_file(args.var_file) + config.set_vars_file(args.var_file, vars_dict) log("Input:", args.input_ir, "Compiled file:", args.compiled_script_file) diff --git a/compiler/pash_runtime.sh b/compiler/pash_runtime.sh index f679b8530..e5e6c70e5 100755 --- a/compiler/pash_runtime.sh +++ b/compiler/pash_runtime.sh @@ -1,5 +1,13 @@ #!/bin/bash +## Assumes the following two variables are set: +## pash_sequential_script_file: the sequential script. Just running it should work for all tests. +## pash_input_ir_file: the file that contains the IR to be compiled + +## When called by spec, assumes this variable is set: +## pash_spec_command_id: the node id for the specific command + + ## ## High level design. ## @@ -14,7 +22,6 @@ ## (3) Then it should make sure to revert the exit code and `set` state to the saved values. ## ## (4) Then it should execute the inside script (either original or parallel) -## TODO: Figure out what could be different before (1), during (4), and after (7) ## ## (5) Then it save all necessary state and revert to pash-internal state. ## (At the moment this happens automatically because the script is ran in a subshell.) @@ -45,53 +52,30 @@ ## /----(7)----/ ## ... | -## TODO: Make a list/properly define what needs to be saved at (1), (3), (5), (7) ## -## Necessary for pash: -## - PATH important for PaSh but might be changed in bash -## - IFS has to be kept default for PaSh to work -## -## Necessary for bash: -## - Last PID $! (TODO) -## - Last exit code $? -## - set state $- -## - File descriptors (TODO) -## - Loop state (?) Maybe `source` is adequate for this (TODO) -## - Traos (TODO) +## (1) ## -## (maybe) TODO: After that, maybe we can create cleaner functions for (1), (3), (5), (7). -## E.g. we can have a correspondence between variable names and revert them using them +## TODO: Make a shell script that is called wrap and +## takes three arguments (pre_script, script, post_script) +## and wraps a shell script file with a pre and a post code. ## -## (1) +## This wrap function should perform only the necessary shell setting, +## exit code, and $!, transfer. Arguments should ideally be transfered too but +## I don't know of a good way to do it for an external script from an internal one. ## +## Maybe it can happen with eval +## +## The challenging aspect is how to make this work for the parallel pipelines -## Store the previous exit status to propagate to the compiler -## export pash_previous_exit_status=$? -## The assignment now happens outside -export pash_previous_exit_status - -## Store the current `set` status to pash to the inside script -export pash_previous_set_status=$- +## First save the state of the shell +source "$RUNTIME_DIR/save_shell_state.sh" +## Rename variables to pash specific names +export pash_previous_exit_status="$PREVIOUS_SHELL_EC" +export pash_previous_set_status="$PREVIOUS_SET_STATUS" pash_redir_output echo "$$: (1) Previous exit status: $pash_previous_exit_status" pash_redir_output echo "$$: (1) Previous set state: $pash_previous_set_status" - -## Prepare a file with all shell variables -## -## This is only needed by PaSh to expand. -## -## TODO: Maybe we can get rid of it since PaSh has access to the environment anyway? -pash_runtime_shell_variables_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" -source "$RUNTIME_DIR/pash_declare_vars.sh" "$pash_runtime_shell_variables_file" -pash_redir_output echo "$$: (1) Bash variables saved in: $pash_runtime_shell_variables_file" - -## Abort script if variable is unset -pash_default_set_state="huB" - -## Revert the `set` state to not have spurious failures -pash_redir_output echo "$$: (1) Bash set state at start of execution: $pash_previous_set_status" -source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_previous_set_status" "$pash_default_set_state" pash_redir_output echo "$$: (1) Set state reverted to PaSh-internal set state: $-" ## @@ -103,10 +87,10 @@ if [ "$pash_speculative_flag" -eq 1 ]; then ## we just want to ask the scheduler in (3) to let us know when the df_region ## has finished executing and what is its exit code. - ## The first argument is just the command id - export pash_speculative_command_id=$1 + ## TODO: We probably need to make this a local variable so that POSIX tests pass + export pash_speculative_command_id=$pash_spec_command_id - source "$RUNTIME_DIR/speculative/speculative_runtime.sh" "${pash_speculative_command_id}" + source "$RUNTIME_DIR/speculative/speculative_runtime.sh" ## TODO: ## 2. Check the flag in pash.py and if it is set, do the speculative transformation. @@ -116,126 +100,17 @@ if [ "$pash_speculative_flag" -eq 1 ]; then ## TODO: (Future) Check how we could support the steps (5), (6) with speculative and how to refactor this code the best way possible. ## TODO: (Future) We might not need all the set state and other config done in (1) and (3) for speculative else - - ## The first argument contains the sequential script. Just running it should work for all tests. - pash_sequential_script_file=$1 - - ## The second argument SHOULD be the file that contains the IR to be compiled - pash_input_ir_file=$2 - - ## The parallel script will be saved in the following file if compilation is successful. - pash_compiled_script_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" - - - ## TODO: Have a more proper communication protocol - ## TODO: Make a proper client for the daemon - pash_redir_output echo "$$: (2) Before asking the daemon for compilation..." - ## Send and receive from daemon - msg="Compile:${pash_compiled_script_file}| Variable File:${pash_runtime_shell_variables_file}| Input IR File:${pash_input_ir_file}" - daemon_response=$(pash_communicate_daemon "$msg") # Blocking step, daemon will not send response until it's safe to continue - - if [[ "$daemon_response" == *"OK:"* ]]; then - pash_runtime_return_code=0 - elif [ -z "$daemon_response" ]; then - ## Trouble... Daemon crashed, rip - pash_redir_output echo "$$: ERROR: (2) Daemon crashed!" - exit 1 - else - pash_runtime_return_code=1 - fi - - # Get assigned process id - # We need to split the daemon response into elements of an array by - # shell's field splitting. - # shellcheck disable=SC2206 - response_args=($daemon_response) - process_id=${response_args[1]} - - pash_redir_output echo "$$: (2) Compiler exited with code: $pash_runtime_return_code" - if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_assert_compiler_success_flag" -eq 1 ]; then - pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code: $pash_runtime_return_code while assert_compiler_success was enabled! Exiting PaSh..." - exit 1 - fi - - # store functions for distributed execution - if [ "$distributed_exec" -eq 1 ]; then - declared_functions="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" - declare -f > "$declared_functions" - export declared_functions - fi - - ## - ## (3) - ## - - ## Count the execution time - pash_exec_time_start=$(date +"%s%N") - - ## If the compiler failed or if we dry_run the compiler, we have to run the sequential - if [ "$pash_runtime_return_code" -ne 0 ] || [ "$pash_dry_run_compiler_flag" -eq 1 ]; then - pash_script_to_execute="${pash_sequential_script_file}" - else - pash_script_to_execute="${pash_compiled_script_file}" - fi - - # ## - # ## (4) - # ## - - ## TODO: It might make sense to move these functions in pash_init_setup to avoid the cost of redefining them here. - function clean_up () { - if [ "$parallel_script_time_start" == "None" ] || [ "$pash_profile_driven_flag" -eq 0 ]; then - exec_time="" - else - parallel_script_time_end=$(date +"%s%N") - parallel_script_time_ms=$(echo "scale = 3; ($parallel_script_time_end-$parallel_script_time_start)/1000000" | bc) - pash_redir_output echo " --- --- Execution time: $parallel_script_time_ms ms" - exec_time=$parallel_script_time_ms - fi - ## Send to daemon - msg="Exit:${process_id}|Time:$exec_time" - daemon_response=$(pash_communicate_daemon_just_send "$msg") - } + ## Invoke the compiler and make any necessary preparations + source "$RUNTIME_DIR/pash_prepare_call_compiler.sh" function run_parallel() { - trap clean_up SIGTERM SIGINT EXIT - if [ "$pash_profile_driven_flag" -eq 1 ]; then - parallel_script_time_start=$(date +"%s%N") - fi - source "$RUNTIME_DIR/pash_wrap_vars.sh" "$pash_script_to_execute" - internal_exec_status=$? - final_steps - clean_up - (exit $internal_exec_status) - } - - ## We only want to execute (5) and (6) if we are in debug mode and it is not explicitly avoided - function final_steps() { - if [ "$PASH_DEBUG_LEVEL" -ne 0 ] && [ "$pash_avoid_pash_runtime_completion_flag" -ne 1 ]; then - ## - ## (5) - ## - - ## Prepare a file for the output shell variables to be saved in - pash_output_var_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh" "$distro") - # pash_redir_output echo "$$: Output vars: $pash_output_var_file" - - ## Prepare a file for the `set` state of the inner shell to be output - pash_output_set_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh" "$distro") - - source "$RUNTIME_DIR/pash_runtime_shell_to_pash.sh" "$pash_output_var_file" "$pash_output_set_file" - - ## - ## (6) - ## - source "$RUNTIME_DIR/pash_runtime_complete_execution.sh" - fi + trap inform_daemon_exit SIGTERM SIGINT EXIT + export SCRIPT_TO_EXECUTE="$pash_script_to_execute" + source "$RUNTIME_DIR/pash_restore_state_and_execute.sh" + inform_daemon_exit } - ## TODO: Add a check that `set -e` is not on - ## Check if there are traps set, and if so do not execute in parallel - ## ## TODO: This might be an overkill but is conservative traps_set=$(trap) pash_redir_output echo "$$: (2) Traps set: $traps_set" @@ -251,40 +126,42 @@ else # This is safe because the script is run sequentially and the shell # won't be able to move forward until this is finished - ## Needed to clear up any past script time start execution times. - parallel_script_time_start=None - clean_up - source "$RUNTIME_DIR/pash_wrap_vars.sh" "$pash_script_to_execute" - pash_runtime_final_status=$? - final_steps + ## Inform the daemon (this happens before because otherwise when set -e is set we don't send the inform exit) + ## However, this doesn't allow the compiler to get the proper execution time for a command + ## TODO: Properly set and restore traps and then move inform afterwards + ## First make a test that has set traps and set -e to exit (check set-e.sh) + ## + ## TODO: Also inform the daemon that the timing does not work now so that it + ## doesn't measure time for profile driven optimizations. + inform_daemon_exit + # echo $traps_set + + ## Run the script + export SCRIPT_TO_EXECUTE="$pash_script_to_execute" + source "$RUNTIME_DIR/pash_restore_state_and_execute.sh" + ## Save the state after execution + source "$RUNTIME_DIR/save_shell_state.sh" + ## We don't need to save the arguments because they are already set + pash_runtime_final_status="$PREVIOUS_SHELL_EC" + export pash_previous_set_status="$PREVIOUS_SET_STATUS" + + pash_redir_output echo "$$: (5) BaSh script exited with ec: $pash_runtime_final_status" else # Should we redirect errors aswell? # TODO: capturing the return state here isn't completely correct. - # Might need more complex design if this end up being a problem - run_parallel <&0 & - pash_runtime_final_status=$? - pash_redir_output echo "$$: (2) Running pipeline" - - ## Here we need to also revert the state back to bash state - ## since run_parallel will do that in a separate shell - ## - ## This happens right before we exit from pash_runtime! - - ## Recover the `set` state of the previous shell - # pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status" - # pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-" - pash_current_set_state=$- - source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status" - pash_redir_output echo "$$: (5) Reverted to BaSh set state: $-" - - ## TODO: This might not be necessary - ## Recover the input arguments of the previous script - ## Note: We don't need to care about wrap_vars arguments because we have stored all of them already. - # - # shellcheck disable=SC2086 - eval "set -- \"\${pash_input_args[@]}\"" - pash_redir_output echo "$$: (5) Reverted to BaSh input arguments: $@" - - ## TODO: We probably need to exit with the exit code here or something! + run_parallel "$@" <&0 & + ## Setting this to 0 since we can't capture this exit value + pash_runtime_final_status=0 + pash_redir_output echo "$$: (2) Running pipeline..." + + ## The only thing we can recover here is the set state: + ## - arguments and variables are not modified since it is run in parallel and thus is pure + ## - exit code cannot be returned fi + ## Set the shell state before exiting + pash_redir_output echo "$$: (7) Current PaSh set state: $-" + source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$pash_previous_set_status" + pash_redir_output echo "$$: (7) Reverted to BaSh set state before exiting: $-" + ## Set the exit code + (exit "$pash_runtime_final_status") fi diff --git a/compiler/preprocessor/preprocessor.py b/compiler/preprocessor/preprocessor.py index e36f94aa2..d44a5e0fd 100644 --- a/compiler/preprocessor/preprocessor.py +++ b/compiler/preprocessor/preprocessor.py @@ -7,6 +7,7 @@ from ir import FileIdGen from parse import parse_shell_to_asts, from_ast_objects_to_shell from util import * +import server_util from speculative import util_spec LOGGING_PREFIX = "PaSh Preprocessor: " @@ -42,13 +43,24 @@ def preprocess_asts(ast_objects, args): po_file=args.partial_order_file) util_spec.initialize(trans_options) else: - trans_options = ast_to_ast.TransformationOptions(mode=trans_mode) + trans_options = ast_to_ast.TransformationState(mode=trans_mode) ## Preprocess ASTs by replacing AST regions with calls to PaSh's runtime. ## Then the runtime will do the compilation and optimization with additional ## information. preprocessed_asts = ast_to_ast.replace_ast_regions(ast_objects, trans_options) + ## Let the scheduler know that we are done with the partial_order file + ## TODO: We could stream the partial_order_file to the scheduler + if trans_mode is ast_to_ast.TransformationType.SPECULATIVE: + ## First complete the partial_order file + util_spec.serialize_partial_order(trans_options) + + ## Then inform the scheduler that it can read it + unix_socket_file = os.getenv("PASH_SPEC_SCHEDULER_SOCKET") + msg = util_spec.scheduler_server_init_po_msg(trans_options.get_partial_order_file()) + server_util.unix_socket_send_and_forget(unix_socket_file, msg) + return preprocessed_asts ## diff --git a/compiler/server_util.py b/compiler/server_util.py index 3cda4b750..0bee98d3e 100644 --- a/compiler/server_util.py +++ b/compiler/server_util.py @@ -1,6 +1,7 @@ import os import socket +import config from util import log def success_response(string): @@ -94,6 +95,24 @@ def close(self): self.fin.close() +def unix_socket_send_and_forget(socket_file: str, msg: str): + try: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.connect(socket_file) + msg_with_newline = msg + '\n' + byte_msg = msg_with_newline.encode('utf-8') + sock.sendall(byte_msg) + data = sock.recv(config.SOCKET_BUF_SIZE) + str_data = data.decode('utf-8') + ## There should be no response on these messages + assert(len(str_data) == 0) + finally: + log("Sent message:", msg, "to server.", level=1) + sock.close() + + + + ## TODO: Instead of this, think of using a standard SocketServer ## see: https://docs.python.org/3/library/socketserver.html#module-socketserver ## @@ -102,7 +121,7 @@ class SocketManager: def __init__(self, socket_addr: str): ## Configure them outside server_address = socket_addr - self.buf_size = 8192 + self.buf_size = config.SOCKET_BUF_SIZE # Make sure the socket does not already exist ## TODO: Is this necessary? diff --git a/compiler/shell_ast/ast_node.py b/compiler/shell_ast/ast_node.py deleted file mode 100644 index e83fbe3d6..000000000 --- a/compiler/shell_ast/ast_node.py +++ /dev/null @@ -1,203 +0,0 @@ -from json import JSONEncoder - -from shell_ast.ast_node_c import * -from definitions.no_match_exception import * -from util import * - - -## TODO: Create subclasses for all different types of AstNodes -class AstNode: - # create an AstNode object from an ast object as parsed by libdash - def __init__(self, ast_object): - try: - self.construct = AstNodeConstructor(ast_object[0]) - self.parse_args(ast_object[1]) - except ValueError as no_matching_construct: - raise NoMatchException('{} is not a construct we can handle'.format(ast_object[0])) - - def parse_args(self, args): - if self.construct is AstNodeConstructor.PIPE: - self.is_background = args[0] - self.items = args[1] - elif self.construct is AstNodeConstructor.COMMAND: - self.line_number = args[0] - self.assignments = args[1] - self.arguments = args[2] - self.redir_list = args[3] - elif self.construct is AstNodeConstructor.SUBSHELL: - self.line_number = args[0] - self.body = args[1] - self.redir_list = args[2] - elif self.construct in [AstNodeConstructor.AND, AstNodeConstructor.OR, AstNodeConstructor.SEMI]: - self.left_operand = args[0] - self.right_operand = args[1] - elif self.construct is AstNodeConstructor.NOT: - self.body = args - elif self.construct in [AstNodeConstructor.REDIR, AstNodeConstructor.BACKGROUND]: - self.line_number = args[0] - # TODO maybe pick a better name? - self.node = args[1] - self.redir_list = args[2] - elif self.construct is AstNodeConstructor.DEFUN: - self.line_number = args[0] - self.name = args[1] - self.body = args[2] - elif self.construct is AstNodeConstructor.FOR: - self.line_number = args[0] - self.argument = args[1] - self.body = args[2] - self.variable = args[3] - elif self.construct is AstNodeConstructor.WHILE: - self.test = args[0] - self.body = args[1] - elif self.construct is AstNodeConstructor.IF: - self.cond = args[0] - self.then_b = args[1] - self.else_b = args[2] - elif self.construct is AstNodeConstructor.CASE: - self.line_number = args[0] - self.argument = args[1] - self.cases = args[2] - else: - raise ValueError() - - def __repr__(self): - if self.construct is AstNodeConstructor.PIPE: - if (self.is_background): - return "Background Pipe: {}".format(self.items) - else: - return "Pipe: {}".format(self.items) - elif self.construct is AstNodeConstructor.COMMAND: - output = "Command: {}".format(self.arguments) - if(len(self.assignments) > 0): - output += ", ass[{}]".format(self.assignments) - if(len(self.redir_list) > 0): - output += ", reds[{}]".format(self.redir_list) - return output - elif self.construct is AstNodeConstructor.FOR: - output = "for {} in {}; do ({})".format(self.variable, self.argument, self.body) - return output - elif self.construct is AstNodeConstructor.AND: - output = "{} && {}".format(self.left_operand, self.right_operand) - return output - elif self.construct is AstNodeConstructor.SEMI: - output = "{} ; {}".format(self.left_operand, self.right_operand) - return output - elif self.construct is AstNodeConstructor.OR: - output = "{} || {}".format(self.left_operand, self.right_operand) - return output - log(self.construct) - return NotImplemented - - def check(self, **kwargs): - # user-supplied custom checks - for key, value in kwargs.items(): - try: - assert(value()) - except Exception as exc: - log("check for {} construct failed at key {}".format(self.construct, key)) - raise exc - - def json_serialize(self): - if self.construct is AstNodeConstructor.FOR: - json_output = make_kv(self.construct.value, - [self.line_number, - self.argument, - self.body, - self.variable]) - elif self.construct is AstNodeConstructor.WHILE: - json_output = make_kv(self.construct.value, - [self.test, - self.body]) - elif self.construct is AstNodeConstructor.COMMAND: - json_output = make_kv(self.construct.value, - [self.line_number, - self.assignments, - self.arguments, - self.redir_list]) - elif self.construct is AstNodeConstructor.REDIR: - json_output = make_kv(self.construct.value, - [self.line_number, - self.node, - self.redir_list]) - elif self.construct is AstNodeConstructor.BACKGROUND: - json_output = make_kv(self.construct.value, - [self.line_number, - self.node, - self.redir_list]) - elif self.construct is AstNodeConstructor.SUBSHELL: - json_output = make_kv(self.construct.value, - [self.line_number, - self.body, - self.redir_list]) - elif self.construct is AstNodeConstructor.PIPE: - json_output = make_kv(self.construct.value, - [self.is_background, - self.items]) - elif self.construct is AstNodeConstructor.DEFUN: - json_output = make_kv(self.construct.value, - [self.line_number, - self.name, - self.body]) - elif self.construct is AstNodeConstructor.IF: - json_output = make_kv(self.construct.value, - [self.cond, - self.then_b, - self.else_b]) - elif self.construct is AstNodeConstructor.SEMI: - json_output = make_kv(self.construct.value, - [self.left_operand, - self.right_operand]) - elif self.construct is AstNodeConstructor.OR: - json_output = make_kv(self.construct.value, - [self.left_operand, - self.right_operand]) - elif self.construct is AstNodeConstructor.AND: - json_output = make_kv(self.construct.value, - [self.left_operand, - self.right_operand]) - elif self.construct is AstNodeConstructor.NOT: - json_output = make_kv(self.construct.value, - self.body) - elif self.construct is AstNodeConstructor.CASE: - json_output = make_kv(self.construct.value, - [self.line_number, - self.argument, - self.cases]) - else: - log("Not implemented serialization", self) - json_output = NotImplemented - return json_output - -class CustomJSONEncoder(JSONEncoder): - def default(self, obj): - if isinstance(obj, AstNode): - return obj.json_serialize() - # Let the base class default method raise the TypeError - return JSONEncoder.default(self, obj) - - -## This function takes an object that contains a mix of untyped and typed AstNodes (yuck) -## and turns it into untyped json-like object. It is required atm because the infrastructure that -## we have does not translate everything to its typed form at once before compiling, and therefore -## we end up with these abomination objects. -## -## Very important TODO: -## We need to fix this by properly defining types (based on `compiler/parser/ast_atd.atd`) -## and creating a bidirectional transformation from these types to the untyped json object. -## Then we can have all ast_to_ir infrastructure work on these objects, and only in the end -## requiring to go to the untyped form to interface with printing and parsing -## (which ATM does not interface with the typed form). -def ast_node_to_untyped_deep(node): - if(isinstance(node, AstNode)): - json_key, json_val = node.json_serialize() - untyped_json_val = [ast_node_to_untyped_deep(obj) for obj in json_val] - return [json_key, untyped_json_val] - elif(isinstance(node, list)): - return [ast_node_to_untyped_deep(obj) for obj in node] - elif(isinstance(node, tuple)): - return [ast_node_to_untyped_deep(obj) for obj in node] - elif(isinstance(node, dict)): - return {k: ast_node_to_untyped_deep(v) for k, v in node.items()} - else: - return node diff --git a/compiler/shell_ast/ast_node_c.py b/compiler/shell_ast/ast_node_c.py deleted file mode 100644 index 4f5e46521..000000000 --- a/compiler/shell_ast/ast_node_c.py +++ /dev/null @@ -1,17 +0,0 @@ -from enum import Enum - -class AstNodeConstructor(Enum): - PIPE = 'Pipe' - COMMAND = 'Command' - AND = 'And' - OR = 'Or' - NOT = 'Not' - SEMI = 'Semi' - REDIR = 'Redir' - SUBSHELL = 'Subshell' - BACKGROUND = 'Background' - DEFUN = 'Defun' - FOR = 'For' - WHILE = 'While' - IF = 'If' - CASE = 'Case' \ No newline at end of file diff --git a/compiler/shell_ast/ast_to_ast.py b/compiler/shell_ast/ast_to_ast.py index 8a5b0fbba..7af0828c7 100644 --- a/compiler/shell_ast/ast_to_ast.py +++ b/compiler/shell_ast/ast_to_ast.py @@ -1,9 +1,13 @@ from enum import Enum +import copy import pickle import config +from env_var_names import * from shell_ast.ast_util import * +from shasta.ast_node import ast_match +from shasta.json_to_ast import to_ast_node from parse import from_ast_objects_to_shell from speculative import util_spec @@ -14,27 +18,79 @@ class TransformationType(Enum): ## Use this object to pass options inside the preprocessing ## trasnformation. -class TransformationOptions: +class TransformationState: def __init__(self, mode: TransformationType): self.mode = mode + self.node_counter = 0 + self.loop_counter = 0 + self.loop_contexts = [] def get_mode(self): return self.mode + ## Node id related + def get_next_id(self): + new_id = self.node_counter + self.node_counter += 1 + return new_id + + def get_current_id(self): + return self.node_counter - 1 + + def get_number_of_ids(self): + return self.node_counter + + ## Loop id related + def get_next_loop_id(self): + new_id = self.loop_counter + self.loop_counter += 1 + return new_id + + def get_current_loop_context(self): + ## We want to copy that + return self.loop_contexts[:] + + def get_current_loop_id(self): + if len(self.loop_contexts) == 0: + return None + else: + return self.loop_contexts[0] + + def enter_loop(self): + new_loop_id = self.get_next_loop_id() + self.loop_contexts.insert(0, new_loop_id) + return new_loop_id + + def exit_loop(self): + self.loop_contexts.pop(0) + ## TODO: Turn it into a Transformation State class, and make a subclass for ## each of the two transformations. It is important for it to be state, because ## it will need to be passed around while traversing the tree. -class SpeculativeTransformationState(TransformationOptions): +class SpeculativeTransformationState(TransformationState): def __init__(self, mode: TransformationType, po_file: str): super().__init__(mode) assert(self.mode is TransformationType.SPECULATIVE) self.partial_order_file = po_file + self.partial_order_edges = [] + self.partial_order_node_loop_contexts = {} def get_partial_order_file(self): assert(self.mode is TransformationType.SPECULATIVE) return self.partial_order_file + def add_edge(self, from_id: int, to_id: int): + self.partial_order_edges.append((from_id, to_id)) + + def get_all_edges(self): + return self.partial_order_edges + + def add_node_loop_context(self, node_id: int, loop_contexts): + self.partial_order_node_loop_contexts[node_id] = loop_contexts + + def get_all_loop_contexts(self): + return self.partial_order_node_loop_contexts ## @@ -99,7 +155,7 @@ def replace_ast_regions(ast_objects, trans_options): last_object = True ast, original_text, _linno_before, _linno_after = ast_object - ## TODO: Turn the untyped ast to an AstNode + assert(isinstance(ast, AstNode)) ## Goals: This transformation can approximate in several directions. ## 1. Not replacing a candidate dataflow region. @@ -182,7 +238,7 @@ def join_original_text_lines(shell_source_lines_or_none): def preprocess_node(ast_object, trans_options, last_object=False): global preprocess_cases - return ast_match_untyped(ast_object, preprocess_cases, trans_options, last_object) + return ast_match(ast_object, preprocess_cases, trans_options, last_object) ## This preprocesses the AST node and also replaces it if it needs replacement . ## It is called by constructs that cannot be included in a dataflow region. @@ -247,7 +303,6 @@ def preprocess_node_command(ast_node, trans_options, last_object=False): def preprocess_node_redir(ast_node, trans_options, last_object=False): preprocessed_node, something_replaced = preprocess_close_node(ast_node.node, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.node = preprocessed_node preprocessed_ast_object = PreprocessedAST(ast_node, replace_whole=False, @@ -279,7 +334,6 @@ def preprocess_node_background(ast_node, trans_options, last_object=False): def preprocess_node_subshell(ast_node, trans_options, last_object=False): preprocessed_body, something_replaced = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.body = preprocessed_body preprocessed_ast_object = PreprocessedAST(ast_node, replace_whole=False, @@ -293,20 +347,63 @@ def preprocess_node_subshell(ast_node, trans_options, last_object=False): ## TODO: This is not efficient at all since it calls the PaSh runtime everytime the loop is entered. ## We have to find a way to improve that. def preprocess_node_for(ast_node, trans_options, last_object=False): + ## If we are in a loop, we push the loop identifier into the loop context + loop_id = trans_options.enter_loop() preprocessed_body, something_replaced = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update - ast_node.body = preprocessed_body - preprocessed_ast_object = PreprocessedAST(ast_node, + + ## TODO: Then send this iteration identifier when talking to the spec scheduler + ## TODO: After running checks put this behind a check to only run under speculation + + ## Create a new variable that tracks loop iterations + var_name = loop_iter_var(loop_id) + export_node = make_export_var_constant_string(var_name, '0') + increment_node = make_increment_var(var_name) + + ## Also store the whole sequence of loop iters in a file + all_loop_ids = trans_options.get_current_loop_context() + + ## export pash_loop_iters="$pash_loop_XXX_iter $pash_loop_YYY_iter ..." + save_loop_iters_node = export_pash_loop_iters_for_current_context(all_loop_ids) + + ## Prepend the increment in the body + ast_node.body = make_typed_semi_sequence( + [to_ast_node(increment_node), + to_ast_node(save_loop_iters_node), + copy.deepcopy(preprocessed_body)]) + + ## We pop the loop identifier from the loop context. + ## + ## KK 2023-04-27: Could this exit happen before the replacement leading to wrong + ## results? I think not because we use the _close_node preprocessing variant. + ## A similar issue might happen for while + trans_options.exit_loop() + + ## reset the loop iters after we exit the loop + out_of_loop_loop_ids = trans_options.get_current_loop_context() + reset_loop_iters_node = export_pash_loop_iters_for_current_context(out_of_loop_loop_ids) + + ## Prepend the export in front of the loop + # new_node = ast_node + new_node = make_typed_semi_sequence( + [to_ast_node(export_node), + ast_node, + to_ast_node(reset_loop_iters_node)]) + # print(new_node) + + preprocessed_ast_object = PreprocessedAST(new_node, replace_whole=False, non_maximal=False, something_replaced=something_replaced, last_ast=last_object) + return preprocessed_ast_object def preprocess_node_while(ast_node, trans_options, last_object=False): + ## If we are in a loop, we push the loop identifier into the loop context + trans_options.enter_loop() + preprocessed_test, sth_replaced_test = preprocess_close_node(ast_node.test, trans_options, last_object=last_object) preprocessed_body, sth_replaced_body = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.test = preprocessed_test ast_node.body = preprocessed_body something_replaced = sth_replaced_test or sth_replaced_body @@ -315,13 +412,15 @@ def preprocess_node_while(ast_node, trans_options, last_object=False): non_maximal=False, something_replaced=something_replaced, last_ast=last_object) + + ## We pop the loop identifier from the loop context. + trans_options.exit_loop() return preprocessed_ast_object ## This is the same as the one for `For` def preprocess_node_defun(ast_node, trans_options, last_object=False): ## TODO: For now we don't want to compile function bodies # preprocessed_body = preprocess_close_node(ast_node.body) - ## TODO: Could there be a problem with the in-place update # ast_node.body = preprocessed_body preprocessed_ast_object = PreprocessedAST(ast_node, replace_whole=False, @@ -337,7 +436,6 @@ def preprocess_node_semi(ast_node, trans_options, last_object=False): ## TODO: Is it valid that only the right one is considered the last command? preprocessed_left, sth_replaced_left = preprocess_close_node(ast_node.left_operand, trans_options, last_object=False) preprocessed_right, sth_replaced_right = preprocess_close_node(ast_node.right_operand, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.left_operand = preprocessed_left ast_node.right_operand = preprocessed_right sth_replaced = sth_replaced_left or sth_replaced_right @@ -354,7 +452,6 @@ def preprocess_node_and(ast_node, trans_options, last_object=False): # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) preprocessed_left, sth_replaced_left = preprocess_close_node(ast_node.left_operand, trans_options, last_object=last_object) preprocessed_right, sth_replaced_right = preprocess_close_node(ast_node.right_operand, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.left_operand = preprocessed_left ast_node.right_operand = preprocessed_right sth_replaced = sth_replaced_left or sth_replaced_right @@ -369,7 +466,6 @@ def preprocess_node_or(ast_node, trans_options, last_object=False): # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) preprocessed_left, sth_replaced_left = preprocess_close_node(ast_node.left_operand, trans_options, last_object=last_object) preprocessed_right, sth_replaced_right = preprocess_close_node(ast_node.right_operand, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.left_operand = preprocessed_left ast_node.right_operand = preprocessed_right sth_replaced = sth_replaced_left or sth_replaced_right @@ -383,7 +479,6 @@ def preprocess_node_or(ast_node, trans_options, last_object=False): def preprocess_node_not(ast_node, trans_options, last_object=False): # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left) preprocessed_body, sth_replaced = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.body = preprocessed_body preprocessed_ast_object = PreprocessedAST(ast_node, replace_whole=False, @@ -398,7 +493,6 @@ def preprocess_node_if(ast_node, trans_options, last_object=False): preprocessed_cond, sth_replaced_cond = preprocess_close_node(ast_node.cond, trans_options, last_object=last_object) preprocessed_then, sth_replaced_then = preprocess_close_node(ast_node.then_b, trans_options, last_object=last_object) preprocessed_else, sth_replaced_else = preprocess_close_node(ast_node.else_b, trans_options, last_object=last_object) - ## TODO: Could there be a problem with the in-place update ast_node.cond = preprocessed_cond ast_node.then_b = preprocessed_then ast_node.else_b = preprocessed_else @@ -418,7 +512,6 @@ def preprocess_case(case, trans_options, last_object=False): def preprocess_node_case(ast_node, trans_options, last_object=False): preprocessed_cases_replaced = [preprocess_case(case, trans_options, last_object=last_object) for case in ast_node.cases] preprocessed_cases, sth_replaced_cases = list(zip(*preprocessed_cases_replaced)) - ## TODO: Could there be a problem with the in-place update ast_node.cases = preprocessed_cases preprocessed_ast_object = PreprocessedAST(ast_node, replace_whole=False, @@ -451,7 +544,7 @@ def preprocess_node_case(ast_node, trans_options, last_object=False): ## ## If we are need to disable parallel pipelines, e.g., if we are in the context of an if, ## or if we are in the end of a script, then we set a variable. -def replace_df_region(asts, trans_options, disable_parallel_pipelines=False, ast_text=None): +def replace_df_region(asts, trans_options, disable_parallel_pipelines=False, ast_text=None) -> AstNode: transformation_mode = trans_options.get_mode() if transformation_mode is TransformationType.PASH: ir_filename = ptempfile() @@ -469,12 +562,14 @@ def replace_df_region(asts, trans_options, disable_parallel_pipelines=False, ast script_file.write(text_to_output) replaced_node = make_call_to_pash_runtime(ir_filename, sequential_script_file_name, disable_parallel_pipelines) elif transformation_mode is TransformationType.SPECULATIVE: - ## TODO: This currently writes each command on its own line, - ## though it should be improved to better serialize each command in its own file - ## and then only saving the ids of each command in the partial order file. text_to_output = get_shell_from_ast(asts, ast_text=ast_text) ## Generate an ID - df_region_id = util_spec.get_next_id() + df_region_id = trans_options.get_next_id() + + ## Get the current loop id and save it so that the runtime knows + ## which loop it is in. + loop_id = trans_options.get_current_loop_id() + ## Determine its predecessors ## TODO: To make this properly work, we should keep some state ## in the AST traversal to be able to determine predecessors. @@ -485,19 +580,18 @@ def replace_df_region(asts, trans_options, disable_parallel_pipelines=False, ast ## Write to a file indexed by its ID util_spec.save_df_region(text_to_output, trans_options, df_region_id, predecessors) ## TODO: Add an entry point to spec through normal PaSh - replaced_node = make_call_to_spec_runtime(df_region_id) + replaced_node = make_call_to_spec_runtime(df_region_id, loop_id) else: ## Unreachable assert(False) - return replaced_node + return to_ast_node(replaced_node) def get_shell_from_ast(asts, ast_text=None) -> str: ## If we don't have the original ast text, we need to unparse the ast if (ast_text is None): - kv_asts = [ast_node_to_untyped_deep(ast) for ast in asts] - text_to_output = from_ast_objects_to_shell(kv_asts) + text_to_output = from_ast_objects_to_shell(asts) else: text_to_output = ast_text return text_to_output @@ -507,71 +601,6 @@ def get_shell_from_ast(asts, ast_text=None) -> str: ## Code that constructs the preprocessed ASTs ## -def make_pre_runtime_nodes(): - previous_status_command = make_previous_status_command() - input_args_command = make_input_args_command() - return [previous_status_command, input_args_command] - -def make_post_runtime_nodes(): - set_args_node = restore_arguments_command() - set_exit_status_node = restore_exit_code_node() - return [set_args_node, set_exit_status_node] - -def make_previous_status_command(): - ## Save the previous exit state: - ## ``` - ## pash_previous_exit_status="$?" - ## ``` - assignments = [["pash_previous_exit_status", - [make_quoted_variable("?")]]] - previous_status_command = make_command([], assignments=assignments) - return previous_status_command - -def make_input_args_command(): - ## Save the input arguments - ## ``` - ## source $PASH_TOP/runtime/save_args.sh "${@}" - ## ``` - arguments = [string_to_argument("source"), - string_to_argument(config.SAVE_ARGS_EXECUTABLE), - [make_quoted_variable("@")]] - input_args_command = make_command(arguments) - return input_args_command - -def restore_arguments_command(): - ## Restore the arguments to propagate internal changes, e.g., from `shift` outside. - ## ``` - ## eval "set -- \"\${pash_input_args[@]}\"" - ## ``` - ## - ## Alternative Solution: (TODO if we need extra performance -- avoiding eval) - ## Implement an AST node that accepts and returns a literal string - ## bypassing unparsing. This would make this simpler and also more - ## efficient (avoiding eval). - ## However, it would require some work because we would need to implement - ## support for this node in various places of PaSh and the unparser. - ## - ## - ## TODO: Maybe we need to only do this if there is a change. - ## - set_arguments = [string_to_argument("eval"), - [['Q', string_to_argument('set -- ') + - [escaped_char('"')] + # The escaped quote - string_to_argument('\\${pash_input_args[@]}') + - [escaped_char('"')]]]] - set_args_node = make_command(set_arguments) - return set_args_node - -def restore_exit_code_node(): - ## Restore the exit code (since now we have executed `set` last) - ## ``` - ## ( exit "$pash_runtime_final_status") - ## ``` - set_exit_status_command_arguments = [string_to_argument("exit"), - [make_quoted_variable("pash_runtime_final_status")]] - set_exit_status_command = make_command(set_exit_status_command_arguments) - set_exit_status_node = make_kv('Subshell', [0, set_exit_status_command, []]) - return set_exit_status_node ## This function makes a command that calls the pash runtime ## together with the name of the file containing an IR. Then the @@ -597,40 +626,35 @@ def make_call_to_pash_runtime(ir_filename, sequential_script_file_name, else: assignments = [["pash_disable_parallel_pipelines", string_to_argument("0")]] - disable_parallel_pipelines_command = make_command([], - assignments=assignments) + assignments.append(["pash_sequential_script_file", + string_to_argument(sequential_script_file_name)]) + assignments.append(["pash_input_ir_file", + string_to_argument(ir_filename)]) ## Call the runtime arguments = [string_to_argument("source"), - string_to_argument(config.RUNTIME_EXECUTABLE), - string_to_argument(sequential_script_file_name), - string_to_argument(ir_filename)] - ## Pass all relevant argument to the planner - common_arguments_strings = config.pass_common_arguments(config.pash_args) - arguments += [string_to_argument(string) for string in common_arguments_strings] - runtime_node = make_command(arguments) - - ## Create generic wrapper commands - pre_runtime_nodes = make_pre_runtime_nodes() - post_runtime_nodes = make_post_runtime_nodes() - nodes = pre_runtime_nodes + [disable_parallel_pipelines_command, runtime_node] + post_runtime_nodes - sequence = make_semi_sequence(nodes) - return sequence + string_to_argument(config.RUNTIME_EXECUTABLE)] + runtime_node = make_command(arguments, + assignments=assignments) + return runtime_node ## TODO: Make that an actual call to the spec runtime -def make_call_to_spec_runtime(command_id: str) -> AstNode: +def make_call_to_spec_runtime(command_id: int, loop_id) -> AstNode: + assignments = [["pash_spec_command_id", + string_to_argument(str(command_id))]] + if loop_id is None: + loop_id_str = "" + else: + loop_id_str = str(loop_id) + + assignments.append(["pash_spec_loop_id", + string_to_argument(loop_id_str)]) + ## Call the runtime arguments = [string_to_argument("source"), - string_to_argument(config.RUNTIME_EXECUTABLE), - string_to_argument(str(command_id))] + string_to_argument(config.RUNTIME_EXECUTABLE)] ## Pass all relevant argument to the planner - common_arguments_strings = config.pass_common_arguments(config.pash_args) - arguments += [string_to_argument(string) for string in common_arguments_strings] - runtime_node = make_command(arguments) - - ## Create generic wrapper commands - pre_runtime_nodes = make_pre_runtime_nodes() - post_runtime_nodes = make_post_runtime_nodes() - nodes = pre_runtime_nodes + [runtime_node] + post_runtime_nodes - sequence = make_semi_sequence(nodes) - return sequence + runtime_node = make_command(arguments, + assignments=assignments) + + return runtime_node diff --git a/compiler/shell_ast/ast_util.py b/compiler/shell_ast/ast_util.py index 21c588a19..57529904f 100644 --- a/compiler/shell_ast/ast_util.py +++ b/compiler/shell_ast/ast_util.py @@ -1,5 +1,7 @@ -from shell_ast.ast_node import * +from env_var_names import * +from shasta.ast_node import * +from shasta.json_to_ast import * from util import * @@ -39,24 +41,6 @@ def __init__(self, text): def check_if_ast_is_supported(construct, arguments, **kwargs): return -def ast_match_untyped(untyped_ast_object, cases, *args): - ## TODO: This should construct the complete AstNode object (not just the surface level) - ## TODO: Remove this and then at some point make real proper use of the AstNode - ast_node = AstNode(untyped_ast_object) - if ast_node.construct is AstNodeConstructor.PIPE: - ast_node.check(children_count = lambda : len(ast_node.items) >= 2) - return ast_match(ast_node, cases, *args) - -def ast_match(ast_node, cases, *args): - ## TODO: Remove that once `ast_match_untyped` is fixed to - ## construct the whole AstNode object. - if(not isinstance(ast_node, AstNode)): - return ast_match_untyped(ast_node, cases, *args) - - return cases[ast_node.construct.value](*args)(ast_node) - - - def format_args(args): formatted_args = [format_arg_chars(arg_chars) for arg_chars in args] return formatted_args @@ -65,82 +49,12 @@ def format_arg_chars(arg_chars): chars = [format_arg_char(arg_char) for arg_char in arg_chars] return "".join(chars) -## -## BIG TODO: Fix the formating of arg_chars bask to shell scripts and string. -## We need to do this the proper way using the parser. -## -def format_arg_char(arg_char): - key, val = get_kv(arg_char) - if (key == 'C'): - return str(chr(val)) - elif (key == 'B'): - # The $() is just for illustration. This is backticks - return '$({})'.format(val) - elif (key == 'Q'): - formated_val = format_arg_chars(val) - return '"{}"'.format(formated_val) - elif (key == 'V'): - return '${{{}}}'.format(val[2]) - elif (key == 'E'): - ## TODO: This is not right. I think the main reason for the - ## problems is the differences between bash and the posix - ## standard. - # log(" -- escape-debug -- ", val, chr(val)) - non_escape_chars = [92, # \ - 61, # = - 91, # [ - 93, # ] - 45, # - - 58, # : - 126,# ~ - 42] # * - if(val in non_escape_chars): - return '{}'.format(chr(val)) - else: - return '\{}'.format(chr(val)) - else: - log("Cannot format arg_char:", arg_char) - ## TODO: Make this correct - raise NotImplementedError - -## This function finds the first raw character in an argument. -## It needs to be called on an expanded string. -def format_expanded_arg_chars(arg_chars): - chars = [format_expanded_arg_char(arg_char) for arg_char in arg_chars] - return "".join(chars) - -def format_expanded_arg_char(arg_char): - key, val = get_kv(arg_char) - if (key == 'C'): - return str(chr(val)) - elif (key == 'Q'): - formated_val = format_expanded_arg_chars(val) - return '{}'.format(formated_val) - elif (key == 'E'): - ## TODO: I am not sure if this should add \ or not - ## - ## TODO: This is not right. I think the main reason for the - ## problems is the differences between bash and the posix - ## standard. - # log(" -- escape-debug -- ", val, chr(val)) - non_escape_chars = [92, # \ - 61, # = - 91, # [ - 93, # ] - 45, # - - 58, # : - 126,# ~ - 42] # * - if(val in non_escape_chars): - return '{}'.format(chr(val)) - else: - return '\{}'.format(chr(val)) - else: - log("Expanded arg char should not contain:", arg_char) - ## TODO: Make this correct - raise ValueError - +def format_arg_char(arg_char: ArgChar) -> str: + return arg_char.format() +def string_to_carg_char_list(string: str) -> "list[CArgChar]": + ret = [CArgChar(ord(char)) for char in string] + return ret def string_to_arguments(string): return [string_to_argument(word) for word in string.split(" ")] @@ -149,6 +63,10 @@ def string_to_argument(string): ret = [char_to_arg_char(char) for char in string] return ret +def concat_arguments(arg1, arg2): + ## Arguments are simply `arg_char list` and therefore can just be concatenated + return arg1 + arg2 + ## FIXME: This is certainly not complete. It is used to generate the ## AST for the call to the distributed planner. It only handles simple ## characters @@ -161,6 +79,9 @@ def escaped_char(char): def standard_var_ast(string): return make_kv("V", ["Normal", False, string, []]) +def make_arith(arg): + return make_kv("A", arg) + def make_quoted_variable(string): return make_kv("Q", [standard_var_ast(string)]) @@ -228,6 +149,53 @@ def make_defun(name, body): ## Make some nodes ## +def make_export_var_constant_string(var_name: str, value: str): + node = make_export_var(var_name, string_to_argument(value)) + return node + +def make_export_var(var_name: str, arg_char_list): + ## An argument is an arg_char_list + arg1 = string_to_argument(f'{var_name}=') + arguments = [string_to_argument("export"), + concat_arguments(arg1, arg_char_list)] + ## Pass all relevant argument to the planner + node = make_command(arguments) + return node + +def export_pash_loop_iters_for_current_context(all_loop_ids: "list[int]"): + if len(all_loop_ids) > 0: + iter_var_names = [loop_iter_var(loop_id) for loop_id in all_loop_ids] + iter_vars = [standard_var_ast(iter_var_name) for iter_var_name in iter_var_names] + concatted_vars = [iter_vars[0]] + for iter_var in iter_vars[1:]: + concatted_vars.append(char_to_arg_char('-')) + concatted_vars.append(iter_var) + quoted_vars = [quote_arg(concatted_vars)] + else: + quoted_vars = [] + + ## export pash_loop_iters="$pash_loop_XXX_iter $pash_loop_YYY_iter ..." + save_loop_iters_node = make_export_var(loop_iters_var(), quoted_vars) + + return save_loop_iters_node + + +def make_unset_var(var_name: str): + ## An argument is an arg_char_list + arguments = [string_to_argument("unset"), + string_to_argument(var_name)] + ## Pass all relevant argument to the planner + node = make_command(arguments) + return node + +def make_increment_var(var_name: str): + arg = string_to_argument(f'{var_name}+1') + arith_expr = make_arith(arg) + assignments = [[var_name, + [arith_expr]]] + node = make_command([], assignments=assignments) + return node + def make_echo_ast(argument, var_file_path): nodes = [] ## Source variables if present diff --git a/compiler/shell_ast/expand.py b/compiler/shell_ast/expand.py deleted file mode 100644 index b1f231691..000000000 --- a/compiler/shell_ast/expand.py +++ /dev/null @@ -1,626 +0,0 @@ -import copy - -from shell_ast.ast_node import * -from shell_ast.ast_node_c import * - -import ast_to_ir -import config -## Could be useful for debugging -# import parse - -################################################################################ -# SAFE EXPANSION ANALYSIS -################################################################################ - -## This function checks if a word is safe to expand (i.e. if it will -## not have unpleasant side-effects) -def safe_to_expand(arg_char): - key, val = get_kv(arg_char) - if (key in ['V']): # Variable - return True - return False - -def guess_arg(arg): - res = "" - for arg_char in arg: - key, val = get_kv(arg_char) - - if (key in ['C', 'E']): - res += chr(val) - else: - return None - return res - -def safe_arg(arg): - return all([safe_arg_char(arg_char) for arg_char in arg]) - -def safe_args(args): - return all([safe_arg(arg) for arg in args]) - -def safe_arg_char(arg_char): - key, val = get_kv(arg_char) - # character, escaped---noop, but safe - if (key in ['C', 'E']): - return True - # tilde --- only reads system state, safe to do early assuming no writes to HOME prior - elif (key == 'T'): - return True # TODO 2020-11-24 MMG modified variable set? take in/output written vars... - # arithmetic -- depends on what we have - elif (key == 'A'): - return safe_arith(val) - # quoted -- safe if its contents are safe - elif (key == 'Q'): - return safe_arg(val) - # variables -- safe if the format is safe as are the remaining words - elif (key == 'V'): - return safe_var(*val) - # command substitution -- depends on the command - elif (key == 'B'): - return safe_command(val) - - raise ValueError("bad key {}, expected one of CETAVQB".format(key)) - -def safe_var(fmt, null, var, arg): - if (fmt in ['Normal', 'Length']): - return True - elif (fmt in ['Minus', 'Plus', 'TrimR', 'TrimRMax', 'TrimL', 'TrimLMax']): - return safe_arg(arg) - elif (fmt in ['Question']): - return False # TODO 2020-12-10 MMG unless we know `var` is set - elif (fmt in ['Assign']): - return False # TODO 2020-11-24 MMG unless we know `var` is set - - raise ValueError("bad parameter format {}".format(fmt)) - -def safe_arith(arg): - # operations are safe - # `+=` and `=` and family are UNSAFE - # NONPOSIX: `++` and `--` are UNSAFE - # `op="+=1"; $((x $op))` is UNSAFE - - # to determine safety, we: - # (a) check that every arg_char here is safe - # (b) pre-parse it symbolically well enough to ensure that no mutating operations occur - expr = guess_arg(arg) - - if (arg is None): - # TODO 2020-11-25 MMG symbolic pre-parse? - return False - elif ('=' in expr or '++' in expr or '--' in expr): - # TODO 2020-11-25 MMG false negatives: ==, >=, <= - return False - else: - # it's a concrete string that doesn't have mutation operations in it... go for it! - return True - -safe_cases = { - "Pipe": (lambda: - lambda ast_node: safe_pipe(ast_node)), - "Command": (lambda: - lambda ast_node: safe_simple(ast_node)), - "And": (lambda: - lambda ast_node: safe_and_or_semi(ast_node)), - "Or": (lambda: - lambda ast_node: safe_and_or_semi(ast_node)), - "Semi": (lambda: - lambda ast_node: safe_and_or_semi(ast_node)), - "Redir": (lambda: - lambda ast_node: safe_redir_subshell(ast_node)), - "Subshell": (lambda: - lambda ast_node: safe_redir_subshell(ast_node)), - "Background": (lambda: - lambda ast_node: safe_background(ast_node)), - "Defun": (lambda: - lambda ast_node: safe_defun(ast_node)), - "For": (lambda: - lambda ast_node: safe_for(ast_node)), - "While": (lambda: - lambda ast_node: safe_while(ast_node)), - "Case": (lambda: - lambda ast_node: safe_case(ast_node)), - "If": (lambda: - lambda ast_node: safe_if(ast_node)) - } - -def safe_command(command): - # TODO 2020-11-24 MMG which commands are safe to run in advance? - # TODO 2020-11-24 MMG how do we differentiate it being safe to do nested expansions? - global safe_cases - return ast_to_ir.ast_match(command, safe_cases) - -def safe_pipe(node): - return False - -safe_commands = ["echo", ":"] - -def safe_simple(node): - # TODO 2020-11-25 check redirs, assignments - - if (len(node.arguments) <= 0): - return True - - cmd = guess_arg(node.arguments[0]) - if (cmd is None or cmd not in safe_commands): - return False - else: - return safe_args(node.arguments[1:]) - -def safe_and_or_semi(node): - return False - -def safe_redir_subshell(node): - return False - -def safe_background(node): - return False - -def safe_defun(node): - return False - -def safe_for(node): - return False - -def safe_while(node): - return False - -def safe_case(node): - return False - -def safe_if(node): - return False - -################################################################################ -# EARLY EXPANSION -################################################################################ - -# General approach: -# -# - expand_* functions try to expand the AST -# + words return a string when it works, raises when it doesn't -# TODO MMG 2020-12-14 really should return (intermediate?) fields, not a single string -# + commands just set the structural bits appropriately - -# when early expansion detects an error -class EarlyError(RuntimeError): - def __init__(self, arg): - self.arg = arg - -class StuckExpansion(RuntimeError): - def __init__(self, reason, *info): - self.reason = reason - self.info = info - -class ImpureExpansion(RuntimeError): - def __init__(self, reason, *info): - self.reason = reason - self.info = info - -class Unimplemented(RuntimeError): - def __init__(self, msg, ast): - self.msg = msg - self.ast = ast - -class InvalidVariable(RuntimeError): - def __init__(self, var, reason): - self.var = var - self.reason = reason - -## TODO: Figure out if there is a way to batch calls to bash and ask it -## to expand everything at once! We would need to make variable lookups asynchronous. -## -## TODO: `config` doesn't need to be passed down since it is imported -def lookup_variable(var, _lookup_config): - ## If the variable is input arguments then get it from pash_input_args. - ## - ## TODO KK PR#246 Do we need to split using IFS or is it always spaces? - ## - ## TODO KK PR#246 Maybe instead of this we could do this setup - ## once during initialization and leave lookup unaltered? - ## - ## TODO MMG this isn't quite adequate: if pash_input_args contains - ## spaces, we'll miscount. KK and I wrote a test - ## evaluation/tests/interface_tests that's disabled as of PR#246. - ## - ## the right solution here is: - ## - ## - positional arguments get their own field in the - ## config---they're not store with ordinary shell - ## variables - ## - ## - we save those separately, probably in a separate file - ## - ## ``` - ## echo pash_argc=$# >pash_positional_args - ## for i in $(seq 0 $#) - ## do - ## echo "pash_arg$i=\"$i\"" >pash_positional_args - ## done - ## ``` - ## - ## - we load these separately. pretty annoying; here's a sketch - ## - ## ``` - ## cmd="set --" - ## for i in $(seq 0 $pash_argc) - ## do - ## cmd="$cmd \"\$pash_arg$i\"" - ## done - ## eval "$cmd" - - - if(var == '@'): - argument_values = lookup_variable_inner_core('pash_input_args') - expanded_var = " ".join(argument_values) - elif(var == '?'): - expanded_var = lookup_variable_inner('pash_previous_exit_status') - elif(var == '-'): - expanded_var = lookup_variable_inner('pash_previous_set_status') - elif(var == '#'): - argument_values = lookup_variable_inner_core('pash_input_args') - expanded_var = str(len(argument_values)) - elif(var.isnumeric() and int(var) >= 1): - input_args = lookup_variable_inner_core('pash_input_args') - # split_args = input_args.split() - index = int(var) - 1 - try: - expanded_var = input_args[index] - except: - ## If there are not enough arguments -u is set we need to raise - if is_u_set(): - raise StuckExpansion("-u is set and positional argument wasn't set", var) - - expanded_var = '' - elif(var == '0'): - expanded_var = lookup_variable_inner('pash_shell_name') - else: - ## TODO: We can pull this to expand any string. - expanded_var = lookup_variable_inner(var) - - return None, expanded_var - -## Looksup a variable and flattens it if it is an array -def lookup_variable_inner(varname): - value = lookup_variable_inner_core(varname) - if value is not None and not isinstance(value, str): - ## TODO: This is not handled at the moment (and it is unclear if it should be). - ## - ## This is only returned when we are in an array - raise Unimplemented("Expanded value is not None or a string", (varname, value)) - return value - -## Looks up the variable and if it is unset it raises an error -def lookup_variable_inner_core(varname): - value = lookup_variable_inner_unsafe(varname) - if value is None and is_u_set(): - raise StuckExpansion("-u is set and variable was unset", varname) - return value - - -def lookup_variable_inner_unsafe(varname): - ## TODO: Is it in there? If we have -u and it is in there. - _type, value = config.config['shell_variables'].get(varname, [None, None]) - return value - -## This function checks if the -u flag is set -def is_u_set(): - ## This variable is set by pash and is exported and therefore will be in the variable file. - _type, value = config.config['shell_variables']["pash_previous_set_status"] - # log("Previous set status is:", value) - return "u" in value - - -def invalidate_variable(var, reason, config): - config['shell_variables'][var] = [None, InvalidVariable(var, reason)] - return config - -def try_string(expanded): - res = "" - for arg_char in expanded: - key, val = get_kv(arg_char) - - if key in ['C', 'E']: - res += chr(val) - elif key in ['Q']: - # TODO 2020-12-17 fields issues - res += try_string(val) - else: - raise StuckExpansion("left over control code", expanded, val) - - return res - -def try_set_variable(var, expanded, config): - str = try_string(expanded) - config['shell_variables'][var] = [None, str] - - return config - -## TODO: Replace this with an expansion that happens in the bash mirror -## -## TODO: If there is any potential side-effect, exit early -def expand_args(args, config, quoted = False): - res = [] - for arg in args: - new = expand_arg(arg, config, quoted = quoted) - - # expanded! add the string in - res.append(new) - - return split_args(res, config) - -def split_args(args, config): - _, ifs = lookup_variable("IFS", config) - - if ifs is None: - ifs = "\n\t " - - ifs = [ord(c) for c in ifs] - - res = [] - for arg in args: - cur = [] - - for c in arg: - (key, val) = c - if key == 'C' and val in ifs: - # split! - if len(cur) > 0: # TODO(mmg): or if val isn't IFS whitespace - res.append(cur) - cur = [] - else: - cur.append(c) - - if len(cur) > 0: - res.append(cur) - - return res - -def char_code(c): - type = "C" - - if c in "'\\\"()${}[]*?": - type = "E" - - return [type, ord(c)] - -def expand_arg(arg_chars, config, quoted = False): - # log("expanding arg", arg_chars) - # log("unparsed_string:", parse.pash_string_of_arg(arg_chars)) - res = [] - for arg_char in arg_chars: - new = expand_arg_char(arg_char, quoted, config) - - if isinstance(new, str): - res += [char_code(c) for c in list(new)] - else: - res.extend(new) - - return res - -def expand_arg_char(arg_char, quoted, config): - key, val = get_kv(arg_char) - if key == 'C': - if val in ['*', '?', '{', '}', '[', ']'] and not quoted: - raise Unimplemented("globbing", arg_char) - - return [arg_char] - elif key == 'E': - ## 2021-09-15 MMG Just guessing here - if val in ['*', '?', '{', '}', '[', ']'] and not quoted: - raise Unimplemented("globbing", arg_char) - return [arg_char] - elif key == 'T': - if val is None or val == "" or val == "None": - _type, val = lookup_variable("HOME", config) - - if isinstance(val, InvalidVariable): - raise StuckExpansion("HOME invalid for ~", arg_char, val) - elif val is None: - return "~" - else: - return val - else: - # TODO 2020-12-10 getpwnam - raise Unimplemented("~ with prefix", arg_char) - elif key == 'A': - # TODO 2020-12-10 arithmetic parser and evaluator - raise Unimplemented("arithmetic", arg_char) - elif key == 'Q': - return [['Q', expand_arg(val, config, quoted = True)]] - elif key == 'V': - fmt, null, var, arg = val - return expand_var(fmt, null, var, arg, quoted, config) - elif key == 'B': - # TODO 2020-12-10 run commands? - raise ImpureExpansion("command substitution", arg_char) - else: - raise Unimplemented("weird key", key) - -def expand_var(fmt, null, var, arg, quoted, config): - # TODO 2020-12-10 special variables - - _type, value = lookup_variable(var, config) - - log("Var:", var, "value:", value) - - if isinstance(value, InvalidVariable): - raise StuckExpansion("couldn't expand invalid variable", value) - - if fmt == 'Normal': - if value is None: - return "" - else: - return value - elif fmt == 'Length': - if value is None: - return "0" - else: - return str(len(value)) - elif fmt == 'Minus': - if value is None or (null and value == ""): - return expand_arg(arg, config, quoted = quoted) - else: - return value - elif fmt == 'Assign': - if value is None or (null and value == ""): - raise ImpureExpansion("assignment format on unset/null variable", value, arg) -# new = expand_arg(arg, config, quoted = quoted) -# config = try_set_variable(var, new, config) -# return new - else: - return value - elif fmt == 'Plus': - if value is None or (null and value == ""): - return "" - else: - return expand_arg(arg, config, quoted = quoted) - elif fmt == 'Question': - if value is None or (null and value == ""): - # TODO 2020-12-10 more context probably helpful here - raise EarlyError(expand_arg(arg, config, quoted = quoted)) - else: - return value - elif fmt in ['TrimR', 'TrimRMax', 'TrimL', 'TrimLMax']: - # TODO need patterns - raise Unimplemented("patterns", [fmt, null, var, arg]) - else: - raise ValueError("bad parameter format {}".format(fmt)) - -expand_cases = { - "Pipe": (lambda config: - lambda ast_node: expand_pipe(ast_node, config)), - "Command": (lambda config: - lambda ast_node: expand_simple(ast_node, config)), - "And": (lambda config: - lambda ast_node: expand_and_or_semi(ast_node, config)), - "Or": (lambda config: - lambda ast_node: expand_and_or_semi(ast_node, config)), - "Semi": (lambda config: - lambda ast_node: expand_and_or_semi(ast_node, config)), - "Redir": (lambda config: - lambda ast_node: expand_redir_subshell(ast_node, config)), - "Subshell": (lambda config: - lambda ast_node: expand_redir_subshell(ast_node, config)), - "Background": (lambda config: - lambda ast_node: expand_background(ast_node, config)), - "Defun": (lambda config: - lambda ast_node: expand_defun(ast_node, config)), - "For": (lambda config: - lambda ast_node: expand_for(ast_node, config)), - "While": (lambda config: - lambda ast_node: expand_while(ast_node, config)), - "Case": (lambda config: - lambda ast_node: expand_case(ast_node, config)), - "If": (lambda config: - lambda ast_node: expand_if(ast_node, config)) - } - -def expand_command(command, config): - # TODO 2020-11-24 MMG which commands are safe to run in advance? - # TODO 2020-11-24 MMG how do we differentiate it being safe to do nested expansions? - global expand_cases - return ast_to_ir.ast_match(command, expand_cases, config) - -def expand_pipe(node, config): - for i, n in enumerate(node.items): - # copy environment to simulate subshell (no outer effect) - node.items[i] = expand_command(n, copy.deepcopy(config)) - - return node - -def expand_simple(node, config): - # TODO 2020-11-25 MMG is this the order bash does? - node.redir_list = expand_redir_list(node.redir_list, config) - - if len(node.assignments) > 0: - raise ImpureExpansion('assignment', node.assignments) - - #settable = dict() - # - #for (i, [x, arg]) in enumerate(node.assignments): - # exp = expand_arg(arg, config) - # node.assignments[i] = [x, exp] - # - # # assignment visibility: - # # - # # assignments are immediately done when no command... - # if len(node.arguments) == 0: - # config = try_set_variable(x, exp, config) - # else: - # # or deferred until later when there is one - # settable[x] = exp - # - ## once all values are found, _then_ set them before the command - ## TODO 2020-11-25 if node.arguments[0] is a special builtin, these things are global - ## if not... then the settings are just for the command, and shouldn't go in the config - #for (x,exp) in settable: - # try_set_variable(x, exp, config) - - node.arguments = expand_args(node.arguments, config) - - return node - -def expand_redir_list(redir_list, config): - for (i, r) in enumerate(redir_list): - redir_list[i] = expand_redir(r, config) - - return redir_list - -def expand_redir(redirection, config): - redir_type = redirection[0] - redir_subtype = redirection[1][0] - stream_id = redirection[1][1] - file_arg = expand_arg(redirection[1][2], config) - - redirection[1][2] = file_arg - return redirection - -def expand_and_or_semi(node, config): - node.left_operand = expand_command(node.left_operand, config) - node.right_operand = expand_command(node.right_operand, config) - - return node - -def expand_redir_subshell(node, config): - # copy environment to simulate subshell (no outer effect) - node.node = expand_command(node.node, copy.deepcopy(config)) - - return node - -def expand_background(node, config): - # copy environment to simulate subshell (no outer effect) - node.node = expand_command(node.node, copy.deepcopy(config)) - - return node - -def expand_defun(node, config): - # TODO 2020-11-24 MMG invalidate postional args - node.body = expand_command(node.body, copy.deepcopy(config)) - - return node - -def expand_for(node, config): - node.argument = expand_arg(node.argument, config) - - # TODO 2020-11-24 if node.argument is fully expanded, we can just unroll the loop - config = invalidate_variable(node.variable, "variable of for loop", config) - node.body = expand_command(node.body, config) - - return node - -def expand_while(node, config): - node.test = expand_command(node.test, config) - node.body = expand_command(node.body, config) - - return node - -def expand_case(node, config): - # TODO 2020-11-24 preprocess scrutinee, each pattern, each case - - raise Unimplemented("case statements", node) - -def expand_if(node, config): - node.cond = expand_command(node.cond, config) - node.then_b = expand_command(node.then_b, config) - node.else_b = expand_command(node.else_b, config) - - return node diff --git a/compiler/speculative/util_spec.py b/compiler/speculative/util_spec.py index 9b06067eb..7783832fe 100644 --- a/compiler/speculative/util_spec.py +++ b/compiler/speculative/util_spec.py @@ -2,35 +2,24 @@ import os import config +from shell_ast.ast_util import * + ## ## This file contains utility functions useful for the speculative execution component ## -## TODO: There is a similar class in ir.py. Could we avoid duplication? -class IdGen: - def __init__(self, counter=0): - self.counter = counter - - def get_next_id(self): - new_id = self.counter - self.counter += 1 - return new_id - -## TODO: Should we move this to the trans_options class -## (which we could rename to trans_config) and make a subclass for -## the two different transformations. -ID_GENERATOR = IdGen() - def initialize(trans_options) -> None: ## Make the directory that contains the files in the partial order dir_path = partial_order_directory() os.makedirs(dir_path) - ## Initialize the po file - initialize_po_file(trans_options, dir_path) + # ## Initialize the po file + # initialize_po_file(trans_options, dir_path) def partial_order_directory() -> str: return f'{config.PASH_TMP_PREFIX}/speculative/partial_order/' +def partial_order_file_path(): + return f'{config.PASH_TMP_PREFIX}/speculative/partial_order_file' def initialize_po_file(trans_options, dir_path) -> None: ## Initializae the partial order file @@ -38,23 +27,75 @@ def initialize_po_file(trans_options, dir_path) -> None: f.write(f'# Partial order files path:\n') f.write(f'{dir_path}\n') -def get_next_id(): - global ID_GENERATOR - return ID_GENERATOR.get_next_id() +def scheduler_server_init_po_msg(partial_order_file: str) -> str: + return f'Init:{partial_order_file}' ## TODO: To support partial orders, we need to pass some more context here, ## i.e., the connections of this node. Now it assumes we have a sequence. def save_df_region(text_to_output: str, trans_options, df_region_id: int, predecessor_ids: int) -> None: + ## To support loops we also need to associate nodes with their surrounding loops + current_loop_context = trans_options.get_current_loop_context() + log("Df region:", df_region_id, "loop context:", current_loop_context) + + # Add the loop context to the partial_order state + trans_options.add_node_loop_context(df_region_id, current_loop_context) + # Save df_region as text in its own file df_region_path = f'{partial_order_directory()}/{df_region_id}' with open(df_region_path, "w") as f: f.write(text_to_output) - # Save the edges in the partial order file + ## Save the edges in the partial order state + for predecessor in predecessor_ids: + trans_options.add_edge(predecessor, df_region_id) + + + +## TODO: Figure out a way to put all serialization/deserialization of messages +## and parsing/unparsing in a specific module. + +## TODO: Move serialization to a partial_order_file.py +def serialize_edge(from_id: int, to_id: int) -> str: + return f'{from_id} -> {to_id}\n' + +def serialize_number_of_nodes(number_of_ids: int) -> str: + return f'{number_of_ids}\n' + +def serialize_loop_context(node_id: int, loop_contexts) -> str: + ## Galaxy brain serialization + loop_contexts_str = ",".join([str(loop_ctx) for loop_ctx in loop_contexts]) + return f'{node_id}-loop_ctx-{loop_contexts_str}\n' + +## TODO: Eventually we might want to retrieve the number_of_ids from trans_options +def save_number_of_nodes(trans_options): + number_of_ids = trans_options.get_number_of_ids() + partial_order_file_path = trans_options.get_partial_order_file() + with open(partial_order_file_path, "a") as po_file: + po_file.write(serialize_number_of_nodes(number_of_ids)) + +def save_loop_contexts(trans_options): + loop_context_dict = trans_options.get_all_loop_contexts() + log("Loop context dict:", loop_context_dict) partial_order_file_path = trans_options.get_partial_order_file() with open(partial_order_file_path, "a") as po_file: - for predecessor in predecessor_ids: - po_file.write(serialize_edge(predecessor, df_region_id)) + for node_id in sorted(loop_context_dict.keys()): + loop_ctx = loop_context_dict[node_id] + po_file.write(serialize_loop_context(node_id, loop_ctx)) -def serialize_edge(from_id, to_id): - return f'{from_id} -> {to_id}\n' \ No newline at end of file +def serialize_partial_order(trans_options): + ## Initialize the po file + dir_path = partial_order_directory() + initialize_po_file(trans_options, dir_path) + + ## Save the number of nodes + save_number_of_nodes(trans_options) + + ## Save loop contexts + save_loop_contexts(trans_options) + + # Save the edges in the partial order file + partial_order_file_path = trans_options.get_partial_order_file() + edges = trans_options.get_all_edges() + with open(partial_order_file_path, "a") as po_file: + for from_id, to_id in edges: + po_file.write(serialize_edge(from_id, to_id)) diff --git a/compiler/test_expansion.py b/compiler/test_expansion.py deleted file mode 100644 index ca33b2a21..000000000 --- a/compiler/test_expansion.py +++ /dev/null @@ -1,94 +0,0 @@ -import parse -import config -from shell_ast import expand -import json_ast - -import copy - -import os -import traceback - -TEST_PATH = "./tests/expansion" - -if not config.config: - config.load_config() -config.read_vars_file(os.path.join(TEST_PATH, "sample.env")) -#print(config.config) - -def load_ast(file): - return json_ast.parse_json_ast_string(parse.parse_shell(test)) - -print("Using parser {} to parser tests from {}".format(config.PARSER_BINARY, TEST_PATH)) - -tests = os.listdir(TEST_PATH) -tests = [test for test in tests if test.endswith(".sh")] -tests.sort() - -print("* Analysis tests ") - -def safety(b): - if b: - return "safe" - else: - return "unsafe" - -failures = set() -for test_name in tests: - test = os.path.join(TEST_PATH, test_name) - ast_objects = load_ast(test) - - expected_safe = test_name.startswith("safe") - for (i, ast_object) in enumerate(ast_objects): - is_safe = expand.safe_command(ast_object) - - if is_safe != expected_safe: - print("{} command #{} expected {} got {}".format(test_name, i, expected_safe, is_safe)) - failures.add(test_name) - -if len(failures) == 0: - print("All {} tests passed".format(len(tests))) -else: - print("{}/{} tests failed: {}".format(len(failures), len(tests), failures)) - -print("\n* Expansion tests") - -failures = set() -for test_name in tests: - test = os.path.join(TEST_PATH, test_name) - ast_objects = load_ast(test) - - expanded = os.path.join(TEST_PATH, test_name.replace("sh","expanded")) - expected_safe = os.path.exists(expanded) - for (i, ast_object) in enumerate(ast_objects): - try: - cmd = expand.expand_command(ast_object, copy.deepcopy(config.config)) - got = json_ast.ast_to_shell(cmd, verbose=False) - - # ??? MMG 2020-12-17 unsure about fixing the pretty printing (which may need these backslashes!) - got = got.replace("\\'", "'") - - if not expected_safe: - print("Unexpected success in", test_name) - print(got) - failures.add(test_name) - else: - expected = open(expanded).read() - - if got != expected: - print("Expected:\n\t",expected,"Got:\n\t",got) - failures.add(test_name) - except (expand.EarlyError, expand.StuckExpansion,expand.Unimplemented) as e: - if expected_safe: - print("Found unexpected failure in", test_name) - print("Error:", traceback.format_exc()) - failures.add(test_name) - else: - print("Found expected failure in", test_name) - except Exception as e: - print("Error:", traceback.format_exc()) - failures.add(test_name) - -if len(failures) == 0: - print("All {} tests passed".format(len(tests))) -else: - print("{}/{} tests failed: {}".format(len(failures), len(tests), failures)) diff --git a/compiler/tests/expansion/invalidate0.sh b/compiler/tests/expansion/invalidate0.sh deleted file mode 100644 index 2dfb15557..000000000 --- a/compiler/tests/expansion/invalidate0.sh +++ /dev/null @@ -1,7 +0,0 @@ -: ${x=a}; echo $x;\ -for x in b c d -do - echo $x -done;\ -: ${x=e};\ -echo $x diff --git a/compiler/tests/expansion/invalidate1.sh b/compiler/tests/expansion/invalidate1.sh deleted file mode 100644 index 9412543e1..000000000 --- a/compiler/tests/expansion/invalidate1.sh +++ /dev/null @@ -1,7 +0,0 @@ -: ${x=a}; echo $x;\ -for x in b c d -do - echo $x -done;\ -x=e;\ -echo $x diff --git a/compiler/tests/expansion/safe0.expanded b/compiler/tests/expansion/safe0.expanded deleted file mode 100644 index 4a3e55b00..000000000 --- a/compiler/tests/expansion/safe0.expanded +++ /dev/null @@ -1 +0,0 @@ -echo nothing to expand diff --git a/compiler/tests/expansion/safe0.sh b/compiler/tests/expansion/safe0.sh deleted file mode 100644 index 4a3e55b00..000000000 --- a/compiler/tests/expansion/safe0.sh +++ /dev/null @@ -1 +0,0 @@ -echo nothing to expand diff --git a/compiler/tests/expansion/safe1.expanded b/compiler/tests/expansion/safe1.expanded deleted file mode 100644 index 0718ef1c4..000000000 --- a/compiler/tests/expansion/safe1.expanded +++ /dev/null @@ -1 +0,0 @@ -echo '/Users/mgree' is always safe diff --git a/compiler/tests/expansion/safe1.sh b/compiler/tests/expansion/safe1.sh deleted file mode 100644 index e213fee69..000000000 --- a/compiler/tests/expansion/safe1.sh +++ /dev/null @@ -1 +0,0 @@ -echo ~ is always safe diff --git a/compiler/tests/expansion/safe2.expanded b/compiler/tests/expansion/safe2.expanded deleted file mode 100644 index f5f9bca8b..000000000 --- a/compiler/tests/expansion/safe2.expanded +++ /dev/null @@ -1 +0,0 @@ -echo quoting safe stuff is safe diff --git a/compiler/tests/expansion/safe2.sh b/compiler/tests/expansion/safe2.sh deleted file mode 100644 index 2d66d108d..000000000 --- a/compiler/tests/expansion/safe2.sh +++ /dev/null @@ -1 +0,0 @@ -echo "quoting safe stuff is safe" diff --git a/compiler/tests/expansion/safe3.sh b/compiler/tests/expansion/safe3.sh deleted file mode 100644 index bfc7420b1..000000000 --- a/compiler/tests/expansion/safe3.sh +++ /dev/null @@ -1 +0,0 @@ -echo $((2 + 2)) = 4, safely diff --git a/compiler/tests/expansion/safe4.expanded b/compiler/tests/expansion/safe4.expanded deleted file mode 100644 index 3cfff43b4..000000000 --- a/compiler/tests/expansion/safe4.expanded +++ /dev/null @@ -1 +0,0 @@ -echo '/Users/mgree/pash/compiler' is fine to show diff --git a/compiler/tests/expansion/safe4.sh b/compiler/tests/expansion/safe4.sh deleted file mode 100644 index 5f9175015..000000000 --- a/compiler/tests/expansion/safe4.sh +++ /dev/null @@ -1 +0,0 @@ -echo ${PWD} is fine to show diff --git a/compiler/tests/expansion/safe5.sh b/compiler/tests/expansion/safe5.sh deleted file mode 100644 index 50ea6bc81..000000000 --- a/compiler/tests/expansion/safe5.sh +++ /dev/null @@ -1 +0,0 @@ -echo ${#PWD} is also cool, as is ${x-default} and ${x+alt} and and ${x%%a*} ${x%a*} ${x#a*} ${x##a*} diff --git a/compiler/tests/expansion/safe6.expanded b/compiler/tests/expansion/safe6.expanded deleted file mode 100644 index cbeebd59b..000000000 --- a/compiler/tests/expansion/safe6.expanded +++ /dev/null @@ -1 +0,0 @@ -x=5 ; { x=6 ; echo 6 ; } | { x=7 ; echo 7 ; } diff --git a/compiler/tests/expansion/safe6.sh b/compiler/tests/expansion/safe6.sh deleted file mode 100644 index 6518f9039..000000000 --- a/compiler/tests/expansion/safe6.sh +++ /dev/null @@ -1 +0,0 @@ -x=5 ; { x=6 ; echo $x; } | { x=7; echo $x; } diff --git a/compiler/tests/expansion/safe7.expanded b/compiler/tests/expansion/safe7.expanded deleted file mode 100644 index de20cf5ed..000000000 --- a/compiler/tests/expansion/safe7.expanded +++ /dev/null @@ -1 +0,0 @@ -echo 28 is also cool, as is default and and set now and set now diff --git a/compiler/tests/expansion/safe7.sh b/compiler/tests/expansion/safe7.sh deleted file mode 100644 index aa6b8d1b2..000000000 --- a/compiler/tests/expansion/safe7.sh +++ /dev/null @@ -1 +0,0 @@ -echo ${#PWD} is also cool, as is ${x-default} and "${x+alt}" and ${x=set now} and ${x?won\'t run} diff --git a/compiler/tests/expansion/sample.env b/compiler/tests/expansion/sample.env deleted file mode 100644 index 7e5e70c35..000000000 --- a/compiler/tests/expansion/sample.env +++ /dev/null @@ -1,41 +0,0 @@ -export Apple_PubSub_Socket_Render='/private/tmp/com.apple.launchd.62jEnrZY3y/Render' -export BC_ENV_ARGS='--quiet /Users/mgree/.bcrc' -export CAML_LD_LIBRARY_PATH='/Users/mgree/.opam/4.10.0/lib/stublibs:/Users/mgree/.opam/4.10.0/lib/ocaml/stublibs:/Users/mgree/.opam/4.10.0/lib/ocaml' -export CLICOLOR='' -export EDITOR='open -Wnt' -export GEM_HOME='/Users/mgree/.rvm/gems/ruby-2.7.0' -export GEM_PATH='/Users/mgree/.rvm/gems/ruby-2.7.0:/Users/mgree/.rvm/gems/ruby-2.7.0@global' -export HOME='/Users/mgree' -export IRBRC='/Users/mgree/.rvm/rubies/ruby-2.7.0/.irbrc' -export LANG='en_US.UTF-8' -export LOGNAME='mgree' -export MAKEFLAGS='-j 8' -export MANPATH='/usr/man:/usr/local/man:' -export MY_RUBY_HOME='/Users/mgree/.rvm/rubies/ruby-2.7.0' -export OCAML_TOPLEVEL_PATH='/Users/mgree/.opam/4.10.0/lib/toplevel' -export OPAM_SWITCH_PREFIX='/Users/mgree/.opam/4.10.0' -export PASH_PARSER='/Users/mgree/pash/parser/parse_to_json.native' -export PASH_TOP='/Users/mgree/pash' -export PATH='/Users/mgree/.rvm/gems/ruby-2.7.0/bin:/Users/mgree/.rvm/gems/ruby-2.7.0@global/bin:/Users/mgree/.rvm/rubies/ruby-2.7.0/bin:/Users/mgree/.cargo/bin:/Users/mgree/.opam/4.10.0/bin:/Users/mgree/.stack/snapshots/x86_64-osx/a7727f931711366e52e16daa7653e66fd32afad1c4cfebb2d03a12202d4a33a1/8.8.3/bin:/Users/mgree/.stack/compiler-tools/x86_64-osx/ghc-8.8.3/bin:/Users/mgree/.stack/programs/x86_64-osx/ghc-8.8.3/bin:/Users/mgree/.local/bin:/usr/local/texlive/2017/bin/x86_64-darwin:/usr/local/opt/sqlite/bin:/usr/local/Cellar/llvm/6.0.0/bin:/Applications/Visual Studio Code.app/Contents/Resources/app/bin/:/Applications/Racket v7.1/bin/:/usr/local/opt/python/libexec/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/Library/TeX/texbin:/Users/mgree/.rvm/bin' -export PS1='$ ' -export PWD='/Users/mgree/pash/compiler' -export PYTHONIOENCODING='utf8' -export RAN_PROFILE='1' -export RUBY_VERSION='ruby-2.7.0' -export RUST_SRC_PATH='/Users/mgree/.rustup/toolchains/stable-x86_64-apple-darwin/lib/rustlib/src/rust/src' -export SDKROOT='/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk' -export SHELL='/bin/bash' -export SHLVL='1' -export SSH_AUTH_SOCK='/private/tmp/com.apple.launchd.tWxsl4s4CG/Listeners' -export TERM='xterm-256color' -export TERM_PROGRAM='Apple_Terminal' -export TERM_PROGRAM_VERSION='404.1' -export TERM_SESSION_ID='2C6BA681-ECE7-4DF2-9186-7E6580DB8BB3' -export TMPDIR='/var/folders/gg/bcglb26n7cj18q50d00380tc0000gn/T/' -export USER='mgree' -export XPC_FLAGS='0x0' -export XPC_SERVICE_NAME='0' -export rvm_bin_path='/Users/mgree/.rvm/bin' -export rvm_path='/Users/mgree/.rvm' -export rvm_prefix='/Users/mgree' -export rvm_version='1.29.10 (latest)' diff --git a/compiler/tests/expansion/unsafe0.expanded b/compiler/tests/expansion/unsafe0.expanded deleted file mode 100644 index f864d6d7f..000000000 --- a/compiler/tests/expansion/unsafe0.expanded +++ /dev/null @@ -1 +0,0 @@ -echo uhoh diff --git a/compiler/tests/expansion/unsafe0.sh b/compiler/tests/expansion/unsafe0.sh deleted file mode 100644 index 9d7ef33a5..000000000 --- a/compiler/tests/expansion/unsafe0.sh +++ /dev/null @@ -1 +0,0 @@ -echo ${x=uhoh} diff --git a/compiler/tests/expansion/unsafe1.sh b/compiler/tests/expansion/unsafe1.sh deleted file mode 100644 index a60c58c71..000000000 --- a/compiler/tests/expansion/unsafe1.sh +++ /dev/null @@ -1 +0,0 @@ -echo $((x=2)) diff --git a/compiler/tests/expansion/unsafe2.sh b/compiler/tests/expansion/unsafe2.sh deleted file mode 100644 index 475e688c4..000000000 --- a/compiler/tests/expansion/unsafe2.sh +++ /dev/null @@ -1 +0,0 @@ -echo ${nonesuch?uhoh} is unsafe diff --git a/compiler/tests/variable_parse/test-shlex-aux.sh b/compiler/tests/variable_parse/test-shlex-aux.sh deleted file mode 100644 index 50857edd5..000000000 --- a/compiler/tests/variable_parse/test-shlex-aux.sh +++ /dev/null @@ -1,9 +0,0 @@ -comment_fun() -{ - cat > /dev/null #Consume data from pipe so writers don't get SIGPIPE -} - -bad_quote_fun() -{ - echo ${asf"asd} -} diff --git a/compiler/tests/variable_parse/test_shlex.py b/compiler/tests/variable_parse/test_shlex.py deleted file mode 100644 index 0bc8acb4f..000000000 --- a/compiler/tests/variable_parse/test_shlex.py +++ /dev/null @@ -1,66 +0,0 @@ -import os -import shlex -import subprocess -import sys - -GIT_TOP_CMD = [ 'git', 'rev-parse', '--show-toplevel', '--show-superproject-working-tree'] -if 'PASH_TOP' in os.environ: - PASH_TOP = os.environ['PASH_TOP'] -else: - PASH_TOP = subprocess.run(GIT_TOP_CMD, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True).stdout.rstrip() - -sys.path.append(os.path.join(PASH_TOP, "compiler")) - -## First set the tmp prefix because config needs it -os.environ['PASH_TMP_PREFIX'] = '/tmp' - -import config - - -def find_bug_with_first_output(): - with open("output.out") as f: - data = f.read() - - lines = data.split("\n") - - # 1031 - # 1315 - # 1397 - ## 1420 - target_data = "\n".join(lines[1431:1432]) - # target_data = data - print("data:", target_data) - tokens = shlex.split(target_data, comments=True, posix=True) - print("Done") - print(tokens) - -def find_bug_with_second_output(): - with open("output9.out") as f: - data = f.read() - - lines = data.split("\n") - - - target_data = "\n".join(lines[3453:3454]) - # target_data = data - print("data:", target_data) - tokens = shlex.split(target_data, comments=True, posix=False) - print("Done") - print(tokens) - -# find_bug_with_second_output() - -def test_var_file_read(): - ## Set some config state - class Object: - pass - - config.pash_args = Object() - config.pash_args.debug = 1 - config.pash_args.log_file = "" - filename = "test-shlex-aux.sh" - - config.read_vars_file(filename) - - -test_var_file_read() \ No newline at end of file diff --git a/compiler/util.py b/compiler/util.py index a09a24da7..2c131e0f7 100644 --- a/compiler/util.py +++ b/compiler/util.py @@ -1,5 +1,6 @@ from datetime import timedelta import functools +import logging from typing import Optional, TypeVar, Union, List, Any TType = TypeVar("TType") import os @@ -44,18 +45,13 @@ def wrapper(*args, **kwargs): return decorator ## This is a wrapper for prints -## -## TODO: Extend the configuration to allow for custom file to output PaSh log. This would -## allow us to not pollute the .time files. def log(*args, end='\n', level=1): ## If the debug logging level is at least ## as high as this log message. - if (config.DEBUG_LEVEL >= level): - if(config.LOG_FILE == ""): - print(config.LOGGING_PREFIX, *args, file=sys.stderr, end=end, flush=True) - else: - with open(config.LOG_FILE, "a") as f: - print(config.LOGGING_PREFIX, *args, file=f, end=end, flush=True) + ## TODO: Allow all levels + if level >= 1: + concatted_args = " ".join([str(a) for a in list(args)]) + logging.info(f'{config.LOGGING_PREFIX} {concatted_args}') def ptempfile(): fd, name = tempfile.mkstemp(dir=config.PASH_TMP_PREFIX) diff --git a/evaluation/tests/agg/fix-test-124 b/evaluation/tests/agg/fix-test-124 deleted file mode 100755 index 7e475e19f..000000000 --- a/evaluation/tests/agg/fix-test-124 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk '{$1=$1};1' > out diff --git a/evaluation/tests/agg/fix-test-127 b/evaluation/tests/agg/fix-test-127 deleted file mode 100755 index 5fe682577..000000000 --- a/evaluation/tests/agg/fix-test-127 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk '{print $2, $0}' > out diff --git a/evaluation/tests/agg/fix-test-128 b/evaluation/tests/agg/fix-test-128 deleted file mode 100755 index 5fe682577..000000000 --- a/evaluation/tests/agg/fix-test-128 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk '{print $2, $0}' > out diff --git a/evaluation/tests/agg/fix-test-154 b/evaluation/tests/agg/fix-test-154 deleted file mode 100755 index afff41e21..000000000 --- a/evaluation/tests/agg/fix-test-154 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep "\"" > out diff --git a/evaluation/tests/agg/fix-test-156 b/evaluation/tests/agg/fix-test-156 deleted file mode 100755 index ae11dcb66..000000000 --- a/evaluation/tests/agg/fix-test-156 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep Ritchie > out diff --git a/evaluation/tests/agg/fix-test-157 b/evaluation/tests/agg/fix-test-157 deleted file mode 100755 index 04c2b9aae..000000000 --- a/evaluation/tests/agg/fix-test-157 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep 'UNIX' > out diff --git a/evaluation/tests/agg/fix-test-159 b/evaluation/tests/agg/fix-test-159 deleted file mode 100755 index 5f60b5531..000000000 --- a/evaluation/tests/agg/fix-test-159 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep '\"' > out diff --git a/evaluation/tests/agg/fix-test-161 b/evaluation/tests/agg/fix-test-161 deleted file mode 100755 index 77341431b..000000000 --- a/evaluation/tests/agg/fix-test-161 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep 1969 > out diff --git a/evaluation/tests/agg/fix-test-162 b/evaluation/tests/agg/fix-test-162 deleted file mode 100755 index ded56cc50..000000000 --- a/evaluation/tests/agg/fix-test-162 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep 'AT&T' > out diff --git a/evaluation/tests/agg/fix-test-186 b/evaluation/tests/agg/fix-test-186 deleted file mode 100755 index 2f8529907..000000000 --- a/evaluation/tests/agg/fix-test-186 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -cs '[a-z][A-Z]' '' > out diff --git a/evaluation/tests/agg/fix-test-191 b/evaluation/tests/agg/fix-test-191 deleted file mode 100755 index 9aba78f3a..000000000 --- a/evaluation/tests/agg/fix-test-191 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -s ' ' '' > out diff --git a/evaluation/tests/agg/fix-test-22 b/evaluation/tests/agg/fix-test-22 deleted file mode 100755 index 974854f03..000000000 --- a/evaluation/tests/agg/fix-test-22 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep 'light.\*light' > out diff --git a/evaluation/tests/agg/fix-test-23 b/evaluation/tests/agg/fix-test-23 deleted file mode 100755 index b0fb0483b..000000000 --- a/evaluation/tests/agg/fix-test-23 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep 'the land of' > out diff --git a/evaluation/tests/agg/fix-test-33 b/evaluation/tests/agg/fix-test-33 deleted file mode 100755 index 6122388e4..000000000 --- a/evaluation/tests/agg/fix-test-33 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep gz > out diff --git a/evaluation/tests/agg/fix-test-60 b/evaluation/tests/agg/fix-test-60 deleted file mode 100755 index c7edcbdac..000000000 --- a/evaluation/tests/agg/fix-test-60 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -c "[a-z][A-Z]" '' > out diff --git a/evaluation/tests/agg/fix-test-8 b/evaluation/tests/agg/fix-test-8 deleted file mode 100755 index 234289d0f..000000000 --- a/evaluation/tests/agg/fix-test-8 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | col -bx > out diff --git a/evaluation/tests/agg/fix-test-85 b/evaluation/tests/agg/fix-test-85 deleted file mode 100755 index 1e61e0eb2..000000000 --- a/evaluation/tests/agg/fix-test-85 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | col > out diff --git a/evaluation/tests/agg/run.sh b/evaluation/tests/agg/run.sh deleted file mode 100755 index 6b5a77ed8..000000000 --- a/evaluation/tests/agg/run.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash - -export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} -# time: print real in seconds, to simplify parsing - -bash="bash" -pash="$PASH_TOP/pa.sh --r_split_batch_size 1000000 --parallel_pipelines --profile_driven" - -output_dir="$PASH_TOP/evaluation/tests/agg/output" -rm -rf "$output_dir" -mkdir -p "$output_dir" - -run_test() -{ - local test=$1 - echo -n "Running $test..." - TIMEFORMAT="${test%%.*}:%3R" - { time $bash $test > "$output_dir/$test.bash.out"; } 2>> $output_dir/results.time_bash - test_bash_ec=$? - TIMEFORMAT="%3R" - { time $pash "$test" > "$output_dir/$test.pash.out"; } 2>> $output_dir/results.time_pash - test_pash_ec=$? - diff "$output_dir/$test.bash.out" "$output_dir/$test.pash.out" - test_diff_ec=$? - - ## Check if the two exit codes are both success or both error - { [ $test_bash_ec -eq 0 ] && [ $test_pash_ec -eq 0 ]; } || { [ $test_bash_ec -ne 0 ] && [ $test_pash_ec -ne 0 ]; } - test_ec=$? - if [ $test_diff_ec -ne 0 ]; then - echo -n "$test output mismatch " - fi - if [ $test_ec -ne 0 ]; then - echo -n "$test exit code mismatch " - fi - if [ $test_diff_ec -ne 0 ] || [ $test_ec -ne 0 ]; then - echo "$test are not identical" >> $output_dir/result_status - echo -e '\t\tFAIL' - return 1 - else - echo "$test are identical" >> $output_dir/result_status - echo -e '\t\tOK' - return 0 - fi -} -## We run all tests composed with && to exit on the first that fails -for testname in `ls test-* -v` -do - run_test "$testname" -done - -if type lsb_release >/dev/null 2>&1 ; then - distro=$(lsb_release -i -s) -elif [ -e /etc/os-release ] ; then - distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) -fi - -distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') -# now do different things depending on distro -case "$distro" in - freebsd*) - # change sed to gsed - sed () { - gsed $@ - } - ;; - *) - ;; -esac - -echo "group,Bash,Pash" > $output_dir/results.time -paste $output_dir/results.time_* | sed 's\,\.\g' | sed 's\:\,\g' | sed 's/\t/,/' >> $output_dir/results.time - -echo "Below follow the identical outputs:" -grep "are identical" "$output_dir"/result_status | awk '{print $1}' - -echo "Below follow the non-identical outputs:" -grep "are not identical" "$output_dir"/result_status | awk '{print $1}' - -TOTAL_TESTS=$(cat "$output_dir"/result_status | wc -l) -PASSED_TESTS=$(grep -c "are identical" "$output_dir"/result_status) -echo "Summary: ${PASSED_TESTS}/${TOTAL_TESTS} tests passed." diff --git a/evaluation/tests/agg/test-1 b/evaluation/tests/agg/test-1 deleted file mode 100755 index d3acfdbad..000000000 --- a/evaluation/tests/agg/test-1 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk "\$1 == \$2 {print \$2, \$3}" > out diff --git a/evaluation/tests/agg/test-10 b/evaluation/tests/agg/test-10 deleted file mode 100755 index a1115a7d0..000000000 --- a/evaluation/tests/agg/test-10 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -c 89-92 > out diff --git a/evaluation/tests/agg/test-100 b/evaluation/tests/agg/test-100 deleted file mode 100755 index 44166d930..000000000 --- a/evaluation/tests/agg/test-100 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -h > out diff --git a/evaluation/tests/agg/test-101 b/evaluation/tests/agg/test-101 deleted file mode 100755 index 3fe1cac28..000000000 --- a/evaluation/tests/agg/test-101 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -k2n > out diff --git a/evaluation/tests/agg/test-102 b/evaluation/tests/agg/test-102 deleted file mode 100755 index f8ab1f9bb..000000000 --- a/evaluation/tests/agg/test-102 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -t ':' -k2n - > out diff --git a/evaluation/tests/agg/test-103 b/evaluation/tests/agg/test-103 deleted file mode 100755 index c01d54699..000000000 --- a/evaluation/tests/agg/test-103 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tac > out diff --git a/evaluation/tests/agg/test-104 b/evaluation/tests/agg/test-104 deleted file mode 100755 index cae123d67..000000000 --- a/evaluation/tests/agg/test-104 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr [:upper:] [:lower:] > out diff --git a/evaluation/tests/agg/test-105 b/evaluation/tests/agg/test-105 deleted file mode 100755 index cfed29c3c..000000000 --- a/evaluation/tests/agg/test-105 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr [:lower:] [:upper:] | tr -s [:space:] | tr -d [:digit:] > out diff --git a/evaluation/tests/agg/test-106 b/evaluation/tests/agg/test-106 deleted file mode 100755 index a59f21887..000000000 --- a/evaluation/tests/agg/test-106 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | uniq > out diff --git a/evaluation/tests/agg/test-107 b/evaluation/tests/agg/test-107 deleted file mode 100755 index c0c77e0df..000000000 --- a/evaluation/tests/agg/test-107 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | uniq -c > out diff --git a/evaluation/tests/agg/test-108 b/evaluation/tests/agg/test-108 deleted file mode 100755 index 820c7edbf..000000000 --- a/evaluation/tests/agg/test-108 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | uniq -d > out diff --git a/evaluation/tests/agg/test-109 b/evaluation/tests/agg/test-109 deleted file mode 100755 index 0b1ad5084..000000000 --- a/evaluation/tests/agg/test-109 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc > out diff --git a/evaluation/tests/agg/test-11 b/evaluation/tests/agg/test-11 deleted file mode 100755 index 18f28cb97..000000000 --- a/evaluation/tests/agg/test-11 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d ' ' -f9 > out diff --git a/evaluation/tests/agg/test-110 b/evaluation/tests/agg/test-110 deleted file mode 100755 index 917285b80..000000000 --- a/evaluation/tests/agg/test-110 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -c > out diff --git a/evaluation/tests/agg/test-111 b/evaluation/tests/agg/test-111 deleted file mode 100755 index d40093319..000000000 --- a/evaluation/tests/agg/test-111 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -cl > out diff --git a/evaluation/tests/agg/test-113 b/evaluation/tests/agg/test-113 deleted file mode 100755 index 109ac7a68..000000000 --- a/evaluation/tests/agg/test-113 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -cw > out diff --git a/evaluation/tests/agg/test-114 b/evaluation/tests/agg/test-114 deleted file mode 100755 index 102c6ee34..000000000 --- a/evaluation/tests/agg/test-114 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -cwm > out diff --git a/evaluation/tests/agg/test-115 b/evaluation/tests/agg/test-115 deleted file mode 100755 index 77fd0c397..000000000 --- a/evaluation/tests/agg/test-115 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -l > out diff --git a/evaluation/tests/agg/test-116 b/evaluation/tests/agg/test-116 deleted file mode 100755 index caaae3fdd..000000000 --- a/evaluation/tests/agg/test-116 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -lw > out diff --git a/evaluation/tests/agg/test-117 b/evaluation/tests/agg/test-117 deleted file mode 100755 index e2eb0e6ad..000000000 --- a/evaluation/tests/agg/test-117 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -m > out diff --git a/evaluation/tests/agg/test-118 b/evaluation/tests/agg/test-118 deleted file mode 100755 index 227268fb6..000000000 --- a/evaluation/tests/agg/test-118 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -w > out diff --git a/evaluation/tests/agg/test-12 b/evaluation/tests/agg/test-12 deleted file mode 100755 index f231a69c8..000000000 --- a/evaluation/tests/agg/test-12 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d ',' -f 1 > out diff --git a/evaluation/tests/agg/test-120 b/evaluation/tests/agg/test-120 deleted file mode 100755 index 223f430e9..000000000 --- a/evaluation/tests/agg/test-120 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -wm > out diff --git a/evaluation/tests/agg/test-121 b/evaluation/tests/agg/test-121 deleted file mode 100755 index 28883591c..000000000 --- a/evaluation/tests/agg/test-121 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | wc -wml > out diff --git a/evaluation/tests/agg/test-122 b/evaluation/tests/agg/test-122 deleted file mode 100755 index f8a5776c4..000000000 --- a/evaluation/tests/agg/test-122 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | xargs > out diff --git a/evaluation/tests/agg/test-123 b/evaluation/tests/agg/test-123 deleted file mode 100755 index 30e84f91a..000000000 --- a/evaluation/tests/agg/test-123 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | xargs -n 2 > out diff --git a/evaluation/tests/agg/test-125 b/evaluation/tests/agg/test-125 deleted file mode 100755 index eca0b063f..000000000 --- a/evaluation/tests/agg/test-125 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk 'length >= 16' > out diff --git a/evaluation/tests/agg/test-129 b/evaluation/tests/agg/test-129 deleted file mode 100755 index bdf3080b3..000000000 --- a/evaluation/tests/agg/test-129 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cat > out diff --git a/evaluation/tests/agg/test-13 b/evaluation/tests/agg/test-13 deleted file mode 100755 index aeee419b8..000000000 --- a/evaluation/tests/agg/test-13 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d ',' -f 1,2 > out diff --git a/evaluation/tests/agg/test-130 b/evaluation/tests/agg/test-130 deleted file mode 100755 index a6a68f82a..000000000 --- a/evaluation/tests/agg/test-130 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -c1 > out diff --git a/evaluation/tests/agg/test-131 b/evaluation/tests/agg/test-131 deleted file mode 100755 index 1047620af..000000000 --- a/evaluation/tests/agg/test-131 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -c 1-1 > out diff --git a/evaluation/tests/agg/test-14 b/evaluation/tests/agg/test-14 deleted file mode 100755 index 24e633c5a..000000000 --- a/evaluation/tests/agg/test-14 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d ',' -f 1,2,4 > out diff --git a/evaluation/tests/agg/test-140 b/evaluation/tests/agg/test-140 deleted file mode 100755 index 3023da6df..000000000 --- a/evaluation/tests/agg/test-140 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d "\"" -f 2 > out diff --git a/evaluation/tests/agg/test-149 b/evaluation/tests/agg/test-149 deleted file mode 100755 index 25bc2279a..000000000 --- a/evaluation/tests/agg/test-149 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -f 2 > out diff --git a/evaluation/tests/agg/test-150 b/evaluation/tests/agg/test-150 deleted file mode 100755 index 5a5d7a0f0..000000000 --- a/evaluation/tests/agg/test-150 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -f2,5 > out diff --git a/evaluation/tests/agg/test-152 b/evaluation/tests/agg/test-152 deleted file mode 100755 index 7a4c0cebc..000000000 --- a/evaluation/tests/agg/test-152 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | fmt -w1 > out diff --git a/evaluation/tests/agg/test-153 b/evaluation/tests/agg/test-153 deleted file mode 100755 index d24811b48..000000000 --- a/evaluation/tests/agg/test-153 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -v '[KQRBN]' > out diff --git a/evaluation/tests/agg/test-160 b/evaluation/tests/agg/test-160 deleted file mode 100755 index 597f417e7..000000000 --- a/evaluation/tests/agg/test-160 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep '\.' > out diff --git a/evaluation/tests/agg/test-163 b/evaluation/tests/agg/test-163 deleted file mode 100755 index e54208f10..000000000 --- a/evaluation/tests/agg/test-163 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep '[A-Z]' > out diff --git a/evaluation/tests/agg/test-165 b/evaluation/tests/agg/test-165 deleted file mode 100755 index dc9c67abc..000000000 --- a/evaluation/tests/agg/test-165 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep '[KQRBN]' > out diff --git a/evaluation/tests/agg/test-17 b/evaluation/tests/agg/test-17 deleted file mode 100755 index 143802fcc..000000000 --- a/evaluation/tests/agg/test-17 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d ',' -f 3,1 > out diff --git a/evaluation/tests/agg/test-170 b/evaluation/tests/agg/test-170 deleted file mode 100755 index b6376d694..000000000 --- a/evaluation/tests/agg/test-170 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | paste - - > out diff --git a/evaluation/tests/agg/test-175 b/evaluation/tests/agg/test-175 deleted file mode 100755 index ee6e0692a..000000000 --- a/evaluation/tests/agg/test-175 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 5d > out diff --git a/evaluation/tests/agg/test-176 b/evaluation/tests/agg/test-176 deleted file mode 100755 index d98ddbc49..000000000 --- a/evaluation/tests/agg/test-176 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed s/\$/'0s'/ > out diff --git a/evaluation/tests/agg/test-177 b/evaluation/tests/agg/test-177 deleted file mode 100755 index 0615010ac..000000000 --- a/evaluation/tests/agg/test-177 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -n > out diff --git a/evaluation/tests/agg/test-178 b/evaluation/tests/agg/test-178 deleted file mode 100755 index 52a6aa537..000000000 --- a/evaluation/tests/agg/test-178 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -nr > out diff --git a/evaluation/tests/agg/test-179 b/evaluation/tests/agg/test-179 deleted file mode 100755 index a86bb2764..000000000 --- a/evaluation/tests/agg/test-179 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -nr -k 2 > out diff --git a/evaluation/tests/agg/test-18 b/evaluation/tests/agg/test-18 deleted file mode 100755 index 6d5297d41..000000000 --- a/evaluation/tests/agg/test-18 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cut -d: -f1 > out diff --git a/evaluation/tests/agg/test-180 b/evaluation/tests/agg/test-180 deleted file mode 100755 index 5f886bc80..000000000 --- a/evaluation/tests/agg/test-180 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -r > out diff --git a/evaluation/tests/agg/test-181 b/evaluation/tests/agg/test-181 deleted file mode 100755 index b66bbb7c3..000000000 --- a/evaluation/tests/agg/test-181 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort > out diff --git a/evaluation/tests/agg/test-182 b/evaluation/tests/agg/test-182 deleted file mode 100755 index 8c3664d88..000000000 --- a/evaluation/tests/agg/test-182 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tail -n 1 > out diff --git a/evaluation/tests/agg/test-187 b/evaluation/tests/agg/test-187 deleted file mode 100755 index f5fe282a6..000000000 --- a/evaluation/tests/agg/test-187 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -d ',' > out diff --git a/evaluation/tests/agg/test-192 b/evaluation/tests/agg/test-192 deleted file mode 100755 index c3444f9f7..000000000 --- a/evaluation/tests/agg/test-192 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr '[A-Z]' '[a-z]' > out diff --git a/evaluation/tests/agg/test-2 b/evaluation/tests/agg/test-2 deleted file mode 100755 index 9ea463679..000000000 --- a/evaluation/tests/agg/test-2 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk "\$1 >= 1000" > out diff --git a/evaluation/tests/agg/test-21 b/evaluation/tests/agg/test-21 deleted file mode 100755 index 71135185a..000000000 --- a/evaluation/tests/agg/test-21 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > out diff --git a/evaluation/tests/agg/test-24 b/evaluation/tests/agg/test-24 deleted file mode 100755 index 39a47d8ea..000000000 --- a/evaluation/tests/agg/test-24 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -c '^....$' > out diff --git a/evaluation/tests/agg/test-25 b/evaluation/tests/agg/test-25 deleted file mode 100755 index 90ad2f79d..000000000 --- a/evaluation/tests/agg/test-25 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -c '^[A-Z]' > out diff --git a/evaluation/tests/agg/test-26 b/evaluation/tests/agg/test-26 deleted file mode 100755 index 2559cf953..000000000 --- a/evaluation/tests/agg/test-26 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -c 'light.\*light' > out diff --git a/evaluation/tests/agg/test-27 b/evaluation/tests/agg/test-27 deleted file mode 100755 index b6aa6e66c..000000000 --- a/evaluation/tests/agg/test-27 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -c 'light.\*light.\*light' > out diff --git a/evaluation/tests/agg/test-28 b/evaluation/tests/agg/test-28 deleted file mode 100755 index 6af2f98d9..000000000 --- a/evaluation/tests/agg/test-28 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -i '^[^aeiou]*[aeiou][^aeiou]*$' > out diff --git a/evaluation/tests/agg/test-3 b/evaluation/tests/agg/test-3 deleted file mode 100755 index 9e38c6711..000000000 --- a/evaluation/tests/agg/test-3 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk "length >= 16" > out diff --git a/evaluation/tests/agg/test-30 b/evaluation/tests/agg/test-30 deleted file mode 100755 index f3b39265d..000000000 --- a/evaluation/tests/agg/test-30 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -v '^0$' > out diff --git a/evaluation/tests/agg/test-31 b/evaluation/tests/agg/test-31 deleted file mode 100755 index 5be67e723..000000000 --- a/evaluation/tests/agg/test-31 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -vc 'light.\*light.\*light' > out diff --git a/evaluation/tests/agg/test-32 b/evaluation/tests/agg/test-32 deleted file mode 100755 index c710a481b..000000000 --- a/evaluation/tests/agg/test-32 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | grep -vi '[aeiou]' > out diff --git a/evaluation/tests/agg/test-34 b/evaluation/tests/agg/test-34 deleted file mode 100755 index 9bddc2926..000000000 --- a/evaluation/tests/agg/test-34 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | head > out diff --git a/evaluation/tests/agg/test-35 b/evaluation/tests/agg/test-35 deleted file mode 100755 index 82baf7ba5..000000000 --- a/evaluation/tests/agg/test-35 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | head -15 > out diff --git a/evaluation/tests/agg/test-36 b/evaluation/tests/agg/test-36 deleted file mode 100755 index 9256b2baa..000000000 --- a/evaluation/tests/agg/test-36 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | head -n 1 > out diff --git a/evaluation/tests/agg/test-38 b/evaluation/tests/agg/test-38 deleted file mode 100755 index f958a80eb..000000000 --- a/evaluation/tests/agg/test-38 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | iconv -c -t ascii//TRANSLIT > out diff --git a/evaluation/tests/agg/test-39 b/evaluation/tests/agg/test-39 deleted file mode 100755 index de5c803c7..000000000 --- a/evaluation/tests/agg/test-39 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | iconv -f utf-8 -t ascii//translit > out diff --git a/evaluation/tests/agg/test-4 b/evaluation/tests/agg/test-4 deleted file mode 100755 index 603082a28..000000000 --- a/evaluation/tests/agg/test-4 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk "{\$1=\$1};1" > out diff --git a/evaluation/tests/agg/test-40 b/evaluation/tests/agg/test-40 deleted file mode 100755 index 0ff6c0579..000000000 --- a/evaluation/tests/agg/test-40 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed "\$d" > out diff --git a/evaluation/tests/agg/test-41 b/evaluation/tests/agg/test-41 deleted file mode 100755 index 617ba4515..000000000 --- a/evaluation/tests/agg/test-41 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed "s#^#$WIKI#" > out diff --git a/evaluation/tests/agg/test-42 b/evaluation/tests/agg/test-42 deleted file mode 100755 index 008831019..000000000 --- a/evaluation/tests/agg/test-42 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed "s;^;$IN;" > out diff --git a/evaluation/tests/agg/test-43 b/evaluation/tests/agg/test-43 deleted file mode 100755 index 6a2f58cad..000000000 --- a/evaluation/tests/agg/test-43 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's/ .*//g' > out diff --git a/evaluation/tests/agg/test-45 b/evaluation/tests/agg/test-45 deleted file mode 100755 index a419652e7..000000000 --- a/evaluation/tests/agg/test-45 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's/T\(..\):..:../,\1/' > out diff --git a/evaluation/tests/agg/test-46 b/evaluation/tests/agg/test-46 deleted file mode 100755 index b6cea991e..000000000 --- a/evaluation/tests/agg/test-46 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's/ly$/-ly/g' > out diff --git a/evaluation/tests/agg/test-47 b/evaluation/tests/agg/test-47 deleted file mode 100755 index ae1b29d52..000000000 --- a/evaluation/tests/agg/test-47 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's;$;/;' > out diff --git a/evaluation/tests/agg/test-48 b/evaluation/tests/agg/test-48 deleted file mode 100755 index 3ea83230d..000000000 --- a/evaluation/tests/agg/test-48 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' > out diff --git a/evaluation/tests/agg/test-5 b/evaluation/tests/agg/test-5 deleted file mode 100755 index 2d5efa113..000000000 --- a/evaluation/tests/agg/test-5 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk "{print \$2, \$0}" > out diff --git a/evaluation/tests/agg/test-50 b/evaluation/tests/agg/test-50 deleted file mode 100755 index 3cb3bbab0..000000000 --- a/evaluation/tests/agg/test-50 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed 5q > out diff --git a/evaluation/tests/agg/test-51 b/evaluation/tests/agg/test-51 deleted file mode 100755 index 6937499e5..000000000 --- a/evaluation/tests/agg/test-51 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -f > out diff --git a/evaluation/tests/agg/test-52 b/evaluation/tests/agg/test-52 deleted file mode 100755 index bfb96eb76..000000000 --- a/evaluation/tests/agg/test-52 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -k1n > out diff --git a/evaluation/tests/agg/test-53 b/evaluation/tests/agg/test-53 deleted file mode 100755 index 1eca02a5b..000000000 --- a/evaluation/tests/agg/test-53 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -rn > out diff --git a/evaluation/tests/agg/test-55 b/evaluation/tests/agg/test-55 deleted file mode 100755 index 178e10f60..000000000 --- a/evaluation/tests/agg/test-55 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort -u > out diff --git a/evaluation/tests/agg/test-56 b/evaluation/tests/agg/test-56 deleted file mode 100755 index b131766c9..000000000 --- a/evaluation/tests/agg/test-56 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tail +2 > out diff --git a/evaluation/tests/agg/test-58 b/evaluation/tests/agg/test-58 deleted file mode 100755 index 8929ac6a2..000000000 --- a/evaluation/tests/agg/test-58 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr '[a-z]' '[A-Z]' > out diff --git a/evaluation/tests/agg/test-6 b/evaluation/tests/agg/test-6 deleted file mode 100755 index a1f95768e..000000000 --- a/evaluation/tests/agg/test-6 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk '{print NF}' > out diff --git a/evaluation/tests/agg/test-62 b/evaluation/tests/agg/test-62 deleted file mode 100755 index ed8d52d5e..000000000 --- a/evaluation/tests/agg/test-62 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -cs A-Za-z'' > out diff --git a/evaluation/tests/agg/test-63 b/evaluation/tests/agg/test-63 deleted file mode 100755 index 584dac90d..000000000 --- a/evaluation/tests/agg/test-63 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -d '[:punct:]' > out diff --git a/evaluation/tests/agg/test-7 b/evaluation/tests/agg/test-7 deleted file mode 100755 index f49fa5968..000000000 --- a/evaluation/tests/agg/test-7 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk -v OFS=" " "{print \$2,\$1}" > out diff --git a/evaluation/tests/agg/test-70 b/evaluation/tests/agg/test-70 deleted file mode 100755 index 440eed7cb..000000000 --- a/evaluation/tests/agg/test-70 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr -sc '[A-Z][a-z]' '[ 12*]' > out diff --git a/evaluation/tests/agg/test-73 b/evaluation/tests/agg/test-73 deleted file mode 100755 index 16f8d3ed1..000000000 --- a/evaluation/tests/agg/test-73 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | tr A-Z a-z > out diff --git a/evaluation/tests/agg/test-78 b/evaluation/tests/agg/test-78 deleted file mode 100755 index 676f474d7..000000000 --- a/evaluation/tests/agg/test-78 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | F1={$FILE1:-"file1"} F2={$FILE2:-"file2"} sort "$F1" "$F2" > out diff --git a/evaluation/tests/agg/test-79 b/evaluation/tests/agg/test-79 deleted file mode 100755 index 9915c5cf9..000000000 --- a/evaluation/tests/agg/test-79 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | F1={$FILE1:-"file1"} cat "$F1" > out diff --git a/evaluation/tests/agg/test-82 b/evaluation/tests/agg/test-82 deleted file mode 100755 index 375ed6c26..000000000 --- a/evaluation/tests/agg/test-82 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | awk 'BEGIN{print ""}{print}' > out diff --git a/evaluation/tests/agg/test-84 b/evaluation/tests/agg/test-84 deleted file mode 100755 index bdf3080b3..000000000 --- a/evaluation/tests/agg/test-84 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | cat > out diff --git a/evaluation/tests/agg/test-86 b/evaluation/tests/agg/test-86 deleted file mode 100755 index d7a3f60d7..000000000 --- a/evaluation/tests/agg/test-86 +++ /dev/null @@ -1,3 +0,0 @@ -LC_COLLATE=C -IN=$PASH_TOP/evaluation/tests/input/words -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort | comm -12 - $IN > out diff --git a/evaluation/tests/agg/test-89 b/evaluation/tests/agg/test-89 deleted file mode 100755 index 7a4c0cebc..000000000 --- a/evaluation/tests/agg/test-89 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | fmt -w1 > out diff --git a/evaluation/tests/agg/test-9 b/evaluation/tests/agg/test-9 deleted file mode 100755 index d5beb1ac5..000000000 --- a/evaluation/tests/agg/test-9 +++ /dev/null @@ -1,3 +0,0 @@ -export DICT=../input/sorted_words -export LC_COLLATE=C -cat $PASH_TOP/evaluation/tests/input/1M.txt | comm -23 - $DICT > out diff --git a/evaluation/tests/agg/test-90 b/evaluation/tests/agg/test-90 deleted file mode 100755 index 5f89d6ad8..000000000 --- a/evaluation/tests/agg/test-90 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | FAST='computer' SLOWER='\(.[a-zA-Z0-9]\+@[a-zA-Z0-9]\+\.[a-z]\{2,\}\).*\1' SLOWEST='\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' LC_ALL=en_US.UTF-8 grep "$SLOWEST" > out diff --git a/evaluation/tests/agg/test-91 b/evaluation/tests/agg/test-91 deleted file mode 100755 index 582ee331c..000000000 --- a/evaluation/tests/agg/test-91 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | SLOW='[a-zA-Z0-9]\+@[a-zA-Z0-9]\+\.[a-z]\{2,\}\|a' FAST='computer' grep -c $SLOW > out diff --git a/evaluation/tests/agg/test-92 b/evaluation/tests/agg/test-92 deleted file mode 100755 index cd9790916..000000000 --- a/evaluation/tests/agg/test-92 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | SLOW='[a-zA-Z0-9]\+@[a-zA-Z0-9]\+\.[a-z]\{2,\}\|a' FAST='computer' grep -c $FAST > out diff --git a/evaluation/tests/agg/test-93 b/evaluation/tests/agg/test-93 deleted file mode 100755 index 071d949f3..000000000 --- a/evaluation/tests/agg/test-93 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | FAST='computer' SLOWER='\(.[a-zA-Z0-9]\+@[a-zA-Z0-9]\+\.[a-z]\{2,\}\).*\1' SLOWEST='\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' LC_ALL=en_US.UTF-8 grep "$FAST" > out diff --git a/evaluation/tests/agg/test-94 b/evaluation/tests/agg/test-94 deleted file mode 100755 index fad251588..000000000 --- a/evaluation/tests/agg/test-94 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | FAST='computer' SLOWER='\(.[a-zA-Z0-9]\+@[a-zA-Z0-9]\+\.[a-z]\{2,\}\).*\1' SLOWEST='\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' LC_ALL=en_US.UTF-8 grep -in $SLOWEST > out diff --git a/evaluation/tests/agg/test-95 b/evaluation/tests/agg/test-95 deleted file mode 100755 index 2f2fa799f..000000000 --- a/evaluation/tests/agg/test-95 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | iconv > out diff --git a/evaluation/tests/agg/test-96 b/evaluation/tests/agg/test-96 deleted file mode 100755 index 670599329..000000000 --- a/evaluation/tests/agg/test-96 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | nl > out diff --git a/evaluation/tests/agg/test-97 b/evaluation/tests/agg/test-97 deleted file mode 100755 index 8e86da702..000000000 --- a/evaluation/tests/agg/test-97 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | rev > out diff --git a/evaluation/tests/agg/test-98 b/evaluation/tests/agg/test-98 deleted file mode 100755 index 5da53678a..000000000 --- a/evaluation/tests/agg/test-98 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sed '/^$/d' > out diff --git a/evaluation/tests/agg/test-99 b/evaluation/tests/agg/test-99 deleted file mode 100755 index b66bbb7c3..000000000 --- a/evaluation/tests/agg/test-99 +++ /dev/null @@ -1 +0,0 @@ -cat $PASH_TOP/evaluation/tests/input/1M.txt | sort > out diff --git a/evaluation/tests/interface_tests/env_vars.sh b/evaluation/tests/interface_tests/env_vars.sh new file mode 100644 index 000000000..784a4ae6d --- /dev/null +++ b/evaluation/tests/interface_tests/env_vars.sh @@ -0,0 +1,9 @@ +myfunction() { + env | sort > tmp1.txt +} +shellvar1=123456 +shellvar2="This is several words" +shellvar3=" xxx " +export shellvar2 +trap myfunction EXIT +env | sort > tmp2.txt diff --git a/evaluation/tests/interface_tests/redir-dup.sh b/evaluation/tests/interface_tests/redir-dup.sh new file mode 100644 index 000000000..107d956b0 --- /dev/null +++ b/evaluation/tests/interface_tests/redir-dup.sh @@ -0,0 +1,3 @@ +(echo one >&2) 2>&1 +(echo two >&2) 2>- +(echo three >&2) 2>&1 diff --git a/evaluation/tests/interface_tests/run.sh b/evaluation/tests/interface_tests/run.sh index b06a8dd0c..e0cd53cf1 100755 --- a/evaluation/tests/interface_tests/run.sh +++ b/evaluation/tests/interface_tests/run.sh @@ -307,6 +307,20 @@ test_star() dfg" } +test_env_vars() +{ + local shell=$1 + rm -f tmp1.txt tmp2.txt + $shell env_vars.sh + diff tmp1.txt tmp2.txt +} + +test_redir_dup() +{ + local shell=$1 + $shell redir-dup.sh +} + ## We run all tests composed with && to exit on the first that fails if [ "$#" -eq 0 ]; then run_test test1 @@ -349,6 +363,8 @@ if [ "$#" -eq 0 ]; then run_test test_exclam run_test test_redir_var_test run_test test_star + run_test test_env_vars + run_test test_redir_dup else for testname in $@ do diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..c40d1dbfa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +graphviz +libdash +pash-annotations>=0.2.0 +shasta==0.1.0 +sh-expand \ No newline at end of file diff --git a/scripts/distro-deps.sh b/scripts/distro-deps.sh index cca53fd79..f2d8729fc 100755 --- a/scripts/distro-deps.sh +++ b/scripts/distro-deps.sh @@ -28,11 +28,11 @@ fi # convert to lowercase distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') # compile the list of the shared required packages -pkgs="bc curl git graphviz python sudo wget" +pkgs="bc curl git graphviz python3 sudo wget" # now do different things depending on distro case "$distro" in ubuntu*) - pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config python3 python3-pip python3-setuptools python3-testresources wamerican-insane" + pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config python3-pip python3-setuptools python3-testresources wamerican-insane" if [[ "$show_deps" == 1 ]]; then echo "$pkgs" | sort exit 0 @@ -52,7 +52,7 @@ case "$distro" in fi ;; debian*) - pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config procps python3 python3-pip python3-setuptools python3-testresources wamerican-insane" + pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config procps python3-pip python3-setuptools python3-testresources wamerican-insane" if [[ "$show_deps" == 1 ]]; then echo "$pkgs" | sort exit 0 diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index cb5c3acc1..c27a86e72 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -15,8 +15,3 @@ cd "$PASH_TOP/evaluation/tests/interface_tests" echo "Running compiler tests..." cd "$PASH_TOP/evaluation/tests/" ./test_evaluation_scripts.sh - -echo "Running aggregator tests..." -cd "$PASH_TOP/evaluation/tests/agg/" -./run.sh - diff --git a/scripts/setup-pash.sh b/scripts/setup-pash.sh index e7e5df1a3..575babfb8 100755 --- a/scripts/setup-pash.sh +++ b/scripts/setup-pash.sh @@ -22,10 +22,7 @@ mkdir -p $PYTHON_PKG_DIR echo "Installing python dependencies..." -python3 -m pip install graphviz --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_graphviz.log -# TODO 2022-08-01 if libdash wheel isn't available, we need autmake etc. -python3 -m pip install libdash --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_libdash.log -python3 -m pip install 'pash-annotations>=0.2.0,<0.3.0' --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_annotations.log +python3 -m pip install -r "$PASH_TOP/requirements.txt" --no-cache-dir --root $PYTHON_PKG_DIR --ignore-installed ## numpy and matplotlib are only needed to generate the evaluation plots so they should not be in the main path if [[ "$install_eval" == 1 ]]; then diff --git a/scripts/workflow/get_results.sh b/scripts/workflow/get_results.sh index 591bbd93a..67189ae7d 100755 --- a/scripts/workflow/get_results.sh +++ b/scripts/workflow/get_results.sh @@ -33,6 +33,3 @@ stats "$PASH_TOP/evaluation/tests/interface_tests/output" interface # ## compiler Tests stats "${PASH_TOP}/evaluation/tests/results" compiler -# -## aggregator tests -stats "${PASH_TOP}/evaluation/tests/agg/output" agg