diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 000000000..5903fb668 --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,12 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable + with: + options: "--extend-exclude 'evaluations/'" diff --git a/compiler/annotations_utils/util_cmd_invocations.py b/compiler/annotations_utils/util_cmd_invocations.py index 26dd8cb6b..7770de644 100644 --- a/compiler/annotations_utils/util_cmd_invocations.py +++ b/compiler/annotations_utils/util_cmd_invocations.py @@ -1,24 +1,43 @@ from pash_annotations.datatypes.BasicDatatypes import Flag, ArgStringType, Operand from pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -from pash_annotations.annotation_generation.datatypes.CommandProperties import CommandProperties -from pash_annotations.annotation_generation.AnnotationGeneration import get_input_output_info_from_cmd_invocation, \ - get_parallelizability_info_from_cmd_invocation -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.annotation_generation.datatypes.InputOutputInfo import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.datatypes.CommandProperties import ( + CommandProperties, +) +from pash_annotations.annotation_generation.AnnotationGeneration import ( + get_input_output_info_from_cmd_invocation, + get_parallelizability_info_from_cmd_invocation, +) +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from definitions.ir.arg import Arg # for typing from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix -from shell_ast.ast_util import string_to_argument, redir_stdout_to_file, redir_file_to_stdin, make_command +from shell_ast.ast_util import ( + string_to_argument, + redir_stdout_to_file, + redir_file_to_stdin, + make_command, +) + def get_command_invocation_prefix_from_dfg_node(dfg_node): - return CommandInvocationPrefix(cmd_name = dfg_node.com_name, - flag_option_list = dfg_node.flag_option_list, - positional_config_list = dfg_node.positional_config_list) + return CommandInvocationPrefix( + cmd_name=dfg_node.com_name, + flag_option_list=dfg_node.flag_option_list, + positional_config_list=dfg_node.positional_config_list, + ) + # TODO: ideally methods in the respective classes but requires refactoring of parsing infrastructure # TODO: isn't this `to_ast`? 
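Note on the workflow above: it runs `psf/black@stable` in check mode on every push and pull request, excluding `evaluations/`. A rough local equivalent for contributors who want to reproduce the CI result before pushing (assuming `black` is installed and on PATH) is sketched below; it mirrors the workflow's options but is not part of the PR itself.

```python
# Local approximation of the CI lint job above; assumes `black` is installed.
# Mirrors the workflow options: check-only, diff output, evaluations/ excluded.
import subprocess
import sys

result = subprocess.run(
    ["black", "--check", "--diff", "--extend-exclude", "evaluations/", "."],
    capture_output=True,
    text=True,
)
print(result.stdout or result.stderr)
sys.exit(result.returncode)  # non-zero means some files would be reformatted
```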
@@ -48,19 +67,22 @@ def to_node_cmd_inv_with_io_vars(cmd_inv, edges, redirs, assignments): node = make_command(cmd_asts, redirections=new_redirs, assignments=assignments) return node + def to_ast_flagoption(flagoption, edges): if isinstance(flagoption, Flag): return [string_to_argument(flagoption.get_name())] - elif isinstance(flagoption, OptionWithIO): # retype to IOVar + elif isinstance(flagoption, OptionWithIO): # retype to IOVar opt_name_ast = string_to_argument(flagoption.get_name()) opt_arg_ast = translate_io_var_if_applicable(flagoption.get_arg(), edges) return [opt_name_ast, opt_arg_ast] + def to_ast_operand(operand, edges): if isinstance(operand, Operand): return translate_io_var_if_applicable(operand.get_name(), edges) return translate_io_var_if_applicable(operand, edges) + def translate_io_var_if_applicable(pot_io_var, edges): # TODO: this is currently a hack but eventually every possible type gets their own to_ast-function if isinstance(pot_io_var, int): @@ -68,7 +90,7 @@ def translate_io_var_if_applicable(pot_io_var, edges): elif isinstance(pot_io_var, ArgStringType): return to_ast_arg_string_type(pot_io_var) elif isinstance(pot_io_var, CommandInvocationWithIOVars): - assert(False) + assert False # only happens as r-wrapped node return to_node_cmd_inv_with_io_vars(pot_io_var, edges, [], []) elif isinstance(pot_io_var, Arg): @@ -76,27 +98,39 @@ def translate_io_var_if_applicable(pot_io_var, edges): else: raise Exception("Unhandled type for operand in to_ast!") + def to_ast_arg_string_type(arg_string_type): - return arg_string_type.get_name().arg_char_list # is of type Arg + return arg_string_type.get_name().arg_char_list # is of type Arg + # assumes io_var is an edge id def dereference_io_var(io_var, edges): fid, _, _ = edges[io_var] return fid.to_ast() -def get_input_output_info_from_cmd_invocation_util(cmd_invocationInitial : CommandInvocationInitial) -> InputOutputInfo: + +def get_input_output_info_from_cmd_invocation_util( + cmd_invocationInitial: CommandInvocationInitial, +) -> InputOutputInfo: return get_input_output_info_from_cmd_invocation(cmd_invocationInitial) -def get_parallelizability_info_from_cmd_invocation_util(cmd_invocationInitial : CommandInvocationInitial) -> ParallelizabilityInfo: + +def get_parallelizability_info_from_cmd_invocation_util( + cmd_invocationInitial: CommandInvocationInitial, +) -> ParallelizabilityInfo: return get_parallelizability_info_from_cmd_invocation(cmd_invocationInitial) + def construct_property_container_from_list_of_properties(list_properties): return CommandProperties(dict(list_properties)) + # this function is needed to wrap a node in `r_wrap` -def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping(cmd_inv, edges): +def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping( + cmd_inv, edges +): # we already expand here - whole_cmd = Arg.string_to_arg("\'") + whole_cmd = Arg.string_to_arg("'") arg_cmd_name = Arg.string_to_arg(cmd_inv.cmd_name) arg_flagoptions = [] for flagoption in cmd_inv.flag_option_list: @@ -107,9 +141,10 @@ def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wra all_cmd_parts_arg.extend(arg_operands) for part in all_cmd_parts_arg: whole_cmd.concatenate(part) - whole_cmd.concatenate(Arg.string_to_arg("\'")) + whole_cmd.concatenate(Arg.string_to_arg("'")) return whole_cmd + def to_arg_flagoption(flagoption, edges): if isinstance(flagoption, Flag): return [Arg.string_to_arg(flagoption.get_name())] @@ -118,11 +153,13 @@ def 
to_arg_flagoption(flagoption, edges): opt_arg_arg = translate_io_var_to_arg_if_applicable(flagoption.get_arg(), edges) return [opt_name_arg, opt_arg_arg] + def to_arg_operand(operand, edges): if isinstance(operand, Operand): return translate_io_var_to_arg_if_applicable(operand.get_name(), edges) return translate_io_var_to_arg_if_applicable(operand, edges) + def translate_io_var_to_arg_if_applicable(pot_io_var, edges): if isinstance(pot_io_var, int): return Arg(dereference_io_var(pot_io_var, edges)) diff --git a/compiler/annotations_utils/util_file_descriptors.py b/compiler/annotations_utils/util_file_descriptors.py index fb17438b0..4495af9af 100644 --- a/compiler/annotations_utils/util_file_descriptors.py +++ b/compiler/annotations_utils/util_file_descriptors.py @@ -1,18 +1,21 @@ from util import log from definitions.ir.resource import FileResource, Resource, FileDescriptorResource -from pash_annotations.datatypes.BasicDatatypesWithIO import FileNameWithIOInfo, StdDescriptorWithIOInfo +from pash_annotations.datatypes.BasicDatatypesWithIO import ( + FileNameWithIOInfo, + StdDescriptorWithIOInfo, +) def resource_from_file_descriptor(file_descriptor) -> Resource: if isinstance(file_descriptor, FileNameWithIOInfo): arg = file_descriptor.get_name() - log(f'filedes name: {file_descriptor.get_name()}') - log(f'filedes name type: {type(file_descriptor.get_name())}') - log(f'arg: {arg}') + log(f"filedes name: {file_descriptor.get_name()}") + log(f"filedes name type: {type(file_descriptor.get_name())}") + log(f"arg: {arg}") return FileResource(file_descriptor.get_name()) elif isinstance(file_descriptor, StdDescriptorWithIOInfo): resource = ("fd", file_descriptor.get_type().value) return FileDescriptorResource(resource) else: - assert(False) + assert False # unreachable diff --git a/compiler/annotations_utils/util_parsing.py b/compiler/annotations_utils/util_parsing.py index f4655b9fa..074b94004 100644 --- a/compiler/annotations_utils/util_parsing.py +++ b/compiler/annotations_utils/util_parsing.py @@ -3,9 +3,20 @@ from definitions.ir.arg import Arg from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial -from pash_annotations.datatypes.BasicDatatypes import Option, ArgStringType, Flag, Operand -from pash_annotations.parser.parser import parse, get_set_of_all_flags, get_dict_flag_to_primary_repr, get_set_of_all_options, \ - get_dict_option_to_primary_repr, are_all_individually_flags +from pash_annotations.datatypes.BasicDatatypes import ( + Option, + ArgStringType, + Flag, + Operand, +) +from pash_annotations.parser.parser import ( + parse, + get_set_of_all_flags, + get_dict_flag_to_primary_repr, + get_set_of_all_options, + get_dict_option_to_primary_repr, + are_all_individually_flags, +) from pash_annotations.parser.util_parser import get_json_data @@ -18,13 +29,19 @@ def merge_to_single_string_with_space(list_str): else: return " ".join(list_str) + def get_command_invocation(command, options) -> CommandInvocationInitial: command_as_string: str = format_arg_chars(command) - options_and_operands_as_string: str = merge_to_single_string_with_space([format_arg_chars(option) for option in options]) - command_invocation_as_string: str = f'{command_as_string} {options_and_operands_as_string}' + options_and_operands_as_string: str = merge_to_single_string_with_space( + [format_arg_chars(option) for option in options] + ) + command_invocation_as_string: str = ( + f"{command_as_string} {options_and_operands_as_string}" + ) command_invocation: CommandInvocationInitial = 
parse(command_invocation_as_string) return command_invocation + def get_ast_for_flagoption(flagoption): result = string_to_argument(flagoption.get_name()) if isinstance(flagoption, Option): @@ -32,26 +49,31 @@ def get_ast_for_flagoption(flagoption): assert False return result + def get_ast_for_argstringtype(arg): return string_to_argument(arg.get_name()) + # TODO: this is a hack to fix the wrong parsing of " def fix_parsing_newline(arg): - if arg.get_name() == '\\n': + if arg.get_name() == "\\n": return ArgStringType(r'"\n"') else: return arg -def parse_arg_list_to_command_invocation(command, flags_options_operands) -> CommandInvocationInitial: - +def parse_arg_list_to_command_invocation( + command, flags_options_operands +) -> CommandInvocationInitial: cmd_name = format_arg_chars(command) json_data = get_json_data(cmd_name) set_of_all_flags: Set[str] = get_set_of_all_flags(json_data) dict_flag_to_primary_repr: dict[str, str] = get_dict_flag_to_primary_repr(json_data) set_of_all_options: Set[str] = get_set_of_all_options(json_data) - dict_option_to_primary_repr: dict[str, str] = get_dict_option_to_primary_repr(json_data) + dict_option_to_primary_repr: dict[str, str] = get_dict_option_to_primary_repr( + json_data + ) # we keep the Arg for everything but flag and option names # parse list of command invocation terms @@ -61,20 +83,30 @@ def parse_arg_list_to_command_invocation(command, flags_options_operands) -> Com potential_flag_or_option_arg = flags_options_operands[i] potential_flag_or_option_name = format_arg_chars(potential_flag_or_option_arg) if potential_flag_or_option_name in set_of_all_flags: - flag_name_as_string: str = dict_flag_to_primary_repr.get(potential_flag_or_option_name, potential_flag_or_option_name) + flag_name_as_string: str = dict_flag_to_primary_repr.get( + potential_flag_or_option_name, potential_flag_or_option_name + ) flag: Flag = Flag(flag_name_as_string) flag_option_list.append(flag) - elif (potential_flag_or_option_name in set_of_all_options) and ((i+1) < len(flags_options_operands)): - option_name_as_string: str = dict_option_to_primary_repr.get(potential_flag_or_option_name, potential_flag_or_option_name) - option_arg_as_arg: Arg = Arg(flags_options_operands[i+1]) + elif (potential_flag_or_option_name in set_of_all_options) and ( + (i + 1) < len(flags_options_operands) + ): + option_name_as_string: str = dict_option_to_primary_repr.get( + potential_flag_or_option_name, potential_flag_or_option_name + ) + option_arg_as_arg: Arg = Arg(flags_options_operands[i + 1]) option = Option(option_name_as_string, option_arg_as_arg) flag_option_list.append(option) i += 1 # since we consumed another term for the argument - elif potential_flag_or_option_name == "-": # switch to operand mode (interpreted as hyphen-stdin) + elif ( + potential_flag_or_option_name == "-" + ): # switch to operand mode (interpreted as hyphen-stdin) break - elif are_all_individually_flags(potential_flag_or_option_name, set_of_all_flags): + elif are_all_individually_flags( + potential_flag_or_option_name, set_of_all_flags + ): for split_el in list(potential_flag_or_option_name[1:]): - flag: Flag = Flag(f'-{split_el}') + flag: Flag = Flag(f"-{split_el}") flag_option_list.append(flag) else: break # next one is Operand, and we keep these in separate list @@ -85,7 +117,9 @@ def parse_arg_list_to_command_invocation(command, flags_options_operands) -> Com # if parsed_elements_list[i] == '--': # i += 1 - operand_list = [Operand(Arg(operand_arg)) for operand_arg in flags_options_operands[i:]] + operand_list 
= [ + Operand(Arg(operand_arg)) for operand_arg in flags_options_operands[i:] + ] # log("type of operand_list[0].get_name()", type(operand_list[0].get_name())) can only be used if there are operands return CommandInvocationInitial(cmd_name, flag_option_list, operand_list) diff --git a/compiler/ast_to_ir.py b/compiler/ast_to_ir.py index 2fda09d92..c1e753fa3 100644 --- a/compiler/ast_to_ir.py +++ b/compiler/ast_to_ir.py @@ -8,7 +8,9 @@ from util import * from parse import from_ast_objects_to_shell -## TODO: Separate the ir stuff to the bare minimum and +from custom_error import * + +## TODO: Separate the ir stuff to the bare minimum and ## try to move this to the shell_ast folder. ## @@ -24,25 +26,52 @@ ## without knowing about previous or later subtrees that can be ## distributed. Is that reasonable? compile_cases = { - "Pipe": (lambda fileIdGen, config: - lambda ast_node: compile_node_pipe(ast_node, fileIdGen, config)), - "Command": (lambda fileIdGen, config: - lambda ast_node: compile_node_command(ast_node, fileIdGen, config)), - "And": (lambda fileIdGen, config: - lambda ast_node: compile_node_and_or_semi(ast_node, fileIdGen, config)), - "Or": (lambda fileIdGen, config: - lambda ast_node: compile_node_and_or_semi(ast_node, fileIdGen, config)), - "Semi": (lambda fileIdGen, config: - lambda ast_node: compile_node_and_or_semi(ast_node, fileIdGen, config)), - "Redir": (lambda fileIdGen, config: - lambda ast_node: compile_node_redir_subshell(ast_node, fileIdGen, config)), - "Subshell": (lambda fileIdGen, config: - lambda ast_node: compile_node_redir_subshell(ast_node, fileIdGen, config)), - "Background": (lambda fileIdGen, config: - lambda ast_node: compile_node_background(ast_node, fileIdGen, config)), - "For": (lambda fileIdGen, config: - lambda ast_node: compile_node_for(ast_node, fileIdGen, config)) - } + "Pipe": ( + lambda fileIdGen, config: lambda ast_node: compile_node_pipe( + ast_node, fileIdGen, config + ) + ), + "Command": ( + lambda fileIdGen, config: lambda ast_node: compile_node_command( + ast_node, fileIdGen, config + ) + ), + "And": ( + lambda fileIdGen, config: lambda ast_node: compile_node_and_or_semi( + ast_node, fileIdGen, config + ) + ), + "Or": ( + lambda fileIdGen, config: lambda ast_node: compile_node_and_or_semi( + ast_node, fileIdGen, config + ) + ), + "Semi": ( + lambda fileIdGen, config: lambda ast_node: compile_node_and_or_semi( + ast_node, fileIdGen, config + ) + ), + "Redir": ( + lambda fileIdGen, config: lambda ast_node: compile_node_redir_subshell( + ast_node, fileIdGen, config + ) + ), + "Subshell": ( + lambda fileIdGen, config: lambda ast_node: compile_node_redir_subshell( + ast_node, fileIdGen, config + ) + ), + "Background": ( + lambda fileIdGen, config: lambda ast_node: compile_node_background( + ast_node, fileIdGen, config + ) + ), + "For": ( + lambda fileIdGen, config: lambda ast_node: compile_node_for( + ast_node, fileIdGen, config + ) + ), +} def compile_asts(ast_objects: "list[AstNode]", fileIdGen, config): @@ -51,12 +80,12 @@ def compile_asts(ast_objects: "list[AstNode]", fileIdGen, config): for i, ast_object in enumerate(ast_objects): # log("Compiling AST {}".format(i)) # log(ast_object) - assert(isinstance(ast_object, AstNode)) + assert isinstance(ast_object, AstNode) ## Compile subtrees of the AST to out intermediate representation - ## KK 2023-05-25: Would we ever want to pass this state to the expansion + ## KK 2023-05-25: Would we ever want to pass this state to the expansion ## of the next object? I don't think so. 
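Reviewer note on the `compile_cases` hunk above: aside from the black reformatting, the table keeps its original shape, a dict from AST node type to a curried handler where the outer lambda captures `fileIdGen` and `config` and the inner lambda takes the node itself. A minimal, self-contained sketch of that dispatch pattern follows; the node types, handlers, and `ast_match` signature here are illustrative only, not PaSh's real API.

```python
# Illustrative sketch of the curried dispatch table used by compile_cases.
# All names below are made up for demonstration.
def compile_pipe(node, file_id_gen, config):
    return f"pipe({node}) width={config['width']}"

def compile_command(node, file_id_gen, config):
    return f"command({node})"

cases = {
    "Pipe": (lambda file_id_gen, config: lambda node: compile_pipe(node, file_id_gen, config)),
    "Command": (lambda file_id_gen, config: lambda node: compile_command(node, file_id_gen, config)),
}

def ast_match(node_type, node, file_id_gen, config):
    # Apply the shared state first, then the node: cases[t](gen, cfg)(node)
    return cases[node_type](file_id_gen, config)(node)

print(ast_match("Pipe", "cat | grep x", file_id_gen=None, config={"width": 2}))
```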
- exp_state = ExpansionState(config['shell_variables']) + exp_state = ExpansionState(config["shell_variables"]) expanded_ast = expand_command(ast_object, exp_state) # log("Expanded:", expanded_ast) compiled_ast = compile_node(expanded_ast, fileIdGen, config) @@ -67,9 +96,8 @@ def compile_asts(ast_objects: "list[AstNode]", fileIdGen, config): ## If the accumulator contains an IR (meaning that the ## previous commands where run in background), union it with ## the current returned ast. - if (not acc_ir is None): - - if (isinstance(compiled_ast, IR)): + if not acc_ir is None: + if isinstance(compiled_ast, IR): acc_ir.background_union(compiled_ast) else: ## TODO: Make this union the compiled_ast with the @@ -82,21 +110,19 @@ def compile_asts(ast_objects: "list[AstNode]", fileIdGen, config): ## If the current compiled ast not in background (and so ## the union isn't in background too), stop accumulating - if (not acc_ir is None - and not acc_ir.is_in_background()): + if not acc_ir is None and not acc_ir.is_in_background(): compiled_asts.append(acc_ir) acc_ir = None else: ## If the compiled ast is in background, start ## accumulating it - if (isinstance(compiled_ast, IR) - and compiled_ast.is_in_background()): + if isinstance(compiled_ast, IR) and compiled_ast.is_in_background(): acc_ir = compiled_ast else: compiled_asts.append(compiled_ast) ## The final accumulator - if (not acc_ir is None): + if not acc_ir is None: compiled_asts.append(acc_ir) return compiled_asts @@ -106,9 +132,11 @@ def compile_node(ast_object, fileIdGen, config): global compile_cases return ast_match(ast_object, compile_cases, fileIdGen, config) + def compile_node_pipe(ast_node, fileIdGen, config): - compiled_pipe_nodes = combine_pipe([compile_node(pipe_item, fileIdGen, config) - for pipe_item in ast_node.items]) + compiled_pipe_nodes = combine_pipe( + [compile_node(pipe_item, fileIdGen, config) for pipe_item in ast_node.items] + ) ## Note: When calling combine_pipe_nodes (which ## optimistically distributes all the children of a @@ -124,27 +152,29 @@ def compile_node_pipe(ast_node, fileIdGen, config): compiled_ast = compiled_ir return compiled_ast + ## This combines all the children of the Pipeline to an IR. def combine_pipe(ast_nodes): ## Initialize the IR with the first node in the Pipe - if (isinstance(ast_nodes[0], IR)): + if isinstance(ast_nodes[0], IR): combined_nodes = ast_nodes[0] else: ## If any part of the pipe is not an IR, the compilation must fail. log("Node: {} is not pure".format(ast_nodes[0])) - raise Exception('Not pure node in pipe') + raise UnparallelizableError("Node: {} is not a pure node in pipe".format(ast_nodes[0])) ## Combine the rest of the nodes for ast_node in ast_nodes[1:]: - if (isinstance(ast_node, IR)): + if isinstance(ast_node, IR): combined_nodes.pipe_append(ast_node) else: ## If any part of the pipe is not an IR, the compilation must fail. 
log("Node: {} is not pure".format(ast_nodes)) - raise Exception('Not pure node in pipe') + raise UnparallelizableError("This specific node: {} is not a pure node in pipe".format(ast_node)) return [combined_nodes] + def compile_node_command(ast_node, fileIdGen, config): ## Compile assignments and redirection list compiled_assignments = compile_assignments(ast_node.assignments, fileIdGen, config) @@ -160,10 +190,9 @@ def compile_node_command(ast_node, fileIdGen, config): try: ## If the command is not compileable to a DFG the following call will fail - ir = compile_command_to_DFG(fileIdGen, - command_name, - options, - redirections=compiled_redirections) + ir = compile_command_to_DFG( + fileIdGen, command_name, options, redirections=compiled_redirections + ) compiled_ast = ir except ValueError as err: log("Command not compiled to DFG:", err) @@ -171,37 +200,52 @@ def compile_node_command(ast_node, fileIdGen, config): ## Is there any case where a non-compiled command is fine? # log(traceback.format_exc()) compiled_arguments = compile_command_arguments(arguments, fileIdGen, config) - compiled_ast = make_kv(type(ast_node).NodeName, - [ast_node.line_number, compiled_assignments, - compiled_arguments, compiled_redirections]) + compiled_ast = make_kv( + type(ast_node).NodeName, + [ + ast_node.line_number, + compiled_assignments, + compiled_arguments, + compiled_redirections, + ], + ) return compiled_ast + def compile_node_and_or_semi(ast_node, fileIdGen, config): - compiled_ast = make_kv(type(ast_node).NodeName, - [compile_node(ast_node.left_operand, fileIdGen, config), - compile_node(ast_node.right_operand, fileIdGen, config)]) + compiled_ast = make_kv( + type(ast_node).NodeName, + [ + compile_node(ast_node.left_operand, fileIdGen, config), + compile_node(ast_node.right_operand, fileIdGen, config), + ], + ) return compiled_ast + def compile_node_redir_subshell(ast_node, fileIdGen, config): compiled_node = compile_node(ast_node.node, fileIdGen, config) - if (isinstance(compiled_node, IR)): + if isinstance(compiled_node, IR): ## TODO: I should use the redir list to redirect the files of ## the IR accordingly compiled_ast = compiled_node else: - compiled_ast = make_kv(type(ast_node).NodeName, [ast_node.line_number, - compiled_node, ast_node.redir_list]) + compiled_ast = make_kv( + type(ast_node).NodeName, + [ast_node.line_number, compiled_node, ast_node.redir_list], + ) return compiled_ast + def compile_node_background(ast_node, fileIdGen, config): compiled_node = compile_node(ast_node.node, fileIdGen, config) ## TODO: I should use the redir list to redirect the files of ## the IR accordingly - if (isinstance(compiled_node, IR)): + if isinstance(compiled_node, IR): ## TODO: Redirect the stdout, stdin accordingly compiled_node.set_background(True) compiled_ast = compiled_node @@ -218,14 +262,19 @@ def compile_node_background(ast_node, fileIdGen, config): return compiled_ast + def compile_node_for(ast_node, fileIdGen, config): ## TODO: Investigate what kind of check could we do to make a for ## loop parallel - compiled_ast = make_kv(type(ast_node).NodeName, - [ast_node.line_number, - compile_command_argument(ast_node.argument, fileIdGen, config), - compile_node(ast_node.body, fileIdGen, config), - ast_node.variable]) + compiled_ast = make_kv( + type(ast_node).NodeName, + [ + ast_node.line_number, + compile_command_argument(ast_node.argument, fileIdGen, config), + compile_node(ast_node.body, fileIdGen, config), + ast_node.variable, + ], + ) return compiled_ast @@ -238,15 +287,16 @@ def 
compile_node_for(ast_node, fileIdGen, config): ## 2. Second it raises an error if we cannot expand an argument. def should_expand_arg_char(arg_char): key, val = get_kv(arg_char) - if (key in ['V']): # Variable + if key in ["V"]: # Variable return True - elif (key == 'Q'): + elif key == "Q": return should_expand_argument(val) - elif (key == 'B'): + elif key == "B": log("Cannot expand:", arg_char) raise NotImplementedError() return False + def should_expand_argument(argument): return any([should_expand_arg_char(arg_char) for arg_char in argument]) @@ -255,21 +305,26 @@ def should_expand_argument(argument): def execute_shell_asts(asts): output_script = from_ast_objects_to_shell(asts) # log(output_script) - exec_obj = subprocess.run(["/usr/bin/env", "bash"], input=output_script, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) + exec_obj = subprocess.run( + ["/usr/bin/env", "bash"], + input=output_script, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) exec_obj.check_returncode() # log(exec_obj.stdout) return exec_obj.stdout + ## TODO: Properly parse the output of the shell script def parse_string_to_arguments(arg_char_string): # log(arg_char_string) return string_to_arguments(arg_char_string) + ## TODO: Use "pash_input_args" when expanding in place of normal arguments. def naive_expand(argument, config): - ## config contains a dictionary with: ## - all variables, their types, and values in 'shell_variables' ## - the name of a file that contains them in 'shell_variables_file_path' @@ -277,7 +332,7 @@ def naive_expand(argument, config): # log(config['shell_variables_file_path']) ## Create an AST node that "echo"s the argument - echo_asts = make_echo_ast(argument, config['shell_variables_file_path']) + echo_asts = make_echo_ast(argument, config["shell_variables_file_path"]) ## Execute the echo AST by unparsing it to shell ## and calling bash @@ -293,7 +348,6 @@ def naive_expand(argument, config): return expanded_arguments - ## This function expands an arg_char. ## At the moment it is pretty inefficient as it serves as a prototype. ## @@ -301,17 +355,17 @@ def naive_expand(argument, config): ## might have assignments of its own, therefore requiring that we use them to properly expand. def expand_command_argument(argument, config): new_arguments = [argument] - if(should_expand_argument(argument)): + if should_expand_argument(argument): new_arguments = naive_expand(argument, config) return new_arguments + ## This function compiles an arg char by recursing if it contains quotes or command substitution. ## ## It is currently being extended to also expand any arguments that are safe to expand. 
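Reviewer note on the expansion path above (`naive_expand` → `make_echo_ast` → `execute_shell_asts`): it effectively delegates expansion to bash by echoing the argument in a shell that has the saved variables loaded, using the same `subprocess.run(["/usr/bin/env", "bash"], ...)` pattern shown in `execute_shell_asts`. A stripped-down sketch of that idea is below; the function and file names are hypothetical, and the real code goes through AST objects rather than raw strings.

```python
# Hedged sketch of "expand by echoing through bash"; names are hypothetical.
import subprocess

def naive_expand_via_bash(argument: str, shell_variables_file: str) -> str:
    # Load the saved shell variables, then let bash itself expand the argument.
    script = f'source "{shell_variables_file}"\necho {argument}\n'
    exec_obj = subprocess.run(
        ["/usr/bin/env", "bash"],
        input=script,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    exec_obj.check_returncode()  # as in execute_shell_asts above
    return exec_obj.stdout.rstrip("\n")
```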
def compile_arg_char(arg_char: ArgChar, fileIdGen, config): ## Compile the arg char - if isinstance(arg_char, CArgChar) \ - or isinstance(arg_char, EArgChar): + if isinstance(arg_char, CArgChar) or isinstance(arg_char, EArgChar): # Single character or escape return arg_char elif isinstance(arg_char, BArgChar): @@ -326,32 +380,42 @@ def compile_arg_char(arg_char: ArgChar, fileIdGen, config): arg_char.arg = compile_command_argument(arg_char.arg, fileIdGen, config) return arg_char else: - log(f'Unknown arg_char: {arg_char}') + log(f"Unknown arg_char: {arg_char}") ## TODO: Complete this return arg_char + def compile_command_argument(argument, fileIdGen, config): compiled_argument = [compile_arg_char(char, fileIdGen, config) for char in argument] return compiled_argument + def compile_command_arguments(arguments, fileIdGen, config): - compiled_arguments = [compile_command_argument(arg, fileIdGen, config) for arg in arguments] + compiled_arguments = [ + compile_command_argument(arg, fileIdGen, config) for arg in arguments + ] return compiled_arguments + ## Compiles the value assigned to a variable using the command argument rules. ## TODO: Is that the correct way to handle them? def compile_assignments(assignments, fileIdGen, config): - compiled_assignments = [[assignment[0], compile_command_argument(assignment[1], fileIdGen, config)] - for assignment in assignments] + compiled_assignments = [ + [assignment[0], compile_command_argument(assignment[1], fileIdGen, config)] + for assignment in assignments + ] return compiled_assignments + def compile_redirection(redirection, fileIdGen, config): file_arg = compile_command_argument(redirection.arg, fileIdGen, config) redirection.arg = file_arg return redirection + def compile_redirections(redirections, fileIdGen, config): - compiled_redirections = [compile_redirection(redirection, fileIdGen, config) - for redirection in redirections] + compiled_redirections = [ + compile_redirection(redirection, fileIdGen, config) + for redirection in redirections + ] return compiled_redirections - diff --git a/compiler/cli.py b/compiler/cli.py new file mode 100644 index 000000000..bfab6c988 --- /dev/null +++ b/compiler/cli.py @@ -0,0 +1,314 @@ +import argparse +import os + + +class BaseParser(argparse.ArgumentParser): + """ + Base class for all Argument Parsers used by PaSh. It has two configurable flags + by default: debug and log_file. 
+ + Other flags are available by classes which inherit BaseParser + """ + + @staticmethod + def _get_width(): + cpus = os.cpu_count() + assert cpus is not None + return cpus // 8 if cpus >= 16 else 2 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_argument( + "-t", + "--output_time", # FIXME: --time + help="(obsolete, time is always logged now) output the time it took for every step", + action="store_true", + ) + self.add_argument( + "-d", + "--debug", + type=int, + help="configure debug level; defaults to 0", + default=0, + ) + self.add_argument( + "--log_file", + help="configure where to write the log; defaults to stderr.", + default="", + ) + + def add_pash_args(self): + self.add_argument( + "-w", + "--width", + type=int, + default=self._get_width(), + help="set data-parallelism factor", + ) + self.add_argument( + "--no_optimize", + help="not apply transformations over the DFG", + action="store_true", + ) + self.add_argument( + "--dry_run_compiler", + help="not execute the compiled script, even if the compiler succeeded", + action="store_true", + ) + self.add_argument( + "--assert_compiler_success", + help="assert that the compiler succeeded with no general error occuring", + action="store_true", + ) + self.add_argument( + "--assert_all_regions_parallelizable", + help="assert that the compiler succeeded with all regions being parallelizable and no general error occuring (used to make tests more robust); more strict than --assert_compiler_success flag", + action="store_true", + ) + self.add_argument( + "--avoid_pash_runtime_completion", + help="avoid the pash_runtime execution completion (only relevant when --debug > 0)", + action="store_true", + ) + self.add_argument( + "-p", + "--output_optimized", # FIXME: --print + help="output the parallel shell script for inspection", + action="store_true", + ) + self.add_argument( + "--graphviz", + help="generates graphical representations of the dataflow graphs. The option argument corresponds to the format. PaSh stores them in a timestamped directory in the argument of --graphviz_dir", + choices=["no", "dot", "svg", "pdf", "png"], + default="no", + ) + ## TODO: To discuss: Do we maybe want to have graphviz to always be included + ## in the temp directory (under a graphviz subdirectory) instead of in its own? + ## kk: I think that ideally we want a log-directory where we can put logs, graphviz, + ## and other observability and monitoring info (instead of putting them in the temp). + self.add_argument( + "--graphviz_dir", + help="the directory in which to store graphical representations", + default="/tmp", + ) + self.add_argument( + "--no_parallel_pipelines", + help="Disable parallel running of independent pipelines", + action="store_true", + default=False, + ) + self.add_argument( + "--parallel_pipelines_limit", + help="Maximum number of parallel independent pipelines", + type=int, + default=2, + ) + self.add_argument( + "--r_split_batch_size", + type=int, + help="configure the batch size of r_split (default: 1MB)", + default=1000000, + ) + self.add_argument( + "--config_path", + help="determines the config file path. By default it is 'PASH_TOP/compiler/config.yaml'.", + default="", + ) + self.add_argument( + "--version", + action="version", + version="%(prog)s {version}".format( + version="0.12.2" + ), # What does this version mean? 
+ ) + + self.add_experimental_args() + self.add_obsolete_args() + + def add_obsolete_args(self): + self.add_argument( + "--no_daemon", + help="(obsolete) does nothing -- Run the compiler everytime we need a compilation instead of using the daemon", + action="store_true", + default=False, + ) + self.add_argument( + "--parallel_pipelines", + help="(obsolete) Run multiple pipelines in parallel if they are safe to run. Now true by default. See --no_parallel_pipelines.", + action="store_true", + default=True, + ) + self.add_argument( + "--r_split", + help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", + action="store_true", + ) + self.add_argument( + "--dgsh_tee", + help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", + action="store_true", + ) + self.add_argument( + "--speculation", + help="(obsolete) does nothing -- run the original script during compilation; if compilation succeeds, abort the original and run only the parallel (quick_abort) (Default: no_spec)", + choices=["no_spec", "quick_abort"], + default="no_spec", + ) + + def add_experimental_args(self): + self.add_argument( + "--no_eager", + help="(experimental) disable eager nodes before merging nodes", + action="store_true", + ) + self.add_argument( + "--profile_driven", + help="(experimental) use profiling information when optimizing", + action="store_true", + ) + self.add_argument( + "--speculative", + help="(experimental) use the speculative execution preprocessing and runtime (NOTE: this has nothing to do with --speculation, which is actually misnamed, and should be named concurrent compilation/execution and is now obsolete)", + action="store_true", + default=False, + ) + self.add_argument( + "--termination", + help="(experimental) determine the termination behavior of the DFG. Defaults to cleanup after the last process dies, but can drain all streams until depletion", + choices=["clean_up_graph", "drain_stream"], + default="clean_up_graph", + ) + self.add_argument( + "--daemon_communicates_through_unix_pipes", + help="(experimental) the daemon communicates through unix pipes instead of sockets", + action="store_true", + ) + self.add_argument( + "--distributed_exec", + help="(experimental) execute the script in a distributed environment. 
Remote machines should be configured and ready", + action="store_true", + default=False, + ) + + +class RunnerParser(BaseParser): + """ + Parser for the PaSh Runner in compiler/pash.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_pash_args() + + self.add_argument( + "input", + nargs="*", + help="the script to be compiled and executed (followed by any command-line arguments", + ) + self.add_argument( + "--preprocess_only", + help="only preprocess the input script and not execute it", + action="store_true", + ) + self.add_argument( + "--output_preprocessed", + help=" output the preprocessed script", + action="store_true", + ) + self.add_argument( + "--interactive", + help="Executes the script using an interactive internal shell session (experimental)", + action="store_true", + ) + self.add_argument( + "-c", + "--command", + help="Evaluate the following as a script, rather than a file", + default=None, + ) + ## This is not the correct way to parse these, because more than one option can be given together, e.g., -ae + self.add_argument( + "-a", + help="Enabling the `allexport` shell option", + action="store_true", + default=False, + ) + self.add_argument( + "+a", + help="Disabling the `allexport` shell option", + action="store_false", + default=False, + ) + ## These two are here for compatibility with respect to bash + self.add_argument( + "-v", + help="(experimental) prints shell input lines as they are read", + action="store_true", + ) + self.add_argument( + "-x", + help="(experimental) prints commands and their arguments as they execute", + action="store_true", + ) + self.set_defaults(preprocess_mode="pash") + + +class CompilerParser(BaseParser): + """ + Parser for the PaSh compiler in compiler/pash_compiler.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.add_pash_args() + + self.add_argument( + "compiled_script_file", + help="the file in which to output the compiled script", + ) + self.add_argument( + "input_ir", + help="the file containing the dataflow graph to be optimized and executed", + ) + self.add_argument( + "--var_file", + help="determines the path of a file containing all shell variables.", + default=None, + ) + + +class PreprocessorParser(BaseParser): + """ + Parser for the preprocessor in compiler/preprocessor/preprocessor.py + Generates two subparsers + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + subparser = self.add_subparsers(help="sub-command help") + self.add_pash_subparser(subparser) + self.add_spec_subparser(subparser) + + @staticmethod + def add_pash_subparser(subparser): + parser_pash = subparser.add_parser( + "pash", help="Preprocess the script so that it can be run with PaSh" + ) + parser_pash.add_pash_args() + parser_pash.add_argument("input", help="the script to be preprocessed") + parser_pash.set_defaults(preprocess_mode="pash") + + @staticmethod + def add_spec_subparser(subparser): + # create the parser for the "b" command + parser_spec = subparser.add_parser( + "spec", help="Preprocess the script so that it can be run with speculation" + ) + parser_spec.add_argument("input", help="the script to be preprocessed") + + ## TODO: When we better integrate, this should be automatically set. 
+ parser_spec.add_argument( + "partial_order_file", + help="the file to store the partial order (currently just a sequence)", + ) + parser_spec.set_defaults(preprocess_mode="spec") diff --git a/compiler/config.py b/compiler/config.py index c6a9c662b..e6a8fee07 100644 --- a/compiler/config.py +++ b/compiler/config.py @@ -2,30 +2,44 @@ import logging import os import subprocess -import math from util import * + ## Global -__version__ = "0.12.2" # FIXME add libdash version -GIT_TOP_CMD = [ 'git', 'rev-parse', '--show-toplevel', '--show-superproject-working-tree'] -if 'PASH_TOP' in os.environ: - PASH_TOP = os.environ['PASH_TOP'] +__version__ = "0.12.2" # FIXME add libdash version +GIT_TOP_CMD = [ + "git", + "rev-parse", + "--show-toplevel", + "--show-superproject-working-tree", +] +if "PASH_TOP" in os.environ: + PASH_TOP = os.environ["PASH_TOP"] else: - PASH_TOP = subprocess.run(GIT_TOP_CMD, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True).stdout.rstrip() + PASH_TOP = subprocess.run( + GIT_TOP_CMD, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ).stdout.rstrip() PYTHON_VERSION = "python3" PLANNER_EXECUTABLE = os.path.join(PASH_TOP, "compiler/pash_compiler.py") RUNTIME_EXECUTABLE = os.path.join(PASH_TOP, "compiler/pash_runtime.sh") SAVE_ARGS_EXECUTABLE = os.path.join(PASH_TOP, "runtime/save_args.sh") -SAVE_SHELL_STATE_EXECUTABLE = os.path.join(PASH_TOP, "compiler/orchestrator_runtime/save_shell_state.sh") +SAVE_SHELL_STATE_EXECUTABLE = os.path.join( + PASH_TOP, "compiler/orchestrator_runtime/save_shell_state.sh" +) ## Ensure that PASH_TMP_PREFIX is set by pa.sh -assert(not os.getenv('PASH_TMP_PREFIX') is None) -PASH_TMP_PREFIX = os.getenv('PASH_TMP_PREFIX') +assert not os.getenv("PASH_TMP_PREFIX") is None +PASH_TMP_PREFIX = os.getenv("PASH_TMP_PREFIX") SOCKET_BUF_SIZE = 8192 +BASH_VERSION = tuple(int(i) for i in os.getenv("PASH_BASH_VERSION").split(" ")) + ## ## Global configuration used by all pash components @@ -49,7 +63,6 @@ def set_config_globals_from_pash_args(given_pash_args): global pash_args, OUTPUT_TIME, DEBUG_LEVEL, LOG_FILE pash_args = given_pash_args - OUTPUT_TIME = pash_args.output_time DEBUG_LEVEL = pash_args.debug LOG_FILE = pash_args.log_file @@ -60,9 +73,11 @@ def set_config_globals_from_pash_args(given_pash_args): if given_pash_args.log_file == "": logging.basicConfig(format="%(message)s") else: - logging.basicConfig(format="%(message)s", - filename=f"{os.path.abspath(given_pash_args.log_file)}", - filemode="w") + logging.basicConfig( + format="%(message)s", + filename=f"{os.path.abspath(given_pash_args.log_file)}", + filemode="w", + ) # Set debug level if given_pash_args.debug == 1: @@ -70,163 +85,67 @@ def set_config_globals_from_pash_args(given_pash_args): elif given_pash_args.debug >= 2: logging.getLogger().setLevel(logging.DEBUG) + ## Increase the recursion limit (it seems that the parser/unparser needs it for bigger graphs) sys.setrecursionlimit(10000) + def load_config(config_file_path=""): global config pash_config = {} - CONFIG_KEY = 'distr_planner' + CONFIG_KEY = "distr_planner" - if(config_file_path == ""): - config_file_path = '{}/compiler/config.json'.format(PASH_TOP) + if config_file_path == "": + config_file_path = "{}/compiler/config.json".format(PASH_TOP) with open(config_file_path) as config_file: pash_config = json.load(config_file) if not pash_config: - raise Exception('No valid configuration could be loaded from {}'.format(config_file_path)) + raise Exception( + "No valid configuration could be 
loaded from {}".format(config_file_path) + ) if CONFIG_KEY not in pash_config: - raise Exception('Missing `{}` config in {}'.format(CONFIG_KEY, config_file_path)) + raise Exception( + "Missing `{}` config in {}".format(CONFIG_KEY, config_file_path) + ) config = pash_config -def getWidth(): - cpus = os.cpu_count() - return math.floor(cpus / 8) if cpus >= 16 else 2 - -def add_general_config_arguments(parser): - ## TODO: Delete that at some point, or make it have a different use (e.g., outputting time even without -d 1). - parser.add_argument("-t", "--output_time", #FIXME: --time - help="(obsolete, time is always logged now) output the time it took for every step", - action="store_true") - parser.add_argument("-d", "--debug", - type=int, - help="configure debug level; defaults to 0", - default=0) - parser.add_argument("--log_file", - help="configure where to write the log; defaults to stderr.", - default="") - -## These are arguments that are common to pash.py and pash_compiler.py -def add_common_arguments(parser): - add_general_config_arguments(parser) - - parser.add_argument("-w", "--width", - type=int, - default=getWidth(), - help="set data-parallelism factor") - parser.add_argument("--no_optimize", - help="not apply transformations over the DFG", - action="store_true") - parser.add_argument("--dry_run_compiler", - help="not execute the compiled script, even if the compiler succeeded", - action="store_true") - parser.add_argument("--assert_compiler_success", - help="assert that the compiler succeeded (used to make tests more robust)", - action="store_true") - parser.add_argument("--avoid_pash_runtime_completion", - help="avoid the pash_runtime execution completion (only relevant when --debug > 0)", - action="store_true") - parser.add_argument("--profile_driven", - help="(experimental) use profiling information when optimizing", - action="store_true") - parser.add_argument("-p", "--output_optimized", # FIXME: --print - help="output the parallel shell script for inspection", - action="store_true") - parser.add_argument("--graphviz", - help="generates graphical representations of the dataflow graphs. The option argument corresponds to the format. PaSh stores them in a timestamped directory in the argument of --graphviz_dir", - choices=["no", "dot", "svg", "pdf", "png"], - default="no") - ## TODO: To discuss: Do we maybe want to have graphviz to always be included - ## in the temp directory (under a graphviz subdirectory) instead of in its own? - ## kk: I think that ideally we want a log-directory where we can put logs, graphviz, - ## and other observability and monitoring info (instead of putting them in the temp). 
- parser.add_argument("--graphviz_dir", - help="the directory in which to store graphical representations", - default="/tmp") - parser.add_argument("--no_eager", - help="(experimental) disable eager nodes before merging nodes", - action="store_true") - parser.add_argument("--no_daemon", - help="(obsolete) does nothing -- Run the compiler everytime we need a compilation instead of using the daemon", - action="store_true", - default=False) - parser.add_argument("--parallel_pipelines", - help="Run multiple pipelines in parallel if they are safe to run", - action="store_true", - default=False) - parser.add_argument("--r_split_batch_size", - type=int, - help="configure the batch size of r_split (default: 1MB)", - default=1000000) - parser.add_argument("--r_split", - help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", - action="store_true") - parser.add_argument("--dgsh_tee", - help="(obsolete) does nothing -- only here for old interfaces (not used anywhere in the code)", - action="store_true") - parser.add_argument("--speculative", - help="(experimental) use the speculative execution preprocessing and runtime (NOTE: this has nothing to do with --speculation, which is actually misnamed, and should be named concurrent compilation/execution and is now obsolete)", - action="store_true", - default=False) - ## This is misnamed, it should be named concurrent compilation/execution - parser.add_argument("--speculation", - help="(obsolete) does nothing -- run the original script during compilation; if compilation succeeds, abort the original and run only the parallel (quick_abort) (Default: no_spec)", - choices=['no_spec', 'quick_abort'], - default='no_spec') - parser.add_argument("--termination", - help="(experimental) determine the termination behavior of the DFG. Defaults to cleanup after the last process dies, but can drain all streams until depletion", - choices=['clean_up_graph', 'drain_stream'], - default="clean_up_graph") - parser.add_argument("--daemon_communicates_through_unix_pipes", - help="(experimental) the daemon communicates through unix pipes instead of sockets", - action="store_true") - parser.add_argument("--distributed_exec", - help="(experimental) execute the script in a distributed environment. Remote machines should be configured and ready", - action="store_true", - default=False) - parser.add_argument("--config_path", - help="determines the config file path. 
By default it is 'PASH_TOP/compiler/config.yaml'.", - default="") - parser.add_argument("--version", - action='version', - version='%(prog)s {version}'.format(version=__version__)) - return def pass_common_arguments(pash_arguments): arguments = [] - if (pash_arguments.no_optimize): + if pash_arguments.no_optimize: arguments.append("--no_optimize") - if (pash_arguments.dry_run_compiler): + if pash_arguments.dry_run_compiler: arguments.append("--dry_run_compiler") - if (pash_arguments.assert_compiler_success): + if pash_arguments.assert_compiler_success: arguments.append("--assert_compiler_success") - if (pash_arguments.avoid_pash_runtime_completion): + if pash_arguments.avoid_pash_runtime_completion: arguments.append("--avoid_pash_runtime_completion") - if (pash_arguments.profile_driven): + if pash_arguments.profile_driven: arguments.append("--profile_driven") - if (pash_arguments.output_time): - arguments.append("--output_time") - if (pash_arguments.output_optimized): + if pash_arguments.output_optimized: arguments.append("--output_optimized") arguments.append("--graphviz") arguments.append(pash_arguments.graphviz) arguments.append("--graphviz_dir") arguments.append(pash_arguments.graphviz_dir) - if(not pash_arguments.log_file == ""): + if not pash_arguments.log_file == "": arguments.append("--log_file") arguments.append(pash_arguments.log_file) - if (pash_arguments.no_eager): + if pash_arguments.no_eager: arguments.append("--no_eager") - if (pash_arguments.distributed_exec): + if pash_arguments.distributed_exec: arguments.append("--distributed_exec") - if (pash_arguments.speculative): + if pash_arguments.speculative: arguments.append("--speculative") - if (pash_arguments.parallel_pipelines): - arguments.append("--parallel_pipelines") - if (pash_arguments.daemon_communicates_through_unix_pipes): + if pash_arguments.no_parallel_pipelines: + arguments.append("--no_parallel_pipelines") + if pash_arguments.daemon_communicates_through_unix_pipes: arguments.append("--daemon_communicates_through_unix_pipes") + arguments.append("--parallel_pipelines_limit") + arguments.append(str(pash_arguments.parallel_pipelines_limit)) arguments.append("--r_split_batch_size") arguments.append(str(pash_arguments.r_split_batch_size)) arguments.append("--debug") @@ -235,14 +154,15 @@ def pass_common_arguments(pash_arguments): arguments.append(pash_arguments.termination) arguments.append("--width") arguments.append(str(pash_arguments.width)) - if(not pash_arguments.config_path == ""): + if not pash_arguments.config_path == "": arguments.append("--config_path") arguments.append(pash_arguments.config_path) return arguments + def init_log_file(): global LOG_FILE - if(not LOG_FILE == ""): + if not LOG_FILE == "": with open(LOG_FILE, "w") as f: pass @@ -251,7 +171,8 @@ def init_log_file(): ## Set the shell variables ## + def set_vars_file(var_file_path: str, var_dict: dict): - global config - config['shell_variables'] = var_dict - config['shell_variables_file_path'] = var_file_path + global config + config["shell_variables"] = var_dict + config["shell_variables_file_path"] = var_file_path diff --git a/compiler/custom_error.py b/compiler/custom_error.py new file mode 100644 index 000000000..4b2e42444 --- /dev/null +++ b/compiler/custom_error.py @@ -0,0 +1,12 @@ +class UnparallelizableError(Exception): + pass + +class AdjLineNotImplementedError(Exception): + pass + +# to be raised in pash_compiler if a UnparallelizableError is caught at any point running the compiler +# primarily to differentiate +# 
--assert_compiler_success (exit with error only under general exceptions caught) +# --assert_all_regions_parallelizable (exit with error when regions are found not parallelizable + general exceptions) +class NotAllRegionParallelizableError(Exception): + pass \ No newline at end of file diff --git a/compiler/definitions/ir/aggregator_node.py b/compiler/definitions/ir/aggregator_node.py index 125ce46db..a99f1e7b5 100644 --- a/compiler/definitions/ir/aggregator_node.py +++ b/compiler/definitions/ir/aggregator_node.py @@ -1,54 +1,80 @@ from definitions.ir.dfg_node import * + # from definitions.ir.nodes.arg import Arg -from annotations_utils.util_cmd_invocations import get_command_invocation_prefix_from_dfg_node +from annotations_utils.util_cmd_invocations import ( + get_command_invocation_prefix_from_dfg_node, +) ## This class corresponds to a generic n-ary aggregator ## ## TODO: Do we need to do anything special for binary aggregators? class MapperAggregatorNode(DFGNode): - def __init__(self, old_node, input_ids, output_ids, name_string, new_options, flag_option_list): - + def __init__( + self, + old_node, + input_ids, + output_ids, + name_string, + new_options, + flag_option_list, + ): ## The name of the aggregator command name = Arg.string_to_arg(name_string) ## TODO: The category should also be acquired through annotations (and maybe should be asserted to be at most pure) - com_category="pure" + com_category = "pure" ## TODO: Not sure if redirections need to be copied to new function. com_redirs = [redir.to_ast() for redir in old_node.com_redirs] - super().__init__(input_ids, - output_ids, - name, - com_category, - com_options=new_options, # changed that all are already in there and not appended - flag_option_list=flag_option_list, - com_redirs=com_redirs, - com_assignments=old_node.com_assignments) + super().__init__( + input_ids, + output_ids, + name, + com_category, + com_options=new_options, # changed that all are already in there and not appended + flag_option_list=flag_option_list, + com_redirs=com_redirs, + com_assignments=old_node.com_assignments, + ) class AggregatorNode(MapperAggregatorNode): def __init__(self, old_node, input_ids, output_ids): - used_parallelizer = old_node.get_used_parallelizer() cmd_inv_pref = get_command_invocation_prefix_from_dfg_node(old_node) used_aggregator = used_parallelizer.get_actual_aggregator(cmd_inv_pref) - log(f'used_agg: {used_aggregator}') - log(f'old_node: {old_node}') + log(f"used_agg: {used_aggregator}") + log(f"old_node: {old_node}") ## Check if an aggregator can be instantiated from the node - if(used_aggregator is None): - log("Error: Node:", old_node, "does not contain information to instantiate an aggregator!") - raise Exception('No information to instantiate aggregator') + if used_aggregator is None: + log( + "Error: Node:", + old_node, + "does not contain information to instantiate an aggregator!", + ) + raise Exception("No information to instantiate aggregator") ## The name of the aggregator command agg_name_string = used_aggregator.cmd_name - all_options_incl_new = [Arg.string_to_arg(el.get_name()) for el in used_aggregator.flag_option_list + used_aggregator.positional_config_list] + all_options_incl_new = [ + Arg.string_to_arg(el.get_name()) + for el in used_aggregator.flag_option_list + + used_aggregator.positional_config_list + ] # TODO: zip is nicer - all_options_incl_new_right_format = [(i, all_options_incl_new[i]) for i in range(len(all_options_incl_new))] + all_options_incl_new_right_format = [ + (i, all_options_incl_new[i]) 
for i in range(len(all_options_incl_new)) + ] - super().__init__(old_node, input_ids, output_ids, agg_name_string, all_options_incl_new_right_format, - flag_option_list=used_aggregator.flag_option_list) + super().__init__( + old_node, + input_ids, + output_ids, + agg_name_string, + all_options_incl_new_right_format, + flag_option_list=used_aggregator.flag_option_list, + ) log("Generic Aggregator Created:", self) - diff --git a/compiler/definitions/ir/arg.py b/compiler/definitions/ir/arg.py index 41fcafc6a..9cf83037b 100644 --- a/compiler/definitions/ir/arg.py +++ b/compiler/definitions/ir/arg.py @@ -3,37 +3,43 @@ from shell_ast.ast_util import * from util import * + class Arg: arg_char_list: "list[ArgChar]" def __init__(self, arg_char_list: "list[ArgChar]"): - assert(not isinstance(arg_char_list, Arg)) + assert not isinstance(arg_char_list, Arg) for arg_char in arg_char_list: - assert(isinstance(arg_char, ArgChar)) + assert isinstance(arg_char, ArgChar) self.arg_char_list = arg_char_list def __repr__(self): return format_arg_chars(self.arg_char_list) def __eq__(self, other): - if(isinstance(other, Arg)): + if isinstance(other, Arg): return self.arg_char_list == other.arg_char_list - log("Warning: Comparing Arg:", self, "with a non Arg argument:", other, "of type:", type(other)) + log( + "Warning: Comparing Arg:", + self, + "with a non Arg argument:", + other, + "of type:", + type(other), + ) return False def opt_serialize(self): return self.__repr__() - + def to_ast(self): return self.arg_char_list def concatenate(self, other): - space = [CArgChar(32)] # space + space = [CArgChar(32)] # space self.arg_char_list.extend(space) self.arg_char_list.extend(other.arg_char_list) @staticmethod def string_to_arg(string: str) -> Arg: return Arg(string_to_carg_char_list(string)) - - diff --git a/compiler/definitions/ir/dfg_node.py b/compiler/definitions/ir/dfg_node.py index 7259a29af..927f63471 100644 --- a/compiler/definitions/ir/dfg_node.py +++ b/compiler/definitions/ir/dfg_node.py @@ -2,9 +2,16 @@ from definitions.ir.redirection import * from definitions.ir.resource import * -from annotations_utils.util_cmd_invocations import to_node_cmd_inv_with_io_vars, construct_property_container_from_list_of_properties +from annotations_utils.util_cmd_invocations import ( + to_node_cmd_inv_with_io_vars, + construct_property_container_from_list_of_properties, +) + +from util import ( + return_empty_list_if_none_else_itself, + return_default_if_none_else_itself, +) -from util import return_empty_list_if_none_else_itself, return_default_if_none_else_itself ## Assumption: Everything related to a DFGNode must be already expanded. ## TODO: Ensure that this is true with assertions @@ -17,14 +24,16 @@ class DFGNode: ## com_assignments : list of assignments ## parallelizer_list : list of parallelizers for this DFGNode ## cmd_related_properties : dict to store properties like commutativity - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs = [], - com_assignments=[], - parallelizer_list=None, - cmd_related_properties=None, - ): - # TODO []: default parameters! + def __init__( + self, + cmd_invocation_with_io_vars, + com_redirs=None, + com_assignments=None, + parallelizer_list=None, + cmd_related_properties=None, + ): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments ## @KK: can this be deleted? Was there another id in the member attributes before? 
## Add a unique identifier to each DFGNode since id() is not guaranteed to be unique for objects that have different lifetimes. @@ -34,9 +43,15 @@ def __init__(self, self.com_redirs = [Redirection(redirection) for redirection in com_redirs] self.com_assignments = com_assignments - self.parallelizer_list = return_empty_list_if_none_else_itself(parallelizer_list) - default_cmd_properties = construct_property_container_from_list_of_properties([]) - self.cmd_related_properties = return_default_if_none_else_itself(cmd_related_properties, default_cmd_properties) + self.parallelizer_list = return_empty_list_if_none_else_itself( + parallelizer_list + ) + default_cmd_properties = construct_property_container_from_list_of_properties( + [] + ) + self.cmd_related_properties = return_default_if_none_else_itself( + cmd_related_properties, default_cmd_properties + ) self.cmd_invocation_with_io_vars = cmd_invocation_with_io_vars # log("Node created:", self.id, self) @@ -57,7 +72,6 @@ def get_dot_label(self) -> str: basename = os.path.basename(str(name)) return basename - def get_id(self): return self.id @@ -84,20 +98,19 @@ def get_configuration_inputs(self): return inputs.get_config_inputs() def is_commutative(self): - val = self.cmd_related_properties.get_property_value('is_commutative') + val = self.cmd_related_properties.get_property_value("is_commutative") if val is not None: return val else: return False - ## Auxiliary method that returns any necessary redirections, ## at the moment it doesn't look necessary. def _to_ast_aux_get_redirs(self): ## still used in to_ast ## TODO: Properly handle redirections ## - ## TODO: If one of the redirected outputs or inputs is changed in the IR + ## TODO: If one of the redirected outputs or inputs is changed in the IR ## (e.g. `cat < s1` was changed to read from an ephemeral file `cat < "#file5"`) ## this needs to be changed in the redirections too. Maybe we can modify redirections ## when replacing fid. @@ -111,7 +124,6 @@ def _to_ast_aux_get_redirs(self): ## where we recreate arguments and redirections). return [] - ## TODO: Improve this function to be separately implemented for different special nodes, ## such as cat, eager, split, etc... 
## I do not think this separation is reasonable anymore since we remodelled nodes in a way that the back-translation is trivial @@ -120,7 +132,7 @@ def _to_ast_aux_get_redirs(self): ## hence assumes that non-streaming inputs/outputs will not change; with a special to_ast, we could circumvent this def to_ast(self, edges, drain_streams): ## TODO: We might not want to implement this at all actually - if (drain_streams): + if drain_streams: raise NotImplementedError() else: # commented since "see above" @@ -132,7 +144,9 @@ def to_ast(self, edges, drain_streams): redirs = self._to_ast_aux_get_redirs() assignments = self.com_assignments - node = to_node_cmd_inv_with_io_vars(self.cmd_invocation_with_io_vars, edges, redirs, assignments) + node = to_node_cmd_inv_with_io_vars( + self.cmd_invocation_with_io_vars, edges, redirs, assignments + ) # TODO: think about redirections # old code for this: # rest_argument_fids, new_redirs = create_command_arguments_redirs(com_name_ast, @@ -157,30 +171,34 @@ def apply_redirections(self, edges): unhandled_redirs = [] for redirection in self.com_redirs: ## Handle To redirections that have to do with stdout - if (redirection.is_to_file() and redirection.is_for_stdout()): + if redirection.is_to_file() and redirection.is_for_stdout(): # log(redirection) file_resource = FileResource(redirection.file_arg) success = False for i in range(len(self.get_output_list())): output_edge_id = self.get_output_list()[i] output_fid = edges[output_edge_id][0] - if(output_fid.has_file_descriptor_resource() - and output_fid.resource.is_stdout()): + if ( + output_fid.has_file_descriptor_resource() + and output_fid.resource.is_stdout() + ): success = True edges[output_edge_id][0].set_resource(file_resource) # self.outputs[i].set_resource(file_resource) - assert(success) - elif (redirection.is_from_file() and redirection.is_for_stdin()): + assert success + elif redirection.is_from_file() and redirection.is_for_stdin(): # log(redirection) file_resource = FileResource(redirection.file_arg) success = False for input_edge_id in self.get_input_list(): input_fid = edges[input_edge_id][0] - if(input_fid.has_file_descriptor_resource() - and input_fid.resource.is_stdin()): + if ( + input_fid.has_file_descriptor_resource() + and input_fid.resource.is_stdin() + ): success = True edges[input_edge_id][0].set_resource(file_resource) - assert(success) + assert success else: log("Warning -- Unhandled redirection:", redirection) unhandled_redirs.append(redirection) @@ -188,7 +206,6 @@ def apply_redirections(self, edges): ## Does it make any sense to keep them and have them in the Final AST. raise NotImplementedError() - ## This renames the from_id (wherever it exists in inputs or outputs) ## to the to_id. 
## @@ -202,7 +219,7 @@ def replace_edge(self, from_id, to_id): def replace_edge_in_list(self, edge_ids, from_id, to_id): new_edge_ids = [] for id in edge_ids: - if(id == from_id): + if id == from_id: new_edge_id = to_id else: new_edge_id = id @@ -212,22 +229,30 @@ def replace_edge_in_list(self, edge_ids, from_id, to_id): def get_option_implemented_round_robin_parallelizer(self): for parallelizer in self.parallelizer_list: splitter = parallelizer.get_splitter() - if splitter.is_splitter_round_robin() and parallelizer.are_all_parts_implemented(): + if ( + splitter.is_splitter_round_robin() + and parallelizer.are_all_parts_implemented() + ): return parallelizer return None def get_option_implemented_round_robin_with_unwrap_parallelizer(self): for parallelizer in self.parallelizer_list: splitter = parallelizer.get_splitter() - if splitter.is_splitter_round_robin_with_unwrap_flag() and parallelizer.are_all_parts_implemented(): + if ( + splitter.is_splitter_round_robin_with_unwrap_flag() + and parallelizer.are_all_parts_implemented() + ): return parallelizer return None - def get_option_implemented_consecutive_chunks_parallelizer(self): for parallelizer in self.parallelizer_list: splitter = parallelizer.get_splitter() - if splitter.is_splitter_consec_chunks() and parallelizer.are_all_parts_implemented(): + if ( + splitter.is_splitter_consec_chunks() + and parallelizer.are_all_parts_implemented() + ): return parallelizer return None @@ -235,13 +260,15 @@ def get_option_implemented_consecutive_chunks_parallelizer(self): def make_simple_dfg_node_from_cmd_inv_with_io_vars(cmd_inv_with_io_vars): return DFGNode(cmd_inv_with_io_vars) - def get_single_streaming_input_single_output_and_configuration_inputs_of_node_for_parallelization(self): + def get_single_streaming_input_single_output_and_configuration_inputs_of_node_for_parallelization( + self, + ): streaming_inputs = self.get_streaming_inputs() - assert (len(streaming_inputs) == 1) + assert len(streaming_inputs) == 1 streaming_input = streaming_inputs[0] configuration_inputs = self.get_configuration_inputs() - assert (len(configuration_inputs) == 0) + assert len(configuration_inputs) == 0 streaming_outputs = self.get_output_list() - assert (len(streaming_outputs) == 1) + assert len(streaming_outputs) == 1 streaming_output = streaming_outputs[0] return streaming_input, streaming_output, configuration_inputs diff --git a/compiler/definitions/ir/file_id.py b/compiler/definitions/ir/file_id.py index ecee07ec0..e3d8eef99 100644 --- a/compiler/definitions/ir/file_id.py +++ b/compiler/definitions/ir/file_id.py @@ -7,6 +7,7 @@ from definitions.ir.resource import * + ## Note: The NULL ident is considered to be the default unknown file id ## ## TODO: WARNING: We have to make sure that a resource in our IR can @@ -29,19 +30,19 @@ def __init__(self, ident, prefix="", resource=None): self.prefix = prefix ## TODO: Remove all union_find ## Initialize the parent - self.resource=resource + self.resource = resource def __repr__(self): - if(isinstance(self.resource, EphemeralResource)): + if isinstance(self.resource, EphemeralResource): output = self.get_fifo_suffix() else: output = "fid:{}:{}".format(self.ident, self.resource) return output def serialize(self): - if(isinstance(self.resource, TemporaryFileResource)): + if isinstance(self.resource, TemporaryFileResource): output = self.get_temporary_file_suffix() - elif(isinstance(self.resource, EphemeralResource)): + elif isinstance(self.resource, EphemeralResource): output = self.get_fifo_suffix() else: output = 
"{}".format(self.resource) @@ -73,17 +74,17 @@ def to_ast(self, stdin_dash=False): ## check if a file id refers to a pipe ## ## TODO: I am not sure about the FileDescriptor resource - if(isinstance(self.resource, TemporaryFileResource)): + if isinstance(self.resource, TemporaryFileResource): suffix = self.get_temporary_file_suffix() string = os.path.join(config.PASH_TMP_PREFIX, suffix) argument = string_to_argument(string) - elif(isinstance(self.resource, EphemeralResource)): + elif isinstance(self.resource, EphemeralResource): suffix = self.get_fifo_suffix() - string = os.path.join(config.PASH_TMP_PREFIX, suffix) + string = os.path.join(config.PASH_TMP_PREFIX, suffix) ## Quote the argument - argument = [make_kv('Q', string_to_argument(string))] - elif(isinstance(self.resource, FileDescriptorResource)): - if (self.resource.is_stdin() and stdin_dash): + argument = [make_kv("Q", string_to_argument(string))] + elif isinstance(self.resource, FileDescriptorResource): + if self.resource.is_stdin() and stdin_dash: argument = string_to_argument("-") else: raise NotImplementedError() @@ -97,7 +98,7 @@ def set_resource(self, resource): ## The file resource cannot be reset. A pointer can never point to ## more than one file resource. However, we can change an ephemeral ## resource or a file_descriptor resource. - assert(not self.has_file_resource()) + assert not self.has_file_resource() self.resource = resource def get_resource(self): @@ -105,19 +106,19 @@ def get_resource(self): ## Remove this def has_resource(self): - return (not self.resource is None) + return not self.resource is None def has_file_resource(self): - return (isinstance(self.resource, FileResource)) + return isinstance(self.resource, FileResource) def has_file_descriptor_resource(self): - return (isinstance(self.resource, FileDescriptorResource)) + return isinstance(self.resource, FileDescriptorResource) def has_remote_file_resource(self): return isinstance(self.resource, RemoteFileResource) def is_ephemeral(self): - return (isinstance(self.resource, EphemeralResource)) + return isinstance(self.resource, EphemeralResource) def make_temporary_file(self): self.resource = TemporaryFileResource() diff --git a/compiler/definitions/ir/nodes/cat.py b/compiler/definitions/ir/nodes/cat.py index 675b3880b..ced0cb2b7 100644 --- a/compiler/definitions/ir/nodes/cat.py +++ b/compiler/definitions/ir/nodes/cat.py @@ -1,6 +1,11 @@ -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from definitions.ir.dfg_node import DFGNode + def make_cat_node(inputs, output): - cmd_inv_cat = CommandInvocationWithIOVars.make_cat_command_invocation_with_io_vars(inputs, output) + cmd_inv_cat = CommandInvocationWithIOVars.make_cat_command_invocation_with_io_vars( + inputs, output + ) return DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars(cmd_inv_cat) diff --git a/compiler/definitions/ir/nodes/dfs_split_reader.py b/compiler/definitions/ir/nodes/dfs_split_reader.py index 63855e325..8c8835af9 100644 --- a/compiler/definitions/ir/nodes/dfs_split_reader.py +++ b/compiler/definitions/ir/nodes/dfs_split_reader.py @@ -1,28 +1,44 @@ import os from definitions.ir.dfg_node import * + class DFSSplitReader(DFGNode): - def __init__(self, inputs, outputs, com_name, com_category, - com_options = [], com_redirs = [], com_assignments=[]): - - super().__init__(inputs, outputs, com_name, com_category, - com_options=com_options, - 
com_redirs=com_redirs, - com_assignments=com_assignments) + def __init__( + self, + inputs, + outputs, + com_name, + com_category, + com_options=None, + com_redirs=None, + com_assignments=None, + ): + com_options = [] if com_options is None else com_options + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + + super().__init__( + inputs, + outputs, + com_name, + com_category, + com_options=com_options, + com_redirs=com_redirs, + com_assignments=com_assignments, + ) + + def set_server_address(self, addr): # ex addr: 127.0.0.1:50051 + self.com_options.append((3, Arg.string_to_arg(f"--addr {addr}"))) - def set_server_address(self, addr): # ex addr: 127.0.0.1:50051 - self.com_options.append((3, Arg.string_to_arg(f"--addr {addr}"))) def make_dfs_split_reader_node(inputs, output, split_num, prefix): - split_reader_bin = os.path.join(config.PASH_TOP, config.config['runtime']['dfs_split_reader_binary']) + split_reader_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["dfs_split_reader_binary"] + ) com_name = Arg.string_to_arg(split_reader_bin) com_category = "pure" options = [] options.append((1, Arg.string_to_arg(f"--prefix '{prefix}'"))) options.append((2, Arg.string_to_arg(f"--split {split_num}"))) - return DFSSplitReader(inputs, - [output], - com_name, - com_category, - options) + return DFSSplitReader(inputs, [output], com_name, com_category, options) diff --git a/compiler/definitions/ir/nodes/dgsh_tee.py b/compiler/definitions/ir/nodes/dgsh_tee.py index 16bd5efff..bcee52fe3 100644 --- a/compiler/definitions/ir/nodes/dgsh_tee.py +++ b/compiler/definitions/ir/nodes/dgsh_tee.py @@ -1,32 +1,44 @@ from pash_annotations.datatypes.AccessKind import make_stream_output, make_stream_input from pash_annotations.datatypes.BasicDatatypes import Flag, ArgStringType from pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from annotations_utils.util_cmd_invocations import to_ast_flagoption, to_ast_operand from definitions.ir.dfg_node import * + class DGSHTee(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], com_assignments=[] - ): - # TODO []: default - super().__init__(cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments) + def __init__(self, cmd_invocation_with_io_vars, com_redirs=None, com_assignments=None): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + super().__init__( + cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + ) + def make_dgsh_tee_node(input_id, output_id): - dgsh_tee_bin = os.path.join(config.PASH_TOP, config.config['runtime']['dgsh_tee_binary']) + dgsh_tee_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["dgsh_tee_binary"] + ) - access_map = {output_id: make_stream_output(), - input_id: make_stream_input()} + access_map = {output_id: make_stream_output(), input_id: make_stream_input()} - flag_option_list = [OptionWithIO("-i", input_id), - OptionWithIO("-o", output_id), - Flag("-I"), - Flag("-f"), - OptionWithIO("-b", ArgStringType(Arg.string_to_arg(str(config.config['runtime']['dgsh_buffer_size']))))] + flag_option_list = [ + OptionWithIO("-i", input_id), + OptionWithIO("-o", output_id), 
+ Flag("-I"), + Flag("-f"), + OptionWithIO( + "-b", + ArgStringType( + Arg.string_to_arg(str(config.config["runtime"]["dgsh_buffer_size"])) + ), + ), + ] cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=dgsh_tee_bin, @@ -34,5 +46,6 @@ def make_dgsh_tee_node(input_id, output_id): operand_list=[], implicit_use_of_streaming_input=None, implicit_use_of_streaming_output=None, - access_map=access_map) + access_map=access_map, + ) return DGSHTee(cmd_inv_with_io_vars) diff --git a/compiler/definitions/ir/nodes/eager.py b/compiler/definitions/ir/nodes/eager.py index 73643768b..a807173c9 100644 --- a/compiler/definitions/ir/nodes/eager.py +++ b/compiler/definitions/ir/nodes/eager.py @@ -1,31 +1,43 @@ -from pash_annotations.datatypes.AccessKind import AccessKind, make_stream_output, make_stream_input, make_other_output -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.AccessKind import ( + AccessKind, + make_stream_output, + make_stream_input, + make_other_output, +) +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from definitions.ir.dfg_node import * + class Eager(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], com_assignments=[] - ): - # TODO []: default - super().__init__(cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments) + def __init__(self, cmd_invocation_with_io_vars, com_redirs=None, com_assignments=None): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + + super().__init__( + cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + ) def make_eager_node(input_id, output_id, intermediate_file_id, eager_exec_path): eager_name = eager_exec_path intermediate_file_id_id = intermediate_file_id.get_ident() operand_list = [input_id, output_id, intermediate_file_id_id] - access_map = {output_id: make_stream_output(), - input_id: make_stream_input(), - intermediate_file_id_id: make_other_output()} + access_map = { + output_id: make_stream_output(), + input_id: make_stream_input(), + intermediate_file_id_id: make_other_output(), + } cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=eager_name, flag_option_list=[], operand_list=operand_list, implicit_use_of_streaming_input=None, implicit_use_of_streaming_output=None, - access_map=access_map) + access_map=access_map, + ) return Eager(cmd_inv_with_io_vars) diff --git a/compiler/definitions/ir/nodes/hdfs_cat.py b/compiler/definitions/ir/nodes/hdfs_cat.py index 3fe81012c..d8aefe337 100644 --- a/compiler/definitions/ir/nodes/hdfs_cat.py +++ b/compiler/definitions/ir/nodes/hdfs_cat.py @@ -1,11 +1,29 @@ from definitions.ir.dfg_node import * + class HDFSCat(DFGNode): - def __init__(self, inputs, outputs, com_name, com_category, - com_options = [], com_redirs = [], com_assignments=[]): - assert(str(com_name) == "hdfs") - assert(str(com_options[0][1]) == "dfs" and str(com_options[1][1]) == "-cat") - super().__init__(inputs, outputs, com_name, com_category, - com_options=com_options, - com_redirs=com_redirs, - com_assignments=com_assignments) + def __init__( + self, + inputs, + outputs, + com_name, + com_category, + com_options=None, + com_redirs=None, + com_assignments=None, + ): + com_options = [] if com_options is None else com_options + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if 
com_assignments is None else com_assignments + + assert str(com_name) == "hdfs" + assert str(com_options[0][1]) == "dfs" and str(com_options[1][1]) == "-cat" + super().__init__( + inputs, + outputs, + com_name, + com_category, + com_options=com_options, + com_redirs=com_redirs, + com_assignments=com_assignments, + ) diff --git a/compiler/definitions/ir/nodes/pash_split.py b/compiler/definitions/ir/nodes/pash_split.py index 621334807..925ca9753 100644 --- a/compiler/definitions/ir/nodes/pash_split.py +++ b/compiler/definitions/ir/nodes/pash_split.py @@ -1,5 +1,7 @@ from pash_annotations.datatypes.AccessKind import make_stream_input, make_stream_output -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from definitions.ir.file_id import * from definitions.ir.dfg_node import * @@ -7,22 +9,31 @@ import config import os + class Split(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], - com_assignments=[], - parallelizer_list=None, - cmd_related_properties=None): - # TODO []: default arguments! - super().__init__(cmd_invocation_with_io_vars=cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments, - parallelizer_list=parallelizer_list, - cmd_related_properties=cmd_related_properties) + def __init__( + self, + cmd_invocation_with_io_vars, + com_redirs=None, + com_assignments=None, + parallelizer_list=None, + cmd_related_properties=None, + ): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + super().__init__( + cmd_invocation_with_io_vars=cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + parallelizer_list=parallelizer_list, + cmd_related_properties=cmd_related_properties, + ) + def make_split_file(input_id, out_ids): - auto_split_bin = os.path.join(config.PASH_TOP, config.config['runtime']['auto_split_binary']) + auto_split_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["auto_split_binary"] + ) operand_list = [input_id] operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} @@ -33,5 +44,6 @@ def make_split_file(input_id, out_ids): operand_list=operand_list, implicit_use_of_streaming_input=None, implicit_use_of_streaming_output=None, - access_map=access_map) + access_map=access_map, + ) return Split(cmd_inv_with_io_vars) diff --git a/compiler/definitions/ir/nodes/r_merge.py b/compiler/definitions/ir/nodes/r_merge.py index 345c13e23..9c6f01b84 100644 --- a/compiler/definitions/ir/nodes/r_merge.py +++ b/compiler/definitions/ir/nodes/r_merge.py @@ -1,24 +1,35 @@ from pash_annotations.datatypes.AccessKind import make_stream_input, make_stream_output -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from definitions.ir.dfg_node import * + class RMerge(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], - com_assignments=[], - parallelizer_list=None, - cmd_related_properties=None): - # TODO []: default arguments! 
- super().__init__(cmd_invocation_with_io_vars=cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments, - parallelizer_list=parallelizer_list, - cmd_related_properties=cmd_related_properties) + def __init__( + self, + cmd_invocation_with_io_vars, + com_redirs=None, + com_assignments=None, + parallelizer_list=None, + cmd_related_properties=None, + ): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + super().__init__( + cmd_invocation_with_io_vars=cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + parallelizer_list=parallelizer_list, + cmd_related_properties=cmd_related_properties, + ) + def make_r_merge_node(inputs, output): - r_merge_bin = os.path.join(config.PASH_TOP, config.config['runtime']['r_merge_binary']) + r_merge_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["r_merge_binary"] + ) # TODO: assume that the inputs and output is provided as operands access_map = {input_id: make_stream_input() for input_id in inputs} access_map[output] = make_stream_output() @@ -28,5 +39,6 @@ def make_r_merge_node(inputs, output): operand_list=inputs, implicit_use_of_streaming_input=None, implicit_use_of_streaming_output=output, - access_map=access_map) + access_map=access_map, + ) return RMerge(cmd_inv_with_io_vars) diff --git a/compiler/definitions/ir/nodes/r_split.py b/compiler/definitions/ir/nodes/r_split.py index aefce4b7c..92bed717f 100644 --- a/compiler/definitions/ir/nodes/r_split.py +++ b/compiler/definitions/ir/nodes/r_split.py @@ -1,8 +1,14 @@ import os -from pash_annotations.datatypes.AccessKind import AccessKind, make_stream_input, make_stream_output +from pash_annotations.datatypes.AccessKind import ( + AccessKind, + make_stream_input, + make_stream_output, +) from pash_annotations.datatypes.BasicDatatypes import Operand, Flag -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) import config @@ -10,40 +16,49 @@ from definitions.ir.file_id import * from shell_ast.ast_util import string_to_argument + class RSplit(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], - com_assignments=[], - parallelizer_list=None, - cmd_related_properties=None): - # TODO []: default arguments! 
- super().__init__(cmd_invocation_with_io_vars=cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments, - parallelizer_list=parallelizer_list, - cmd_related_properties=cmd_related_properties) + def __init__( + self, + cmd_invocation_with_io_vars, + com_redirs=None, + com_assignments=None, + parallelizer_list=None, + cmd_related_properties=None, + ): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + super().__init__( + cmd_invocation_with_io_vars=cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + parallelizer_list=parallelizer_list, + cmd_related_properties=cmd_related_properties, + ) def add_r_flag(self): self.cmd_invocation_with_io_vars.flag_option_list.append(Flag("-r")) def make_r_split(input_id, out_ids, r_split_batch_size): - r_split_bin = os.path.join(config.PASH_TOP, config.config['runtime']['r_split_binary']) - operand_list = [input_id, - Operand(Arg.string_to_arg(str(r_split_batch_size)))] + r_split_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["r_split_binary"] + ) + operand_list = [input_id, Operand(Arg.string_to_arg(str(r_split_batch_size)))] operand_list.extend(out_ids) access_map = {output_id: make_stream_output() for output_id in out_ids} access_map[input_id] = make_stream_input() cmd_inv_with_io_vars = CommandInvocationWithIOVars( - cmd_name=r_split_bin, - flag_option_list=[], - operand_list=operand_list, - implicit_use_of_streaming_input=None, - implicit_use_of_streaming_output=None, - access_map=access_map) + cmd_name=r_split_bin, + flag_option_list=[], + operand_list=operand_list, + implicit_use_of_streaming_input=None, + implicit_use_of_streaming_output=None, + access_map=access_map, + ) return RSplit(cmd_inv_with_io_vars) + def make_r_split_with_unwrap_flag(input_id, out_ids, r_split_batch_size): standard_r_split = make_r_split(input_id, out_ids, r_split_batch_size) standard_r_split.add_r_flag() diff --git a/compiler/definitions/ir/nodes/r_unwrap.py b/compiler/definitions/ir/nodes/r_unwrap.py index 931507220..ce8a9d4aa 100644 --- a/compiler/definitions/ir/nodes/r_unwrap.py +++ b/compiler/definitions/ir/nodes/r_unwrap.py @@ -1,32 +1,44 @@ from pash_annotations.datatypes.AccessKind import make_stream_input, make_stream_output -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from definitions.ir.dfg_node import * + class RUnwrap(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], - com_assignments=[], - parallelizer_list=None, - cmd_related_properties=None): - # TODO []: default - super().__init__(cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments, - parallelizer_list=parallelizer_list, - cmd_related_properties=cmd_related_properties) + def __init__( + self, + cmd_invocation_with_io_vars, + com_redirs=None, + com_assignments=None, + parallelizer_list=None, + cmd_related_properties=None, + ): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + super().__init__( + cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + parallelizer_list=parallelizer_list, + cmd_related_properties=cmd_related_properties, + ) + def make_unwrap_node(inputs, output): - assert(len(inputs) == 1) + assert 
len(inputs) == 1 input_id = inputs[0] access_map = {input_id: make_stream_input(), output: make_stream_output()} - r_unwrap_bin = os.path.join(config.PASH_TOP, config.config['runtime']['r_unwrap_binary']) + r_unwrap_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["r_unwrap_binary"] + ) cmd_inv_with_io_vars = CommandInvocationWithIOVars( cmd_name=r_unwrap_bin, flag_option_list=[], operand_list=[], implicit_use_of_streaming_input=input_id, implicit_use_of_streaming_output=output, - access_map=access_map) + access_map=access_map, + ) return RUnwrap(cmd_inv_with_io_vars) diff --git a/compiler/definitions/ir/nodes/r_wrap.py b/compiler/definitions/ir/nodes/r_wrap.py index 2a5f79ee9..52993a8b5 100644 --- a/compiler/definitions/ir/nodes/r_wrap.py +++ b/compiler/definitions/ir/nodes/r_wrap.py @@ -1,26 +1,36 @@ from pash_annotations.datatypes.AccessKind import make_stream_output, make_stream_input from pash_annotations.datatypes.BasicDatatypes import ArgStringType -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) -from annotations_utils.util_cmd_invocations import to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping +from annotations_utils.util_cmd_invocations import ( + to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping, +) from definitions.ir.dfg_node import * from shell_ast.ast_util import * + class RWrap(DFGNode): - def __init__(self, - cmd_invocation_with_io_vars, - com_redirs=[], - com_assignments=[], - parallelizer_list=None, - cmd_related_properties=None, - wrapped_node_name=None): - # TODO []: default + def __init__( + self, + cmd_invocation_with_io_vars, + com_redirs=None, + com_assignments=None, + parallelizer_list=None, + cmd_related_properties=None, + wrapped_node_name=None, + ): + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments self.wrapped_node_name = wrapped_node_name - super().__init__(cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments, - parallelizer_list=parallelizer_list, - cmd_related_properties=cmd_related_properties) + super().__init__( + cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + parallelizer_list=parallelizer_list, + cmd_related_properties=cmd_related_properties, + ) ## Get the label of the node. By default, it is simply the name def get_dot_label(self) -> str: @@ -29,31 +39,36 @@ def get_dot_label(self) -> str: basename = os.path.basename(str(name)) wrapped_node_name = self.wrapped_node_name - return f'{basename}({wrapped_node_name})' + return f"{basename}({wrapped_node_name})" + def wrap_node(node: DFGNode, edges): - r_wrap_bin = os.path.join(config.PASH_TOP, config.config['runtime']['r_wrap_binary']) + r_wrap_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["r_wrap_binary"] + ) ## At the moment we can only wrap a node that takes its input from stdin ## and outputs to stdout. Therefore the node needs to have only one input and one output. ## TO CHECK: with the remodelling also other cases should be handled inputs = node.get_input_list() - assert(len(inputs) == 1) + assert len(inputs) == 1 input_id = inputs[0] outputs = node.get_output_list() ## TODO: Would it make sense for outputs to be less than one? 
## TODO: changed this from <= to == 1 to simplify reasoning later for now - assert(len(outputs) == 1) + assert len(outputs) == 1 output_id = outputs[0] access_map = {input_id: make_stream_input(), output_id: make_stream_output()} - #create bash -c argument + # create bash -c argument cmd_inv_with_io_vars: CommandInvocationWithIOVars = node.cmd_invocation_with_io_vars # do we need to copy here? currently, it seems fine cmd_inv_with_io_vars.remove_streaming_inputs() cmd_inv_with_io_vars.remove_streaming_outputs() # any non-streaming inputs or outputs are converted here already! - cmd = to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping(cmd_inv_with_io_vars, edges) + cmd = to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping( + cmd_inv_with_io_vars, edges + ) bash_command_arg = [Arg.string_to_arg("bash -c")] operand_list = bash_command_arg + [cmd] @@ -64,13 +79,16 @@ def wrap_node(node: DFGNode, edges): operand_list=operand_list, implicit_use_of_streaming_input=input_id, implicit_use_of_streaming_output=output_id, - access_map=access_map) + access_map=access_map, + ) ## TODO: It is not clear if it is safe to just pass redirections and assignments down the line as is redirs = node.com_redirs assignments = node.com_assignments - return RWrap(cmd_inv_with_io_vars, - com_redirs=redirs, - com_assignments=assignments, - wrapped_node_name=node.cmd_invocation_with_io_vars.cmd_name) + return RWrap( + cmd_inv_with_io_vars, + com_redirs=redirs, + com_assignments=assignments, + wrapped_node_name=node.cmd_invocation_with_io_vars.cmd_name, + ) diff --git a/compiler/definitions/ir/nodes/remote_pipe.py b/compiler/definitions/ir/nodes/remote_pipe.py index 7e35faf32..9b335324f 100644 --- a/compiler/definitions/ir/nodes/remote_pipe.py +++ b/compiler/definitions/ir/nodes/remote_pipe.py @@ -1,12 +1,31 @@ from definitions.ir.dfg_node import * + class RemotePipe(DFGNode): - def __init__(self, inputs, outputs, com_name, com_category, - com_options = [], com_redirs = [], com_assignments=[]): - super().__init__(inputs, outputs, com_name, com_category, - com_options=com_options, - com_redirs=com_redirs, - com_assignments=com_assignments) + def __init__( + self, + inputs, + outputs, + com_name, + com_category, + com_options=None, + com_redirs=None, + com_assignments=None, + ): + com_options = [] if com_options is None else com_options + com_redirs = [] if com_redirs is None else com_redirs + com_assignments = [] if com_assignments is None else com_assignments + + super().__init__( + inputs, + outputs, + com_name, + com_category, + com_options=com_options, + com_redirs=com_redirs, + com_assignments=com_assignments, + ) + def make_remote_pipe(inputs, outputs, host_ip, port, is_remote_read, id): com_category = "pure" @@ -14,17 +33,17 @@ def make_remote_pipe(inputs, outputs, host_ip, port, is_remote_read, id): opt_count = 0 if is_remote_read: - remote_pipe_bin = os.path.join(config.PASH_TOP, config.config['runtime']['remote_read_binary']) + remote_pipe_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["remote_read_binary"] + ) else: - remote_pipe_bin = os.path.join(config.PASH_TOP, config.config['runtime']['remote_write_binary']) + remote_pipe_bin = os.path.join( + config.PASH_TOP, config.config["runtime"]["remote_write_binary"] + ) com_name = Arg.string_to_arg(remote_pipe_bin) options.append((opt_count, Arg.string_to_arg(f"--addr {host_ip}:{port}"))) options.append((opt_count + 1, Arg.string_to_arg(f"--id {id}"))) - return RemotePipe(inputs, - 
outputs, - com_name, - com_category, - com_options=options) + return RemotePipe(inputs, outputs, com_name, com_category, com_options=options) diff --git a/compiler/definitions/ir/redirection.py b/compiler/definitions/ir/redirection.py index 5a4a745df..bee70d714 100644 --- a/compiler/definitions/ir/redirection.py +++ b/compiler/definitions/ir/redirection.py @@ -1,7 +1,8 @@ from definitions.ir.arg import * from shell_ast.ast_util import * -class Redirection(): + +class Redirection: def __init__(self, redirection: RedirectionNode): if isinstance(redirection, FileRedirNode): self.redir_type = FileRedirNode.NodeName @@ -16,32 +17,29 @@ def __init__(self, redirection: RedirectionNode): # log(redirection) ## TODO: Support all redirections - assert(self.redir_type == 'File') - assert(self.redir_subtype in ['To', 'From']) + assert self.redir_type == "File" + assert self.redir_subtype in ["To", "From"] def __repr__(self): - return '({}, {}, {}, {})'.format(self.redir_type, - self.redir_subtype, - self.stream_id, - self.file_arg) + return "({}, {}, {}, {})".format( + self.redir_type, self.redir_subtype, self.stream_id, self.file_arg + ) def to_ast(self): - redir = make_kv(self.redir_type, - [self.redir_subtype, - self.stream_id, - self.file_arg.to_ast()]) + redir = make_kv( + self.redir_type, + [self.redir_subtype, self.stream_id, self.file_arg.to_ast()], + ) return redir def is_to_file(self): - return (self.redir_type == 'File' - and self.redir_subtype == 'To') + return self.redir_type == "File" and self.redir_subtype == "To" def is_for_stdout(self): - return (self.stream_id == 1) + return self.stream_id == 1 def is_from_file(self): - return (self.redir_type == 'File' - and self.redir_subtype == 'From') + return self.redir_type == "File" and self.redir_subtype == "From" def is_for_stdin(self): - return (self.stream_id == 0) + return self.stream_id == 0 diff --git a/compiler/definitions/ir/resource.py b/compiler/definitions/ir/resource.py index c6ad69c5e..4b7b9fe85 100644 --- a/compiler/definitions/ir/resource.py +++ b/compiler/definitions/ir/resource.py @@ -7,6 +7,7 @@ ## TODO: Resources should probably be more elaborate than just a ## string and a line range. They could be URLs, and possibly other things. + ## TODO: Think if we can have any optimizations if we know the size of a resource. class Resource: def __init__(self, uri): @@ -27,26 +28,25 @@ def __eq__(self, other): if isinstance(other, Resource): return self.uri == other.uri return False - + + class FileDescriptorResource(Resource): def __init__(self, fd): - assert(isinstance(fd, tuple) - and len(fd) == 2 - and fd[0] == 'fd') + assert isinstance(fd, tuple) and len(fd) == 2 and fd[0] == "fd" self.uri = fd def is_stdin(self): - return (self.uri == ('fd', 0)) + return self.uri == ("fd", 0) def is_stdout(self): - return (self.uri == ('fd', 1)) + return self.uri == ("fd", 1) class FileResource(Resource): ## The uri is the path of the file. def __init__(self, path): log("class of path", type(path)) - assert(isinstance(path, Arg)) + assert isinstance(path, Arg) ## TODO: Make sure that paths are normalized self.uri = path @@ -55,15 +55,18 @@ def __eq__(self, other): return self.uri == other.uri return False + class TemporaryFileResource(Resource): def __init__(self): self.uri = None + # A FIFO. 
class EphemeralResource(Resource): def __init__(self): self.uri = None + class RemoteFileResource(Resource): def __init__(self): raise NotImplementedError("RemoteFileResource is an interface") @@ -84,15 +87,16 @@ def _normalize_addr(self, addr): normalized_host = socket.gethostbyaddr(host)[2][0] return normalized_host + class HDFSFileResource(RemoteFileResource): ## The uri is the path of the file. def __init__(self, uri, resource_hosts): """ Params: - uri: Usually the path to the file. The path doesn't include the top directory - which is different between hosts. The str function adds the prefix $HDFS_DATANODE_DIR/ + uri: Usually the path to the file. The path doesn't include the top directory + which is different between hosts. The str function adds the prefix $HDFS_DATANODE_DIR/ which should be defined on host machine worker environment. - resource_hosts: the addresses of all the machines containing + resource_hosts: the addresses of all the machines containing the resource. """ self.uri = uri @@ -107,11 +111,12 @@ def is_available_on(self, host): return host in self.hosts def __repr__(self): - return f'hdfs://{self.uri}' + return f"hdfs://{self.uri}" def __str__(self): return f"$HDFS_DATANODE_DIR/{self.uri}" + # DFS logical split resource class DFSSplitResource(RemoteFileResource): def __init__(self, config, config_path, split_num, hosts): @@ -125,6 +130,6 @@ def is_available_on(self, host): def set_config_path(self, config_path): self.config_path = config_path - + def __str__(self): return self.config_path diff --git a/compiler/dspash/hdfs_file_data.py b/compiler/dspash/hdfs_file_data.py index cffb45677..5b8933d4b 100644 --- a/compiler/dspash/hdfs_file_data.py +++ b/compiler/dspash/hdfs_file_data.py @@ -38,13 +38,14 @@ def paths(self): ) return filepaths + class HDFSFileConfig: def __init__(self, filedata: FileData): - self.blocks : List[HDFSBlock] = [] + self.blocks: List[HDFSBlock] = [] for i, block_path in enumerate(filedata.paths()): hosts = list(map(lambda addr: addr.rsplit(":", 1)[0], filedata.machines[i])) self.blocks.append(HDFSBlock(block_path, hosts)) - + def _serialize(self): data = {"blocks": []} for path, hosts in self.blocks: @@ -57,7 +58,7 @@ def dumps(self): def dump(self, filepath): data = self._serialize() - with open(filepath, 'w') as f: + with open(filepath, "w") as f: json.dump(data, f) def __eq__(self, __o: object) -> bool: @@ -65,10 +66,13 @@ def __eq__(self, __o: object) -> bool: return False return self.blocks == __o.blocks + def get_hdfs_file_data(filename): info = FileData(filename) log = subprocess.check_output( - "hdfs fsck {0} -files -blocks -locations".format(filename), shell=True, stderr=subprocess.PIPE + "hdfs fsck {0} -files -blocks -locations".format(filename), + shell=True, + stderr=subprocess.PIPE, ) count = 0 for line in log.splitlines(): @@ -95,6 +99,7 @@ def get_hdfs_file_data(filename): assert info.size > 0 return info + def _getIPs(raw): rawparts = raw.split(" ") ips = [] @@ -103,6 +108,7 @@ def _getIPs(raw): ips.append(part[index + len("DatanodeInfoWithStorage") + 1 : part.find(",")]) return ips + if __name__ == "__main__": assert len(sys.argv) == 2 filename = sys.argv[1] diff --git a/compiler/dspash/hdfs_utils.py b/compiler/dspash/hdfs_utils.py index c86109702..94fccd60f 100644 --- a/compiler/dspash/hdfs_utils.py +++ b/compiler/dspash/hdfs_utils.py @@ -1,24 +1,30 @@ from dspash.hdfs_file_data import get_hdfs_file_data, FileData, HDFSFileConfig from typing import List, Tuple -def get_cmd_output(cmd:str): - ret = subprocess.check_output(cmd, 
shell=True, universal_newlines=True, stderr=subprocess.PIPE) + +def get_cmd_output(cmd: str): + ret = subprocess.check_output( + cmd, shell=True, universal_newlines=True, stderr=subprocess.PIPE + ) return ret.strip() -def _remove_prefix(s:str, prefix:str) -> str: + +def _remove_prefix(s: str, prefix: str) -> str: if s.startswith(prefix): - return s[len(prefix):] + return s[len(prefix) :] return s + def get_datanode_dir() -> str: data_dir = get_cmd_output("hdfs getconf -confKey dfs.datanode.data.dir") data_dir = _remove_prefix(data_dir, "file://") return data_dir + def get_file_data(filename: str) -> FileData: return get_hdfs_file_data(filename) + def get_file_config(filename: str) -> HDFSFileConfig: filedata = get_file_data(filename) return HDFSFileConfig(filedata) - diff --git a/compiler/dspash/ir_helper.py b/compiler/dspash/ir_helper.py index 7ce37d80e..f73b63600 100644 --- a/compiler/dspash/ir_helper.py +++ b/compiler/dspash/ir_helper.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import List, Set, Tuple, Dict, Callable from uuid import uuid4 + sys.path.append("/pash/compiler") import config @@ -40,10 +41,11 @@ def read_graph(filename): ir, shell_vars = pickle.load(ir_file) return ir, shell_vars -def save_configs(graph:IR, dfs_configs_paths: Dict[HDFSFileConfig, str]): + +def save_configs(graph: IR, dfs_configs_paths: Dict[HDFSFileConfig, str]): for edge in graph.all_fids(): if isinstance(edge.get_resource(), DFSSplitResource): - resource : DFSSplitResource = edge.get_resource() + resource: DFSSplitResource = edge.get_resource() config: HDFSFileConfig = resource.config if config not in dfs_configs_paths: config_path = ptempfile() @@ -55,14 +57,15 @@ def save_configs(graph:IR, dfs_configs_paths: Dict[HDFSFileConfig, str]): resource.set_config_path(config_path) + def to_shell_file(graph: IR, args) -> str: filename = ptempfile() - + dirs = set() for edge in graph.all_fids(): directory = os.path.join(config.PASH_TMP_PREFIX, edge.prefix) dirs.add(directory) - + for directory in dirs: os.makedirs(directory, exist_ok=True) @@ -74,6 +77,7 @@ def to_shell_file(graph: IR, args) -> str: f.write(script) return filename + def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: """ Takes an optimized IR and splits it subgraphs. Every subgraph is a continues section between a splitter and a merger. 
@@ -99,7 +103,7 @@ def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: """ source_node_ids = graph.source_nodes() input_fifo_map = defaultdict(list) - + subgraphs = [] queue = deque([(source, IR({}, {})) for source in source_node_ids]) @@ -112,13 +116,13 @@ def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: input_fids = graph.get_node_input_fids(old_node_id) output_fids = graph.get_node_output_fids(old_node_id) - if(any(map(lambda fid:fid not in visited_edges, input_fids))): + if any(map(lambda fid: fid not in visited_edges, input_fids)): if subgraph.source_nodes(): subgraphs.append(subgraph) continue - + # Second condition makes sure we don't add empty graphs - if len(input_fids) > 1 and subgraph.source_nodes(): # merger node + if len(input_fids) > 1 and subgraph.source_nodes(): # merger node if subgraph not in subgraphs: subgraphs.append(subgraph) subgraph = IR({}, {}) @@ -127,7 +131,7 @@ def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: continue else: visited_nodes.add(old_node_id) - + node = graph.get_node(old_node_id).copy() node_id = node.get_id() @@ -141,7 +145,7 @@ def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: else: input_edge_id = input_fid.get_ident() subgraph.set_edge_to(input_edge_id, node_id) - # keep track + # keep track input_fifo_map[input_edge_id].append(subgraph) # Add edges coming out of the node @@ -152,7 +156,7 @@ def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: # Add edges coming into the node for input_fid in input_fids: if input_fid.get_ident() not in subgraph.edges: - subgraph.add_to_edge(input_fid, node_id) + subgraph.add_to_edge(input_fid, node_id) # Add the node subgraph.add_node(node) @@ -164,21 +168,28 @@ def split_ir(graph: IR) -> Tuple[List[IR], Dict[int, IR]]: subgraphs.append(subgraph) for next_id in next_ids: queue.append((next_id, IR({}, {}))) - + # print(list(map(lambda k : k.all_fids(), graphs))) return subgraphs, input_fifo_map -def add_stdout_fid(graph : IR, file_id_gen: FileIdGen) -> FileId: + +def add_stdout_fid(graph: IR, file_id_gen: FileIdGen) -> FileId: stdout = file_id_gen.next_file_id() - stdout.set_resource(FileDescriptorResource(('fd', 1))) + stdout.set_resource(FileDescriptorResource(("fd", 1))) graph.add_edge(stdout) return stdout -def assign_workers_to_subgraphs(subgraphs:List[IR], file_id_gen: FileIdGen, input_fifo_map:Dict[int, IR], get_worker: Callable) -> (IR, Tuple): - """ Takes a list of subgraphs and assigns a worker to each subgraph and augment - the subgraphs with the necessary remote read/write nodes for data movement - between workers. This function also produces graph that should run in - the original shell in which pash was executed. This graph contains + +def assign_workers_to_subgraphs( + subgraphs: List[IR], + file_id_gen: FileIdGen, + input_fifo_map: Dict[int, IR], + get_worker: Callable, +) -> (IR, Tuple): + """Takes a list of subgraphs and assigns a worker to each subgraph and augment + the subgraphs with the necessary remote read/write nodes for data movement + between workers. This function also produces graph that should run in + the original shell in which pash was executed. This graph contains remote read/write nodes for stdin/stdout, named pipes, and files. 
Args: @@ -197,13 +208,15 @@ def assign_workers_to_subgraphs(subgraphs:List[IR], file_id_gen: FileIdGen, inpu # Replace output edges and corrosponding input edges with remote read/write for subgraph in subgraphs: - subgraph_critical_fids = list(filter(lambda fid: fid.has_remote_file_resource(), subgraph.all_fids())) + subgraph_critical_fids = list( + filter(lambda fid: fid.has_remote_file_resource(), subgraph.all_fids()) + ) worker = get_worker(subgraph_critical_fids) worker._running_processes += 1 worker_subgraph_pairs.append((worker, subgraph)) sink_nodes = subgraph.sink_nodes() - assert(len(sink_nodes) == 1) - + assert len(sink_nodes) == 1 + for out_edge in subgraph.get_node_output_fids(sink_nodes[0]): stdout = add_stdout_fid(subgraph, file_id_gen) out_edge_id = out_edge.get_ident() @@ -213,9 +226,16 @@ def assign_workers_to_subgraphs(subgraphs:List[IR], file_id_gen: FileIdGen, inpu subgraph.replace_edge(out_edge_id, ephemeral_edge) edge_uid = uuid4() # Add remote-write node at the end of the subgraph - remote_write = remote_pipe.make_remote_pipe([ephemeral_edge.get_ident()], [stdout.get_ident()], worker.host(), DISCOVERY_PORT, False, edge_uid) + remote_write = remote_pipe.make_remote_pipe( + [ephemeral_edge.get_ident()], + [stdout.get_ident()], + worker.host(), + DISCOVERY_PORT, + False, + edge_uid, + ) subgraph.add_node(remote_write) - + # Copy the old output edge resource new_edge = file_id_gen.next_file_id() new_edge.set_resource(out_edge.get_resource()) @@ -227,8 +247,15 @@ def assign_workers_to_subgraphs(subgraphs:List[IR], file_id_gen: FileIdGen, inpu else: matching_subgraph = main_graph matching_subgraph.add_edge(new_edge) - - remote_read = remote_pipe.make_remote_pipe([], [new_edge.get_ident()], worker.host(), DISCOVERY_PORT, True, edge_uid) + + remote_read = remote_pipe.make_remote_pipe( + [], + [new_edge.get_ident()], + worker.host(), + DISCOVERY_PORT, + True, + edge_uid, + ) matching_subgraph.add_node(remote_read) # Replace non ephemeral input edges with remote read/write @@ -236,7 +263,10 @@ def assign_workers_to_subgraphs(subgraphs:List[IR], file_id_gen: FileIdGen, inpu source_nodes = subgraph.source_nodes() for source in source_nodes: for in_edge in subgraph.get_node_input_fids(source): - if in_edge.has_file_resource() or in_edge.has_file_descriptor_resource(): + if ( + in_edge.has_file_resource() + or in_edge.has_file_descriptor_resource() + ): # setup stdout = add_stdout_fid(main_graph, file_id_gen) @@ -247,14 +277,28 @@ def assign_workers_to_subgraphs(subgraphs:List[IR], file_id_gen: FileIdGen, inpu # Add remote write to main subgraph edge_uid = uuid4() - remote_write = remote_pipe.make_remote_pipe([new_edge.get_ident()], [stdout.get_ident()], HOST, DISCOVERY_PORT, False, edge_uid) + remote_write = remote_pipe.make_remote_pipe( + [new_edge.get_ident()], + [stdout.get_ident()], + HOST, + DISCOVERY_PORT, + False, + edge_uid, + ) main_graph.add_node(remote_write) # Add remote read to current subgraph ephemeral_edge = file_id_gen.next_ephemeral_file_id() subgraph.replace_edge(in_edge.get_ident(), ephemeral_edge) - remote_read = remote_pipe.make_remote_pipe([], [ephemeral_edge.get_ident()], HOST, DISCOVERY_PORT, True, edge_uid) + remote_read = remote_pipe.make_remote_pipe( + [], + [ephemeral_edge.get_ident()], + HOST, + DISCOVERY_PORT, + True, + edge_uid, + ) subgraph.add_node(remote_read) else: # sometimes a command can have both a file resource and an ephemeral resources (example: spell oneliner) @@ -262,18 +306,19 @@ def assign_workers_to_subgraphs(subgraphs:List[IR], 
file_id_gen: FileIdGen, inpu return main_graph, worker_subgraph_pairs -def prepare_graph_for_remote_exec(filename:str, get_worker:Callable): + +def prepare_graph_for_remote_exec(filename: str, get_worker: Callable): """ Reads the complete ir from filename and splits it into subgraphs where ony the first subgraph represent a continues - segment (merger segment or branched segment) in the graph. + segment (merger segment or branched segment) in the graph. Note: All subgraphs(except first one) read and write from remote pipes. However, we had to add a fake stdout to avoid some problems when converting to shell code. - Returns: + Returns: worker_graph_pairs: List of (worker, subgraph) shell_vars: shell variables - main_graph: The ir we need to execute on the main shell. + main_graph: The ir we need to execute on the main shell. This graph contains edges to correctly redirect the following to remote workers - special pipes (stdin/stdout) - named pipes reading and writing @@ -282,5 +327,7 @@ def prepare_graph_for_remote_exec(filename:str, get_worker:Callable): ir, shell_vars = read_graph(filename) file_id_gen = ir.get_file_id_gen() subgraphs, mapping = split_ir(ir) - main_graph, worker_graph_pairs = assign_workers_to_subgraphs(subgraphs, file_id_gen, mapping, get_worker) + main_graph, worker_graph_pairs = assign_workers_to_subgraphs( + subgraphs, file_id_gen, mapping, get_worker + ) return worker_graph_pairs, shell_vars, main_graph diff --git a/compiler/dspash/socket_utils.py b/compiler/dspash/socket_utils.py index d3c736f3c..0598626fe 100644 --- a/compiler/dspash/socket_utils.py +++ b/compiler/dspash/socket_utils.py @@ -6,20 +6,23 @@ import pickle import struct + def send_msg(sock, msg): # Prefix each message with a 4-byte length (network byte order) - msg = struct.pack('>I', len(msg)) + msg + msg = struct.pack(">I", len(msg)) + msg sock.sendall(msg) + def recv_msg(sock): # Read message length and unpack it into an integer raw_msglen = recvall(sock, 4) if not raw_msglen: return None - msglen = struct.unpack('>I', raw_msglen)[0] + msglen = struct.unpack(">I", raw_msglen)[0] # Read the message data return recvall(sock, msglen) + def recvall(sock, n): # Helper function to recv n bytes or return None if EOF is hit data = bytearray() @@ -30,12 +33,15 @@ def recvall(sock, n): data.extend(packet) return data + def encode_request(obj: dict): return pickle.dumps(obj) + def decode_request(b: bytes): return pickle.loads(b) + ## TODO: SocketManager might need to handle errors more gracefully class SocketManager: def __init__(self, server_address): @@ -56,32 +62,31 @@ def __init__(self, server_address): # log("SocketManager: Created socket") self.sock.bind(server_address) - # log("SocketManager: Successfully bound to socket") + # log("SocketManager: Successfully bound to socket") ## TODO: Check if we need to configure the back# log - self.sock.listen() - # log("SocketManager: Listenting on socket") - + self.sock.listen() + # log("SocketManager: Listenting on socket") def get_next_cmd(self): connection, client_address = self.sock.accept() data = connection.recv(self.buf_size) ## TODO: This could be avoided for efficiency - str_data = data.decode('utf-8') + str_data = data.decode("utf-8") # log("Received data:", str_data) ## TODO: Lift this requirement if needed ## ## We need to ensure that we read a command at once or the command was empty (only relevant in the first invocation) - assert(str_data.endswith("\n") or str_data == "") - + assert str_data.endswith("\n") or str_data == "" + return str_data, 
connection ## This method respond to the connection we last got input from ## In the case of the UnixPipes, we don't have any state management here ## since all reads/writes go to/from the same fifos def respond(self, message, connection): - bytes_message = message.encode('utf-8') + bytes_message = message.encode("utf-8") connection.sendall(bytes_message) connection.close() diff --git a/compiler/dspash/utils.py b/compiler/dspash/utils.py index 6402c94dd..a503e698b 100644 --- a/compiler/dspash/utils.py +++ b/compiler/dspash/utils.py @@ -3,16 +3,19 @@ import tempfile import uuid + def read_file(file, mode="r"): with open(file, mode) as f: data = f.read() return data + def write_file(file, data, mode="w"): with open(file, mode) as f: n = f.write(data) return n + def create_filename(dir, prefix="", temp=False): if temp: return tempfile.mkstemp(dir=dir, prefix=prefix) diff --git a/compiler/dspash/worker.py b/compiler/dspash/worker.py index 4b60ef766..1df79b5f2 100644 --- a/compiler/dspash/worker.py +++ b/compiler/dspash/worker.py @@ -11,7 +11,7 @@ import uuid import argparse -PASH_TOP = os.environ['PASH_TOP'] +PASH_TOP = os.environ["PASH_TOP"] sys.path.append(os.path.join(PASH_TOP, "compiler")) import config @@ -23,42 +23,43 @@ # from ... import config HOST = socket.gethostbyname(socket.gethostname()) -PORT = 65432 # Port to listen on (non-privileged ports are > 1023) +PORT = 65432 # Port to listen on (non-privileged ports are > 1023) def err_print(*args): print(*args, file=sys.stderr) -def send_success(conn, body, msg = ""): - request = { - 'status': 'OK', - 'body': body, - 'msg': msg - } + +def send_success(conn, body, msg=""): + request = {"status": "OK", "body": body, "msg": msg} send_msg(conn, encode_request(request)) + def parse_exec_request(request): - return request['cmd'] + return request["cmd"] + def parse_exec_graph(request): - return request['graph'], request['shell_variables'], request['functions'] + return request["graph"], request["shell_variables"], request["functions"] + def exec_graph(graph, shell_vars, functions): - config.config['shell_variables'] = shell_vars + config.config["shell_variables"] = shell_vars script_path = to_shell_file(graph, config.pash_args) e = os.environ.copy() - e['PASH_TOP'] = PASH_TOP + e["PASH_TOP"] = PASH_TOP # store functions - functions_file = create_filename(dir=config.PASH_TMP_PREFIX, prefix='pashFuncs') + functions_file = create_filename(dir=config.PASH_TMP_PREFIX, prefix="pashFuncs") write_file(functions_file, functions) cmd = f"source {functions_file}; source {script_path}" rc = subprocess.Popen(cmd, env=e, executable="/bin/bash", shell=True) return rc + class Worker: - def __init__(self, port = None): + def __init__(self, port=None): self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) if port == None: # pick a random port @@ -71,19 +72,20 @@ def run(self): connections = [] with self.s: self.s.listen() - while(True): + while True: conn, addr = self.s.accept() - print(f"got new connection") + print(f"got new connection") t = Thread(target=manage_connection, args=[conn, addr]) t.start() connections.append(t) for t in connections: t.join() + def manage_connection(conn, addr): rcs = [] with conn: - print('Connected by', addr) + print("Connected by", addr) dfs_configs_paths = {} while True: data = recv_msg(conn) @@ -92,7 +94,7 @@ def manage_connection(conn, addr): print("got new request") request = decode_request(data) - if request['type'] == 'Exec-Graph': + if request["type"] == "Exec-Graph": graph, shell_vars, functions = 
parse_exec_graph(request) save_configs(graph, dfs_configs_paths) exec_graph(graph, shell_vars, functions) @@ -104,12 +106,10 @@ def manage_connection(conn, addr): for rc in rcs: rc.wait() + def parse_args(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument("--port", - type=int, - help="port to use", - default=65432) + parser = argparse.ArgumentParser(description="Process some integers.") + parser.add_argument("--port", type=int, help="port to use", default=65432) config.add_common_arguments(parser) args = parser.parse_args() config.set_config_globals_from_pash_args(args) @@ -119,19 +119,22 @@ def parse_args(): config.load_config(args.config_path) return args + def init(): args = parse_args() config.LOGGING_PREFIX = f"Worker {config.pash_args.port}: " ## KK: 2023-02-21 Commenting this out, we need to figure out if the new annotations work with the distribution package # config.annotations = load_annotation_files( # config.config['distr_planner']['annotations_dir']) - pash_compiler.runtime_config = config.config['distr_planner'] + pash_compiler.runtime_config = config.config["distr_planner"] pash_compiler.termination = "" + def main(): init() worker = Worker(config.pash_args.port) worker.run() + if __name__ == "__main__": main() diff --git a/compiler/dspash/worker_manager.py b/compiler/dspash/worker_manager.py index 3bcfa1c50..e6e7d3db4 100644 --- a/compiler/dspash/worker_manager.py +++ b/compiler/dspash/worker_manager.py @@ -5,18 +5,27 @@ import pickle import json -from dspash.socket_utils import SocketManager, encode_request, decode_request, send_msg, recv_msg +from dspash.socket_utils import ( + SocketManager, + encode_request, + decode_request, + send_msg, + recv_msg, +) from util import log from dspash.ir_helper import prepare_graph_for_remote_exec, to_shell_file from dspash.utils import read_file -import config +import config import copy -PORT = 65425 # Port to listen on (non-privileged ports are > 1023) +PORT = 65425 # Port to listen on (non-privileged ports are > 1023) + class WorkerConnection: def __init__(self, host, port): - self._host = socket.gethostbyaddr(host)[2][0] # get ip address in case host needs resolving + self._host = socket.gethostbyaddr(host)[2][ + 0 + ] # get ip address in case host needs resolving self._port = port self._running_processes = 0 self._online = True @@ -26,7 +35,7 @@ def __init__(self, host, port): self._socket.connect((self._host, self._port)) except Exception as e: self._online = False - + def is_online(self): # TODO: create a ping to confirm is online return self._online @@ -42,17 +51,18 @@ def get_running_processes(self): return self._running_processes def send_graph_exec_request(self, graph, shell_vars, functions) -> bool: - request_dict = { 'type': 'Exec-Graph', - 'graph': graph, - 'functions': functions, - 'shell_variables': None # Doesn't seem needed for now - } + request_dict = { + "type": "Exec-Graph", + "graph": graph, + "functions": functions, + "shell_variables": None, # Doesn't seem needed for now + } request = encode_request(request_dict) - #TODO: do I need to open and close connection? + # TODO: do I need to open and close connection? send_msg(self._socket, request) # TODO wait until the command exec finishes and run this in parallel? 
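# The request/response exchange here rides on the length-prefixed pickle framing
# from dspash/socket_utils.py: send_msg prepends a 4-byte big-endian length
# (struct.pack(">I", ...)) and recv_msg/recvall read exactly that many bytes back
# before unpickling. A minimal, self-contained sketch of one round trip over a
# local socketpair (illustration only, not the real worker connection):
import pickle
import socket
import struct

def _send(sock, payload: bytes) -> None:
    sock.sendall(struct.pack(">I", len(payload)) + payload)

def _recv(sock) -> bytes:
    (length,) = struct.unpack(">I", sock.recv(4))
    data = bytearray()
    while len(data) < length:  # mirrors recvall(): keep reading until complete
        data.extend(sock.recv(length - len(data)))
    return bytes(data)

a, b = socket.socketpair()
_send(a, pickle.dumps({"type": "Exec-Graph", "graph": None, "functions": ""}))
print(pickle.loads(_recv(b))["type"])  # -> Exec-Graph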
response_data = recv_msg(self._socket) - if not response_data or decode_request(response_data)['status'] != "OK": + if not response_data or decode_request(response_data)["status"] != "OK": raise Exception(f"didn't recieved ack on request {response_data}") else: # self._running_processes += 1 #TODO: decrease in case of failure or process ended @@ -77,15 +87,16 @@ def __str__(self): def host(self): return self._host -class WorkersManager(): - def __init__(self, workers: WorkerConnection = []): - self.workers = workers + +class WorkersManager: + def __init__(self, workers: WorkerConnection = None): + self.workers = [] if workers is None else workers self.host = socket.gethostbyname(socket.gethostname()) self.args = copy.copy(config.pash_args) # Required to create a correct multi sink graph - self.args.termination = "" + self.args.termination = "" - def get_worker(self, fids = None) -> WorkerConnection: + def get_worker(self, fids=None) -> WorkerConnection: if not fids: fids = [] @@ -93,12 +104,15 @@ def get_worker(self, fids = None) -> WorkerConnection: for worker in self.workers: if not worker.is_online(): continue - + # Skip if any provided fid isn't available on the worker machine if any(map(lambda fid: not fid.is_available_on(worker.host()), fids)): continue - if best_worker is None or best_worker.get_running_processes() > worker.get_running_processes(): + if ( + best_worker is None + or best_worker.get_running_processes() > worker.get_running_processes() + ): best_worker = worker if best_worker == None: @@ -110,31 +124,36 @@ def add_worker(self, host, port): self.workers.append(WorkerConnection(host, port)) def add_workers_from_cluster_config(self, config_path): - with open(config_path, 'r') as f: + with open(config_path, "r") as f: cluster_config = json.load(f) workers = cluster_config["workers"].values() for worker in workers: - host = worker['host'] - port = worker['port'] + host = worker["host"] + port = worker["port"] self.add_worker(host, port) - - + def run(self): workers_manager = self - workers_manager.add_workers_from_cluster_config(os.path.join(config.PASH_TOP, 'cluster.json')) + workers_manager.add_workers_from_cluster_config( + os.path.join(config.PASH_TOP, "cluster.json") + ) - dspash_socket = SocketManager(os.getenv('DSPASH_SOCKET')) + dspash_socket = SocketManager(os.getenv("DSPASH_SOCKET")) while True: request, conn = dspash_socket.get_next_cmd() if request.startswith("Done"): dspash_socket.close() break elif request.startswith("Exec-Graph"): - args = request.split(':', 1)[1].strip() + args = request.split(":", 1)[1].strip() filename, declared_functions_file = args.split() - worker_subgraph_pairs, shell_vars, main_graph = prepare_graph_for_remote_exec(filename, self.get_worker) + ( + worker_subgraph_pairs, + shell_vars, + main_graph, + ) = prepare_graph_for_remote_exec(filename, self.get_worker) script_fname = to_shell_file(main_graph, self.args) log("Master node graph stored in ", script_fname) @@ -148,9 +167,12 @@ def run(self): # Execute subgraphs on workers for worker, subgraph in worker_subgraph_pairs: - worker.send_graph_exec_request(subgraph, shell_vars, declared_functions) + worker.send_graph_exec_request( + subgraph, shell_vars, declared_functions + ) else: raise Exception(f"Unknown request: {request}") - + + if __name__ == "__main__": WorkersManager().run() diff --git a/compiler/env_var_names.py b/compiler/env_var_names.py index 81c45b289..5fe7ac597 100644 --- a/compiler/env_var_names.py +++ b/compiler/env_var_names.py @@ -1,10 +1,11 @@ - ## ## Variable names 
used in the pash runtime ## + def loop_iters_var() -> str: - return 'pash_loop_iters' + return "pash_loop_iters" + def loop_iter_var(loop_id: int) -> str: - return f'pash_loop_{loop_id}_iter' \ No newline at end of file + return f"pash_loop_{loop_id}_iter" diff --git a/compiler/env_vars_util.py b/compiler/env_vars_util.py deleted file mode 100644 index 6a7ec62b0..000000000 --- a/compiler/env_vars_util.py +++ /dev/null @@ -1,232 +0,0 @@ -import shlex -from datetime import datetime - -from util import log, print_time_delta - -def read_vars_file(var_file_path): - log("Reading variables from:", var_file_path) - - if(not var_file_path is None): - vars_dict = {} - # with open(var_file_path) as f: - # lines = [line.rstrip() for line in f.readlines()] - - with open(var_file_path) as f: - variable_reading_start_time = datetime.now() - data = f.read() - variable_reading_end_time = datetime.now() - print_time_delta("Variable Reading", variable_reading_start_time, variable_reading_end_time) - - variable_tokenizing_start_time = datetime.now() - ## TODO: Can we replace this tokenizing process with our own code? This is very slow :'( - ## It takes about 15ms on deathstar. - tokens = shlex.split(data) - variable_tokenizing_end_time = datetime.now() - print_time_delta("Variable Tokenizing", variable_tokenizing_start_time, variable_tokenizing_end_time) - # log("Tokens:", tokens) - - # MMG 2021-03-09 definitively breaking on newlines (e.g., IFS) and function outputs (i.e., `declare -f`) - # KK 2021-10-26 no longer breaking on newlines (probably) - - ## At the start of each iteration token_i should point to a 'declare' - token_i = 0 - while token_i < len(tokens): - # FIXME is this assignment needed? - export_or_typeset = tokens[token_i] - - ## Array variables require special parsing treatment - if (export_or_typeset == "declare" and is_array_variable(tokens[token_i+1])): - var_name, var_type, var_value, new_token_i = parse_array_variable(tokens, token_i) - vars_dict[var_name] = (var_type, var_value) - token_i = new_token_i - continue - - new_token_i = find_next_delimiter(tokens, token_i) - rest = " ".join(tokens[(token_i+1):new_token_i]) - token_i = new_token_i - - space_index = rest.find(' ') - eq_index = rest.find('=') - var_type = None - - ## Declared but unset? - if eq_index == -1: - if space_index != -1: - var_name = rest[(space_index+1):] - var_type = rest[:space_index] - else: - var_name = rest - var_value = "" - ## Set, with type - elif(space_index < eq_index and not space_index == -1): - var_type = rest[:space_index] - - if var_type == "--": - var_type = None - - var_name = rest[(space_index+1):eq_index] - var_value = rest[(eq_index+1):] - ## Set, without type - else: - var_name = rest[:eq_index] - var_value = rest[(eq_index+1):] - - ## Strip quotes - if var_value is not None and len(var_value) >= 2 and \ - var_value[0] == "\"" and var_value[-1] == "\"": - var_value = var_value[1:-1] - - vars_dict[var_name] = (var_type, var_value) - - final_vars_dict = set_special_parameters(vars_dict) - return final_vars_dict - - -## This sets the values of the special shell parameters correctly -## -## TODO KK PR#246 Do we need to split using IFS or is it always spaces? -## -## TODO MMG this isn't quite adequate: if pash_input_args contains -## spaces, we'll miscount. KK and I wrote a test -## evaluation/tests/interface_tests that's disabled as of PR#246. 
-## -## the right solution here is: -## -## - positional arguments get their own field in the -## exp_state---they're not store with ordinary shell -## variables -## -## - we save those separately, probably in a separate file -## -## ``` -## echo pash_argc=$# >pash_positional_args -## for i in $(seq 0 $#) -## do -## echo "pash_arg$i=\"$i\"" >pash_positional_args -## done -## ``` -## -## - we load these separately. pretty annoying; here's a sketch -## -## ``` -## cmd="set --" -## for i in $(seq 0 $pash_argc) -## do -## cmd="$cmd \"\$pash_arg$i\"" -## done -## eval "$cmd" -def set_special_parameters(variables: dict): - new_vars = variables.copy() - - ia_t, input_args = get_var(variables, 'pash_input_args') - es_t, exit_status = get_var(variables, 'pash_previous_exit_status') - ss_t, set_status = get_var(variables, 'pash_previous_set_status') - sn_t, shell_name = get_var(variables, 'pash_shell_name') - - ## TODO: Set the types of variables correctly - new_vars['@'] = ia_t, " ".join(input_args) - new_vars['?'] = es_t, exit_status - new_vars['-'] = ss_t, set_status - new_vars['0'] = sn_t, shell_name - new_vars['#'] = ia_t, str(len(input_args)) - - for i, arg in enumerate(input_args): - index = i + 1 - new_vars[str(index)] = input_args[i] - - return new_vars - -def get_var(variables: dict, varname: str): - type, value = variables.get(varname, [None, None]) - return type, value - -def is_array_variable(token): - return ('a' in token) - -## Based on the following: -## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting -def ansi_c_expand(string): - return bytes(string, "utf-8").decode("unicode_escape") - -## This finds the end of this variable/function -def find_next_delimiter(tokens, i): - if (tokens[i] == "declare"): - return i + 3 - else: - ## TODO: When is this case actually useful? - j = i + 1 - while j < len(tokens) and (tokens[j] != "declare"): - j += 1 - return j - -def parse_array_variable(tokens, i): - ## The `declare` keyword - _declare = tokens[i] - ## The type - declare_type = tokens[i+1] - assert(is_array_variable(declare_type)) - - ## The variable name and first argument - ## TODO: Test with empty array and single value array - name_and_start=tokens[i+2] - first_equal_index = name_and_start.find('=') - - ## If it doesn't contain any = then it is empty - if first_equal_index == -1: - ## Then the name is the whole token, - ## the type is None (TODO) - ## and the value is empty - return name_and_start, None, "", i+3 - - var_name = name_and_start[:first_equal_index] - array_start = name_and_start[first_equal_index+1:] - - var_values = [] - if array_start == "()": - next_i = i+3 - else: - ## Remove the opening parenthesis - array_item = array_start[1:] - - ## Set the index that points to array items - curr_i = i+2 - - done = False - while not done: - ## TODO: Is this check adequate? Or could it miss the end - ## (or be misleaded into an earlier end by the item value?) 
- if array_item.endswith(")"): - done = True - array_item = array_item[:-1] - - first_equal_index = array_item.find('=') - ## Find the index and value of the array item - item_index_raw = array_item[:first_equal_index] - item_value = array_item[first_equal_index+1:] - - ## Sometimes the value starts with a dollar mark, see Bash ANSI-C quoting: - ## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting - if item_value.startswith("$"): - ## TODO: Figure out if this is adequate - item_value = ansi_c_expand(item_value[1:]) - - item_index = int(item_index_raw[1:-1]) - - ## Add None values if the index is larger than the next item (see Bash sparse arrays) - ## TODO: Keep bash array values as maps to avoid sparse costs - var_values += [None] * (item_index - len(var_values)) - ## Set the next item - var_values.append(item_value) - - - - ## Get next array_item - curr_i += 1 - array_item = tokens[curr_i] - - next_i = curr_i - - ## TODO: Michael? - var_type = None - - return var_name, var_type, var_values, next_i diff --git a/compiler/ir.py b/compiler/ir.py index 211d1242b..b7319cc23 100644 --- a/compiler/ir.py +++ b/compiler/ir.py @@ -2,14 +2,29 @@ from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial from pash_annotations.datatypes.BasicDatatypes import ArgStringType -from pash_annotations.datatypes.BasicDatatypesWithIO import FileNameWithIOInfo, StdDescriptorWithIOInfo, OptionWithIO -from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo -from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo -from pash_annotations.annotation_generation.datatypes.CommandProperties import CommandProperties -from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars +from pash_annotations.datatypes.BasicDatatypesWithIO import ( + FileNameWithIOInfo, + StdDescriptorWithIOInfo, + OptionWithIO, +) +from pash_annotations.annotation_generation.datatypes.InputOutputInfo import ( + InputOutputInfo, +) +from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ( + ParallelizabilityInfo, +) +from pash_annotations.annotation_generation.datatypes.CommandProperties import ( + CommandProperties, +) +from pash_annotations.datatypes.CommandInvocationWithIOVars import ( + CommandInvocationWithIOVars, +) from annotations_utils.util_parsing import parse_arg_list_to_command_invocation -from annotations_utils.util_cmd_invocations import get_input_output_info_from_cmd_invocation_util, get_parallelizability_info_from_cmd_invocation_util +from annotations_utils.util_cmd_invocations import ( + get_input_output_info_from_cmd_invocation_util, + get_parallelizability_info_from_cmd_invocation_util, +) from annotations_utils.util_file_descriptors import resource_from_file_descriptor from definitions.ir.file_id import * @@ -23,26 +38,30 @@ from shell_ast.ast_util import * from util import * +from custom_error import * import config + ## Creates a file id for a given resource def create_file_id_for_resource(resource, fileIdGen): file_id = create_split_file_id(fileIdGen) file_id.set_resource(resource) return file_id + ## Creates a file id that has a given maximum length def create_split_file_id(fileIdGen): file_id = fileIdGen.next_file_id() return file_id + class FileIdGen: - def __init__(self, next = 0, prefix = ""): + def __init__(self, next=0, prefix=""): self.next = next + 1 directory = f"{str(uuid.uuid4().hex)}" 
self.prefix = f"{directory}/{prefix}" - directory_path = os.path.join(config.PASH_TMP_PREFIX, self.prefix) + directory_path = os.path.join(config.PASH_TMP_PREFIX, self.prefix) os.makedirs(directory_path) def next_file_id(self): @@ -64,37 +83,44 @@ def bump_counter_to_value_of(self, OtherFileIdGen): # TODO: find a better solution to make unique numbers, currently: set to max-value + 1 self.next = OtherFileIdGen.next + 1 + ## Returns the resource or file descriptor related to this specific opt_or_fd -## NOTE: Assumes that everything is expanded. +## NOTE: Assumes that everything is expanded. def get_option_or_fd(opt_or_fd, options, fileIdGen): - if(isinstance(opt_or_fd, tuple) - and len(opt_or_fd) == 2 - and opt_or_fd[0] == "option"): + if ( + isinstance(opt_or_fd, tuple) + and len(opt_or_fd) == 2 + and opt_or_fd[0] == "option" + ): resource = FileResource(Arg(options[opt_or_fd[1]])) else: ## TODO: Make this be a subtype of Resource - if(opt_or_fd == "stdin"): + if opt_or_fd == "stdin": resource = ("fd", 0) - elif(opt_or_fd == "stdout"): + elif opt_or_fd == "stdout": resource = ("fd", 1) - elif(opt_or_fd == "stderr"): + elif opt_or_fd == "stderr": resource = ("fd", 2) else: raise NotImplementedError() resource = FileDescriptorResource(resource) - + fid = create_file_id_for_resource(resource, fileIdGen) return fid + ## Get the options as arguments def get_option(opt_or_fd, options, fileIdGen): - assert(isinstance(opt_or_fd, tuple) - and len(opt_or_fd) == 2 - and opt_or_fd[0] == "option") + assert ( + isinstance(opt_or_fd, tuple) + and len(opt_or_fd) == 2 + and opt_or_fd[0] == "option" + ) arg = Arg(options[opt_or_fd[1]]) return (opt_or_fd[1], arg) -## This function + +## This function def create_edges_from_opt_or_fd_list(opt_or_fd_list, edges_dict, options, fileIdGen): new_edge_list = [] for opt_or_fd in opt_or_fd_list: @@ -105,23 +131,37 @@ def create_edges_from_opt_or_fd_list(opt_or_fd_list, edges_dict, options, fileId return new_edge_list -def find_input_edges(positional_input_list, implicit_use_of_stdin, dfg_edges, fileIdGen) -> List[int]: - assert (not implicit_use_of_stdin or len(positional_input_list) == 0) +def find_input_edges( + positional_input_list, implicit_use_of_stdin, dfg_edges, fileIdGen +) -> List[int]: + assert not implicit_use_of_stdin or len(positional_input_list) == 0 if implicit_use_of_stdin: resources = [FileDescriptorResource(("fd", 0))] else: - resources = [resource_from_file_descriptor(input_el) for input_el in positional_input_list] - file_ids = [create_file_id_for_resource(resource, fileIdGen) for resource in resources] + resources = [ + resource_from_file_descriptor(input_el) + for input_el in positional_input_list + ] + file_ids = [ + create_file_id_for_resource(resource, fileIdGen) for resource in resources + ] return get_edge_list_from_file_id_list(dfg_edges, file_ids) -def find_output_edges(positional_output_list, implicit_use_of_stdout, dfg_edges, fileIdGen) -> List[int]: - assert (not implicit_use_of_stdout or len(positional_output_list) == 0) +def find_output_edges( + positional_output_list, implicit_use_of_stdout, dfg_edges, fileIdGen +) -> List[int]: + assert not implicit_use_of_stdout or len(positional_output_list) == 0 if implicit_use_of_stdout: resources = [FileDescriptorResource(("fd", 1))] else: - resources = [resource_from_file_descriptor(input_el) for input_el in positional_output_list] - file_ids = [create_file_id_for_resource(resource, fileIdGen) for resource in resources] + resources = [ + resource_from_file_descriptor(input_el) + for input_el 
in positional_output_list + ] + file_ids = [ + create_file_id_for_resource(resource, fileIdGen) for resource in resources + ] return get_edge_list_from_file_id_list(dfg_edges, file_ids) @@ -152,88 +192,125 @@ def add_var_for_descriptor(operand): for i in range(len(command_invocation_with_io.flag_option_list)): flagoption = command_invocation_with_io.flag_option_list[i] - if isinstance(flagoption, OptionWithIO) and not isinstance(flagoption.option_arg, ArgStringType): + if isinstance(flagoption, OptionWithIO) and not isinstance( + flagoption.option_arg, ArgStringType + ): fid_id = add_var_for_descriptor(flagoption.option_arg) new_option = OptionWithIOVar(flagoption.name, fid_id) new_flagoption_list.append(new_option) - else: # Flag + else: # Flag new_flagoption_list.append(flagoption) for i in range(len(command_invocation_with_io.operand_list)): operand = command_invocation_with_io.operand_list[i] - if isinstance(operand, FileNameWithIOInfo) or isinstance(operand, StdDescriptorWithIOInfo): + if isinstance(operand, FileNameWithIOInfo) or isinstance( + operand, StdDescriptorWithIOInfo + ): fid_id = add_var_for_descriptor(operand) new_operand_list.append(fid_id) else: new_operand_list.append(operand) if command_invocation_with_io.implicit_use_of_streaming_input: - new_implicit_use_of_streaming_input = add_var_for_descriptor(command_invocation_with_io.implicit_use_of_streaming_input) + new_implicit_use_of_streaming_input = add_var_for_descriptor( + command_invocation_with_io.implicit_use_of_streaming_input + ) else: new_implicit_use_of_streaming_input = None if command_invocation_with_io.implicit_use_of_streaming_output: - new_implicit_use_of_streaming_output = add_var_for_descriptor(command_invocation_with_io.implicit_use_of_streaming_output) + new_implicit_use_of_streaming_output = add_var_for_descriptor( + command_invocation_with_io.implicit_use_of_streaming_output + ) else: new_implicit_use_of_streaming_output = None - command_invocation_with_io_vars = CommandInvocationWithIOVars(cmd_name=command_invocation_with_io.cmd_name, - flag_option_list=new_flagoption_list, - operand_list=new_operand_list, - implicit_use_of_streaming_input=new_implicit_use_of_streaming_input, - implicit_use_of_streaming_output=new_implicit_use_of_streaming_output, - access_map=access_map) + command_invocation_with_io_vars = CommandInvocationWithIOVars( + cmd_name=command_invocation_with_io.cmd_name, + flag_option_list=new_flagoption_list, + operand_list=new_operand_list, + implicit_use_of_streaming_input=new_implicit_use_of_streaming_input, + implicit_use_of_streaming_output=new_implicit_use_of_streaming_output, + access_map=access_map, + ) return command_invocation_with_io_vars, dfg_edges -def compile_command_to_DFG(fileIdGen, command, options, - redirections=[]): - command_invocation: CommandInvocationInitial = parse_arg_list_to_command_invocation(command, options) - io_info: InputOutputInfo = get_input_output_info_from_cmd_invocation_util(command_invocation) +def compile_command_to_DFG(fileIdGen, command, options, redirections=None): + redirections = [] if redirections is None else redirections + command_invocation: CommandInvocationInitial = parse_arg_list_to_command_invocation( + command, options + ) + io_info: InputOutputInfo = get_input_output_info_from_cmd_invocation_util( + command_invocation + ) if io_info is None: - raise Exception(f"InputOutputInformation for {format_arg_chars(command)} not provided so considered side-effectful.") + raise UnparallelizableError( + f"InputOutputInformation for 
{format_arg_chars(command)} not provided so considered side-effectful." + ) if io_info.has_other_outputs(): - raise Exception(f"Command {format_arg_chars(command)} has outputs other than streaming.") - para_info: ParallelizabilityInfo = get_parallelizability_info_from_cmd_invocation_util(command_invocation) + raise UnparallelizableError( + f"Command {format_arg_chars(command)} has outputs other than streaming." + ) + para_info: ParallelizabilityInfo = ( + get_parallelizability_info_from_cmd_invocation_util(command_invocation) + ) if para_info is None: - para_info = ParallelizabilityInfo() # defaults to no parallelizer's and all properties False - command_invocation_with_io = io_info.apply_input_output_info_to_command_invocation(command_invocation) + para_info = ( + ParallelizabilityInfo() + ) # defaults to no parallelizer's and all properties False + command_invocation_with_io = io_info.apply_input_output_info_to_command_invocation( + command_invocation + ) if para_info is None: - para_info = ParallelizabilityInfo() # defaults to no parallelizer's and all properties False - parallelizer_list, round_robin_compatible_with_cat, is_commutative = para_info.unpack_info() - property_dict = [{'round_robin_compatible_with_cat': round_robin_compatible_with_cat, - 'is_commutative': is_commutative}] + para_info = ( + ParallelizabilityInfo() + ) # defaults to no parallelizer's and all properties False + ( + parallelizer_list, + round_robin_compatible_with_cat, + is_commutative, + ) = para_info.unpack_info() + property_dict = [ + { + "round_robin_compatible_with_cat": round_robin_compatible_with_cat, + "is_commutative": is_commutative, + } + ] cmd_related_properties = CommandProperties(property_dict) ## TODO: Make an empty IR and add edges and nodes incrementally (using the methods defined in IR). ## Add all inputs and outputs to the DFG edges - cmd_invocation_with_io_vars, dfg_edges = add_file_id_vars(command_invocation_with_io, fileIdGen) + cmd_invocation_with_io_vars, dfg_edges = add_file_id_vars( + command_invocation_with_io, fileIdGen + ) com_redirs = redirections ## TODO: Add assignments com_assignments = [] ## Assume: Everything must be completely expanded ## TODO: Add an assertion about that. 
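# UnparallelizableError (and AdjLineNotImplementedError used further below) come
# from the new `from custom_error import *` import; that module is not part of
# this hunk, so the following is only a plausible sketch of the exceptions the
# compiler code relies on (the real definitions may differ):
class UnparallelizableError(Exception):
    """Raised when a node cannot be parallelized: missing annotations,
    non-streaming outputs, or an unsupported splitter/aggregator."""


class AdjLineNotImplementedError(Exception):
    """Raised for the adjacent-lines aggregators that PaSh does not support yet."""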
- dfg_node = DFGNode(cmd_invocation_with_io_vars, - com_redirs=com_redirs, - com_assignments=com_assignments, - parallelizer_list=parallelizer_list, - cmd_related_properties=cmd_related_properties - ) + dfg_node = DFGNode( + cmd_invocation_with_io_vars, + com_redirs=com_redirs, + com_assignments=com_assignments, + parallelizer_list=parallelizer_list, + cmd_related_properties=cmd_related_properties, + ) # log(f'Dfg node: {dfg_node}') node_id = dfg_node.get_id() ## Assign the from, to node in edges for fid_id in dfg_node.get_input_list(): fid, from_node, to_node = dfg_edges[fid_id] - assert(to_node is None) + assert to_node is None dfg_edges[fid_id] = (fid, from_node, node_id) - + for fid_id in dfg_node.get_output_list(): fid, from_node, to_node = dfg_edges[fid_id] - assert(from_node is None) + assert from_node is None dfg_edges[fid_id] = (fid, node_id, to_node) - - dfg_nodes = {node_id : dfg_node} + + dfg_nodes = {node_id: dfg_node} dfg = IR(dfg_nodes, dfg_edges) # log(f'IR: {dfg}') return dfg @@ -243,20 +320,17 @@ def compile_command_to_DFG(fileIdGen, command, options, ## Node builder functions ## + def make_tee(input, outputs): com_name = Arg.string_to_arg("tee") com_category = "pure" - return DFGNode([input], - outputs, - com_name, - com_category) + return DFGNode([input], outputs, com_name, com_category) ## Note: This might need more information. E.g. all the file ## descriptors of the IR, and in general any other local information ## that might be relevant. class IR: - ## TODO: Embed the fileIdGen as a field of the IR ## IR Assumptions: @@ -266,7 +340,7 @@ class IR: ## ## - If two nodes have the same file as output, then they both ## write to it concurrently. - def __init__(self, nodes, edges, background = False): + def __init__(self, nodes, edges, background=False): self.nodes = nodes self.edges = edges self.background = background @@ -277,28 +351,30 @@ def __init__(self, nodes, edges, background = False): self.apply_redirections() def __repr__(self): - output = "(|-{} IR: {} {}-|)".format(self.get_stdin(), list(self.nodes.values()), self.get_stdout()) + output = "(|-{} IR: {} {}-|)".format( + self.get_stdin(), list(self.nodes.values()), self.get_stdout() + ) return output ## Initialize all edges def apply_redirections(self): for _, node in self.nodes.items(): node.apply_redirections(self.edges) - + ## We need to merge common files after redirections have been applied. 
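# For orientation: the IR stores every edge as edge_id -> (fid, from_node_id,
# to_node_id), and helpers such as add_edge/set_edge_to/set_edge_from (defined
# further down) only ever rewrite the two endpoint slots. A toy view of that
# bookkeeping with invented ids:
edges = {}
edges[7] = ("fid_7", None, None)           # add_edge: fully dangling
edges[7] = (edges[7][0], None, 42)         # set_edge_to: node 42 now reads edge 7
edges[7] = (edges[7][0], 13, edges[7][2])  # set_edge_from: node 13 now writes it
print(edges[7])                            # ('fid_7', 13, 42)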
self.combine_common_files() - ## Refactor these to call .add_edge, and .set_edge_to/from + ## Refactor these to call .add_edge, and .set_edge_to/from ## Add an edge that points to a node def add_to_edge(self, to_edge, node_id): edge_id = to_edge.get_ident() - assert(not edge_id in self.edges) + assert not edge_id in self.edges self.edges[edge_id] = (to_edge, None, node_id) ## Add an edge that starts from a node def add_from_edge(self, node_id, from_edge): edge_id = from_edge.get_ident() - assert(not edge_id in self.edges) + assert not edge_id in self.edges self.edges[edge_id] = (from_edge, node_id, None) def set_edge_to(self, edge_id, to_node_id): @@ -310,19 +386,19 @@ def set_edge_from(self, edge_id, from_node_id): self.edges[edge_id] = (edge_fid, from_node_id, to_node) def get_edge_fid(self, fid_id): - if(fid_id in self.edges): + if fid_id in self.edges: return self.edges[fid_id][0] else: return None def get_edge_from(self, edge_id): - if(edge_id in self.edges): + if edge_id in self.edges: return self.edges[edge_id][1] else: return None def replace_edge(self, old_edge_id, new_edge_fid): - assert(new_edge_fid not in self.all_fids()) + assert new_edge_fid not in self.all_fids() new_edge_id = new_edge_fid.get_ident() old_fid, from_node, to_node = self.edges[old_edge_id] self.edges[new_edge_id] = (new_edge_fid, from_node, to_node) @@ -331,7 +407,7 @@ def replace_edge(self, old_edge_id, new_edge_fid): if to_node: self.get_node(to_node).replace_edge(old_edge_id, new_edge_id) del self.edges[old_edge_id] - + def get_stdin(self): stdin_id = self.get_stdin_id() stdin_fid = self.get_edge_fid(stdin_id) @@ -348,39 +424,43 @@ def get_stdin_id(self): stdin_id = None for edge_id, (edge_fid, _from, _to) in self.edges.items(): resource = edge_fid.get_resource() - if(resource.is_stdin()): - assert(stdin_id is None) + if resource.is_stdin(): + assert stdin_id is None stdin_id = edge_id - return stdin_id + return stdin_id def get_stdout_id(self): ## ASSERT: There must be only one stdout_id = None for edge_id, (edge_fid, _from, _to) in self.edges.items(): resource = edge_fid.get_resource() - if(resource.is_stdout()): + if resource.is_stdout(): # This is not true when using distributed_exec # assert(stdout_id is None) stdout_id = edge_id - return stdout_id + return stdout_id def serialize(self): output = "Nodes:\n" all_file_ids = "" for i, node in enumerate(self.nodes): - serialized_input_file_ids = " ".join([fid.serialize() - for fid in node.get_input_file_ids()]) - serialized_output_file_ids = " ".join([fid.serialize() - for fid in node.get_output_file_ids()]) + serialized_input_file_ids = " ".join( + [fid.serialize() for fid in node.get_input_file_ids()] + ) + serialized_output_file_ids = " ".join( + [fid.serialize() for fid in node.get_output_file_ids()] + ) all_file_ids += serialized_input_file_ids + " " all_file_ids += serialized_output_file_ids + " " - output += "{} in: {} out: {} command: {}\n".format(i, serialized_input_file_ids, - serialized_output_file_ids, - node.serialize()) + output += "{} in: {} out: {} command: {}\n".format( + i, + serialized_input_file_ids, + serialized_output_file_ids, + node.serialize(), + ) output = "File ids:\n{}\n".format(all_file_ids) + output return output - def to_ast(self, drain_streams) -> "list[AstNode]": asts = [] @@ -391,7 +471,7 @@ def to_ast(self, drain_streams) -> "list[AstNode]": ## Redirect stdin stdin_id = self.get_stdin_id() - if (not stdin_id is None): + if not stdin_id is None: ## Create a new ephemeral resource to redirect stdin to. 
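# The stdin plumbing assembled in the next few lines produces an AST equivalent
# to the shell fragment
#     source "$PASH_TOP/<runtime.redirect_stdin_binary>" <ephemeral-file>
# with the script name taken from config.config["runtime"]; i.e. stdin of the
# whole dataflow is first captured into an ephemeral resource so that the
# background nodes read from a named edge instead of fd 0.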
fid = fileIdGen.next_file_id() fid.make_ephemeral() @@ -400,15 +480,21 @@ def to_ast(self, drain_streams) -> "list[AstNode]": _prev_fid, from_node, to_node = self.edges[stdin_id] self.edges[stdin_id] = (fid, from_node, to_node) ## Create a command that redirects stdin to this ephemeral fid - redirect_stdin_script = os.path.join(config.PASH_TOP, config.config['runtime']['redirect_stdin_binary']) - com_args = [string_to_argument('source'), string_to_argument(redirect_stdin_script), file_to_redirect_to] + redirect_stdin_script = os.path.join( + config.PASH_TOP, config.config["runtime"]["redirect_stdin_binary"] + ) + com_args = [ + string_to_argument("source"), + string_to_argument(redirect_stdin_script), + file_to_redirect_to, + ] com = make_command(com_args) asts.append(com) ## Make the dataflow graph ## ## TODO: Normally this should have all sink nodes at the end, but - ## for now we just have the stdout node in the end + ## for now we just have the stdout node in the end ## (since this is always the output in our benchmarks). # sink_node_ids = self.sink_nodes() ## @@ -418,15 +504,14 @@ def to_ast(self, drain_streams) -> "list[AstNode]": ## For now we just allow more than one output by waiting for one of them ## at random. stdout_edge_id = self.get_stdout_id() - if (not stdout_edge_id is None): + if not stdout_edge_id is None: sink_node_ids = [self.edges[stdout_edge_id][1]] else: sink_node_ids = self.sink_nodes() sink_node_ids = [sink_node_ids[0]] - for node_id, node in self.nodes.items(): - if(not node_id in sink_node_ids): + if not node_id in sink_node_ids: node_ast = node.to_ast(self.edges, drain_streams) asts.append(make_background(node_ast)) ## Gather all pids @@ -445,20 +530,20 @@ def to_ast(self, drain_streams) -> "list[AstNode]": ## TODO: Ideally we would like to make them as typed nodes already class_asts = [to_ast_node(ast_node_to_untyped_deep(ast)) for ast in asts] return class_asts - + def collect_pid_assignment(self): ## Creates: ## pids_to_kill="$! $pids_to_kill" - var_name = 'pids_to_kill' - rval = quote_arg([standard_var_ast('!'), - char_to_arg_char(' '), - standard_var_ast(var_name)]) + var_name = "pids_to_kill" + rval = quote_arg( + [standard_var_ast("!"), char_to_arg_char(" "), standard_var_ast(var_name)] + ) return make_assignment(var_name, [rval]) - + def init_pids_to_kill(self): ## Creates: ## pids_to_kill="" - var_name = 'pids_to_kill' + var_name = "pids_to_kill" rval = quote_arg([]) return make_assignment(var_name, [rval]) @@ -469,7 +554,7 @@ def set_ast(self, ast): def set_background(self, background): self.background = background - if (background): + if background: ## Since the IR is in the background, we don't have access to ## its stdin, stdout anymore self.stdin = [] @@ -479,8 +564,8 @@ def is_in_background(self): return self.background def pipe_append(self, other): - assert(self.valid()) - assert(other.valid()) + assert self.valid() + assert other.valid() ## This combines the two IRs by adding all of the nodes ## together, and by union-ing the stdout of the first with the @@ -491,12 +576,11 @@ def pipe_append(self, other): ## both self and other are not empty. my_out = self.get_stdout_id() other_in = other.get_stdin_id() - assert(not my_out is None) - assert(not other_in is None) - + assert not my_out is None + assert not other_in is None _other_in_fid, from_node, other_in_node_id = other.edges[other_in] - assert(from_node is None) + assert from_node is None ## ... = OtherInNode(..., other_in, ...) ## v ## ... = OtherInNode(..., my_out, ...) 
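# Toy model of the rewiring sketched in the comment above for `left | right`
# (edge/node ids invented): the node that read `right`'s stdin is re-pointed at
# `left`'s stdout edge, which is then made ephemeral; whether the old stdin edge
# survives is an implementation detail not shown in this hunk.
left_edges = {10: ("left_stdout_fid", 1, None)}  # node 1 writes edge 10 (stdout)
right_reader_inputs = [20]                       # node 2 reads edge 20 (its stdin)
fid, frm, _to = left_edges[10]
left_edges[10] = (fid, frm, 2)                   # set_edge_to(my_out, other_in_node_id)
right_reader_inputs = [10 if e == 20 else e for e in right_reader_inputs]
print(left_edges, right_reader_inputs)           # {10: (..., 1, 2)} [10]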
@@ -506,7 +590,7 @@ def pipe_append(self, other): ## Make the my_out id to be ephemeral file. my_out_fid, from_node, to_node = self.edges[my_out] - assert(to_node is None) + assert to_node is None my_out_fid.make_ephemeral() ## Add the other node in my edges @@ -516,9 +600,9 @@ def pipe_append(self, other): self.union(other) def background_union(self, other): - assert(self.valid()) - assert(other.valid()) - assert(self.is_in_background()) + assert self.valid() + assert other.valid() + assert self.is_in_background() ## This combines two IRs where at least the first one is in ## background. This means that the stdin only works with the second ## the second (or None if both are in background). Also if @@ -526,7 +610,7 @@ def background_union(self, other): ## If one of them is not in the background, then the whole ## thing isn't. - if (not other.is_in_background()): + if not other.is_in_background(): self.set_background(other.is_in_background()) self.union(other) @@ -545,7 +629,6 @@ def union(self, other): ## TODO: Handle connections of common files (pipes, etc) self.combine_common_files() - ## Combines (unions) files that refer to the same resource. ## ## WARNING: This assumes that comparing file names statically @@ -560,7 +643,6 @@ def union(self, other): ## the IR? Maybe it can be true if a command is run with ## variable assignments) def combine_common_files(self): - ## For now we just unify a file if it exists exactly twice, ## once at the input of a node and once at the output of ## another node. If a file exists in several input locations, @@ -572,19 +654,24 @@ def combine_common_files(self): ## of exactly one other node. # log("Combining files for:", self) for node_id1, _node1 in self.nodes.items(): - inputs_with_file_resource = [(id, fid) for id, fid in self.get_node_input_ids_fids(node_id1) - if fid.has_file_resource()] + inputs_with_file_resource = [ + (id, fid) + for id, fid in self.get_node_input_ids_fids(node_id1) + if fid.has_file_resource() + ] for id_in, fid_in in inputs_with_file_resource: in_resource = fid_in.get_resource() number_of_out_resources = 0 for node_id2, _node2 in self.nodes.items(): - outputs_with_file_resource = [(id, fid) for id, fid in self.get_node_output_ids_fids(node_id2) - if fid.has_file_resource()] + outputs_with_file_resource = [ + (id, fid) + for id, fid in self.get_node_output_ids_fids(node_id2) + if fid.has_file_resource() + ] for id_out, fid_out in outputs_with_file_resource: out_resource = fid_out.get_resource() ## Do not combine if the ids of the edges are already the same - if (not id_in == id_out - and in_resource == out_resource): + if not id_in == id_out and in_resource == out_resource: number_of_out_resources += 1 ## They point to the same File resource so we need to unify their fids self.nodes[node_id2].replace_edge(id_out, id_in) @@ -594,7 +681,7 @@ def combine_common_files(self): ## Exit with an error if a file is written by more than one node. ## ## TODO: Could this ever be improved for additional performance? - assert(number_of_out_resources <= 1) + assert number_of_out_resources <= 1 ## Returns all the file identifiers in the IR. 
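# Worked example of the rule combine_common_files applies: when exactly one node
# writes a file resource and another node reads the same resource, the reader's
# input edge absorbs the writer's output edge so data flows writer -> reader
# directly. Resources and edge ids below are invented:
inputs_of_reader = {5: "tmp.txt"}    # e.g. `wc -l tmp.txt` reads edge 5
outputs_of_writer = {9: "tmp.txt"}   # e.g. `grep foo ... > tmp.txt` writes edge 9
matches = [eid for eid, res in outputs_of_writer.items()
           if eid != 5 and res == inputs_of_reader[5]]
assert len(matches) <= 1             # mirrors `number_of_out_resources <= 1`
# -> the writer's edge 9 is replaced by edge 5 (replace_edge + set_edge_from).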
def all_fids(self): @@ -603,23 +690,25 @@ def all_fids(self): ## Returns all input fids of the IR def all_input_fids(self): - all_input_fids = [fid for fid, from_node, _to_node in self.edges.values() - if from_node is None] + all_input_fids = [ + fid for fid, from_node, _to_node in self.edges.values() if from_node is None + ] return all_input_fids ## Returns all output fids of the IR def all_output_fids(self): - all_output_fids = [fid for fid, _from_node, to_node in self.edges.values() - if to_node is None] + all_output_fids = [ + fid for fid, _from_node, to_node in self.edges.values() if to_node is None + ] return all_output_fids ## Returns the sources of the IR. ## This includes both the nodes that have an incoming edge (file) that has no from_node, - ## but also nodes that have no incoming edge (generator nodes). + ## but also nodes that have no incoming edge (generator nodes). def source_nodes(self): sources = set() for _edge_fid, from_node, to_node in self.edges.values(): - if(from_node is None and not to_node is None): + if from_node is None and not to_node is None: sources.add(to_node) for node_id, node in self.nodes.items(): if len(node.get_input_list()) == 0: @@ -629,7 +718,7 @@ def source_nodes(self): def sink_nodes(self): sources = set() for _edge_fid, from_node, to_node in self.edges.values(): - if(to_node is None and not from_node is None): + if to_node is None and not from_node is None: sources.add(from_node) return list(sources) @@ -646,8 +735,8 @@ def get_next_nodes(self, node_id): next_nodes = [] for edge_id in output_edge_ids: _fid, from_node, to_node = self.edges[edge_id] - assert(from_node == node_id) - if(not to_node is None): + assert from_node == node_id + if not to_node is None: next_nodes.append(to_node) return next_nodes @@ -656,14 +745,17 @@ def get_previous_nodes(self, node_id): previous_nodes = [] for edge_id in input_edge_ids: _fid, from_node, to_node = self.edges[edge_id] - assert(to_node == node_id) - if(not from_node is None): + assert to_node == node_id + if not from_node is None: previous_nodes.append(from_node) return previous_nodes def get_node_input_ids_fids(self, node_id): node = self.get_node(node_id) - return [(input_edge_id, self.edges[input_edge_id][0]) for input_edge_id in node.get_input_list()] + return [ + (input_edge_id, self.edges[input_edge_id][0]) + for input_edge_id in node.get_input_list() + ] def get_node_input_ids(self, node_id): return [fid_id for fid_id, _fid in self.get_node_input_ids_fids(node_id)] @@ -673,7 +765,10 @@ def get_node_input_fids(self, node_id): def get_node_output_ids_fids(self, node_id): node = self.get_node(node_id) - return [(output_edge_id, self.edges[output_edge_id][0]) for output_edge_id in node.get_output_list()] + return [ + (output_edge_id, self.edges[output_edge_id][0]) + for output_edge_id in node.get_output_list() + ] def get_node_output_ids(self, node_id): return [fid_id for fid_id, _fid in self.get_node_output_ids_fids(node_id)] @@ -700,7 +795,6 @@ def remove_node(self, node_id): for out_id in node.get_output_list(): self.set_edge_from(out_id, None) - def add_node(self, node): node_id = node.get_id() self.nodes[node_id] = node @@ -722,160 +816,254 @@ def add_edges(self, edge_fids): def add_edge(self, edge_fid): fid_id = edge_fid.get_ident() - assert(not fid_id in self.edges) + assert not fid_id in self.edges self.edges[fid_id] = (edge_fid, None, None) ## Note: We assume that the lack of nodes is an adequate condition ## to check emptiness. 
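# Looking back at source_nodes/sink_nodes above: both are computed purely from
# dangling edge endpoints (plus, for sources, nodes with an empty input list).
# A toy check with invented ids:
edges = {
    1: ("in.txt", None, 10),   # nobody writes it -> node 10 is a source
    2: ("pipe", 10, 11),
    3: ("out.txt", 11, None),  # nobody reads it  -> node 11 is a sink
}
sources = {to for _, frm, to in edges.values() if frm is None and to is not None}
sinks = {frm for _, frm, to in edges.values() if to is None and frm is not None}
print(sources, sinks)  # {10} {11}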
def empty(self): - return (len(self.nodes) == 0) + return len(self.nodes) == 0 - def apply_parallelization_to_node(self, node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size): + def apply_parallelization_to_node( + self, node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size + ): splitter = parallelizer.get_splitter() if splitter.is_splitter_round_robin(): - self.apply_round_robin_parallelization_to_node(node_id, parallelizer, fileIdGen, fan_out, - r_split_batch_size) + self.apply_round_robin_parallelization_to_node( + node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size + ) elif splitter.is_splitter_round_robin_with_unwrap_flag(): - self.apply_round_robin_with_unwrap_flag_parallelization_to_node(node_id, parallelizer, fileIdGen, fan_out, - r_split_batch_size) + self.apply_round_robin_with_unwrap_flag_parallelization_to_node( + node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size + ) elif splitter.is_splitter_consec_chunks(): - self.apply_consecutive_chunks_parallelization_to_node(node_id, parallelizer, fileIdGen, fan_out) + self.apply_consecutive_chunks_parallelization_to_node( + node_id, parallelizer, fileIdGen, fan_out + ) else: - raise Exception("Splitter not yet implemented") + raise UnparallelizableError("Splitter not yet implemented for command: {}".format(self.get_node(node_id=node_id).cmd_invocation_with_io_vars.cmd_name)) - def apply_round_robin_parallelization_to_node(self, node_id, parallelizer, fileIdGen, fan_out, - r_split_batch_size): + def apply_round_robin_parallelization_to_node( + self, node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size + ): # TODO: this control flow should move done to aggregators once we implement them; # currently, this cannot be done since splitter etc. would be added... aggregator_spec = parallelizer.get_aggregator_spec() if aggregator_spec.is_aggregator_spec_adj_lines_merge(): - raise Exception("adj_lines_merge not yet implemented in PaSh") + raise AdjLineNotImplementedError("adj_lines_merge not yet implemented in PaSh") elif aggregator_spec.is_aggregator_spec_adj_lines_seq(): - raise Exception("adj_lines_seq not yet implemented in PaSh") + raise AdjLineNotImplementedError("adj_lines_seq not yet implemented in PaSh") elif aggregator_spec.is_aggregator_spec_adj_lines_func(): - raise Exception("adj_lines_func not yet implemented in PaSh") + raise AdjLineNotImplementedError("adj_lines_func not yet implemented in PaSh") # END of what to move node = self.get_node(node_id) # get info from node, and delete it from graph - streaming_input, streaming_output, configuration_inputs = \ + ( + streaming_input, + streaming_output, + configuration_inputs, + ) = ( node.get_single_streaming_input_single_output_and_configuration_inputs_of_node_for_parallelization() + ) original_cmd_invocation_with_io_vars = node.cmd_invocation_with_io_vars - can_be_fused_with_prev = False prev_nodes = self.get_previous_nodes(node_id) if len(prev_nodes) == 1: - first_pred_node, first_pred_cmd_inv = \ - self.get_only_previous_node_and_only_previous_cmd_invocation(prev_nodes) + ( + first_pred_node, + first_pred_cmd_inv, + ) = self.get_only_previous_node_and_only_previous_cmd_invocation(prev_nodes) if isinstance(first_pred_node, r_merge.RMerge): can_be_fused_with_prev = True # remove node to be parallelized - self.remove_node(node_id) # remove it here already as as we need to remove edge end points ow. 
to avoid disconnecting graph to avoid disconnecting graph + self.remove_node( + node_id + ) # remove it here already as as we need to remove edge end points ow. to avoid disconnecting graph to avoid disconnecting graph if can_be_fused_with_prev: - self.remove_node(prev_nodes[0]) # also sets respective edge to's and from's to None + self.remove_node( + prev_nodes[0] + ) # also sets respective edge to's and from's to None in_mapper_ids = first_pred_cmd_inv.operand_list - else: # cannot be fused so introduce splitter + else: # cannot be fused so introduce splitter # splitter - round_robin_splitter_generator = lambda input_id, output_ids: r_split.make_r_split(input_id, output_ids, r_split_batch_size) - out_split_ids = self.introduce_splitter(round_robin_splitter_generator, fan_out, fileIdGen, streaming_input) + round_robin_splitter_generator = ( + lambda input_id, output_ids: r_split.make_r_split( + input_id, output_ids, r_split_batch_size + ) + ) + out_split_ids = self.introduce_splitter( + round_robin_splitter_generator, fan_out, fileIdGen, streaming_input + ) in_mapper_ids = out_split_ids # mappers - out_mapper_ids = self.introduce_mappers(fan_out, fileIdGen, in_mapper_ids, original_cmd_invocation_with_io_vars, - parallelizer) - out_mapper_ids = [out_ids[0] for out_ids in out_mapper_ids] # since we get list of list back for potential aux info + out_mapper_ids = self.introduce_mappers( + fan_out, + fileIdGen, + in_mapper_ids, + original_cmd_invocation_with_io_vars, + parallelizer, + ) + out_mapper_ids = [ + out_ids[0] for out_ids in out_mapper_ids + ] # since we get list of list back for potential aux info # aggregator - self.introduce_aggregator_for_round_robin(out_mapper_ids, parallelizer, streaming_output) + self.introduce_aggregator_for_round_robin( + out_mapper_ids, parallelizer, streaming_output + ) - def apply_round_robin_with_unwrap_flag_parallelization_to_node(self, node_id, parallelizer, fileIdGen, fan_out, - r_split_batch_size): + def apply_round_robin_with_unwrap_flag_parallelization_to_node( + self, node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size + ): # round robin with unwrap flag is an inferred parallelizer which ensures that # the command is commutative and has an aggregator for consecutive chunks; # thus we can check whether we can re-open a previous "RR"-parallelization ending with `r_merge` node = self.get_node(node_id) - streaming_input, streaming_output, configuration_inputs = \ + ( + streaming_input, + streaming_output, + configuration_inputs, + ) = ( node.get_single_streaming_input_single_output_and_configuration_inputs_of_node_for_parallelization() + ) original_cmd_invocation_with_io_vars = node.cmd_invocation_with_io_vars can_be_fused_with_prev = False prev_nodes = self.get_previous_nodes(node_id) if len(prev_nodes) == 1: - first_pred_node, first_pred_cmd_inv = \ - self.get_only_previous_node_and_only_previous_cmd_invocation(prev_nodes) + ( + first_pred_node, + first_pred_cmd_inv, + ) = self.get_only_previous_node_and_only_previous_cmd_invocation(prev_nodes) if isinstance(first_pred_node, r_merge.RMerge): can_be_fused_with_prev = True # remove node to be parallelized - self.remove_node(node_id) # remove it here already as as we need to remove edge end points ow. 
to avoid disconnecting graph to avoid disconnecting graph - - if can_be_fused_with_prev: # and node.is_commutative(): implied by how this kind of splitter is inferred - self.remove_node(prev_nodes[0]) # also sets respective edge to's and from's to None + self.remove_node( + node_id + ) # remove it here already as as we need to remove edge end points ow. to avoid disconnecting graph to avoid disconnecting graph + + if ( + can_be_fused_with_prev + ): # and node.is_commutative(): implied by how this kind of splitter is inferred + self.remove_node( + prev_nodes[0] + ) # also sets respective edge to's and from's to None in_unwrap_ids = first_pred_cmd_inv.operand_list out_unwrap_ids = self.introduce_unwraps(fileIdGen, in_unwrap_ids) in_mapper_ids = out_unwrap_ids else: # splitter - round_robin_with_unwrap_flag_splitter_generator = lambda input_id, output_ids: r_split.make_r_split_with_unwrap_flag(input_id, output_ids, r_split_batch_size) - out_split_ids = self.introduce_splitter(round_robin_with_unwrap_flag_splitter_generator, fan_out, fileIdGen, streaming_input) + round_robin_with_unwrap_flag_splitter_generator = ( + lambda input_id, output_ids: r_split.make_r_split_with_unwrap_flag( + input_id, output_ids, r_split_batch_size + ) + ) + out_split_ids = self.introduce_splitter( + round_robin_with_unwrap_flag_splitter_generator, + fan_out, + fileIdGen, + streaming_input, + ) in_mapper_ids = out_split_ids # mappers - out_mapper_ids = self.introduce_mappers(fan_out, fileIdGen, in_mapper_ids, original_cmd_invocation_with_io_vars, - parallelizer) + out_mapper_ids = self.introduce_mappers( + fan_out, + fileIdGen, + in_mapper_ids, + original_cmd_invocation_with_io_vars, + parallelizer, + ) in_aggregator_ids = out_mapper_ids out_aggregator_id = streaming_output - self.introduce_aggregators_for_consec_chunks(fileIdGen, in_aggregator_ids, - original_cmd_invocation_with_io_vars, out_aggregator_id, parallelizer, - streaming_output) - - def apply_consecutive_chunks_parallelization_to_node(self, node_id, parallelizer, fileIdGen, fan_out): + self.introduce_aggregators_for_consec_chunks( + fileIdGen, + in_aggregator_ids, + original_cmd_invocation_with_io_vars, + out_aggregator_id, + parallelizer, + streaming_output, + ) + + def apply_consecutive_chunks_parallelization_to_node( + self, node_id, parallelizer, fileIdGen, fan_out + ): # check whether we can fuse with previous node's parallelization: # we can do so if the previous node's parallelization is the same, and the aggregator is concatenation # Assumption: it suffices to check that the previous node is an aggregator node of type concatenate # as this is unique for consecutive chunk parallelization (for now, this is true) node = self.get_node(node_id) - streaming_input, streaming_output, configuration_inputs = \ + ( + streaming_input, + streaming_output, + configuration_inputs, + ) = ( node.get_single_streaming_input_single_output_and_configuration_inputs_of_node_for_parallelization() + ) original_cmd_invocation_with_io_vars = node.cmd_invocation_with_io_vars can_be_fused_with_prev = False prev_nodes = self.get_previous_nodes(node_id) if len(prev_nodes) == 1: - first_pred_node, first_pred_cmd_inv = \ - self.get_only_previous_node_and_only_previous_cmd_invocation(prev_nodes) + ( + first_pred_node, + first_pred_cmd_inv, + ) = self.get_only_previous_node_and_only_previous_cmd_invocation(prev_nodes) if first_pred_cmd_inv.is_aggregator_concatenate(): can_be_fused_with_prev = True # remove node to be parallelized - self.remove_node(node_id) # remove it here already 
as as we need to remove edge end points ow. to avoid disconnecting graph to avoid disconnecting graph + self.remove_node( + node_id + ) # remove it here already as as we need to remove edge end points ow. to avoid disconnecting graph to avoid disconnecting graph if can_be_fused_with_prev: - self.remove_node(prev_nodes[0]) # also sets respective edge to's and from's to None + self.remove_node( + prev_nodes[0] + ) # also sets respective edge to's and from's to None in_mapper_ids = first_pred_cmd_inv.operand_list - else: # cannot be fused so introduce splitter + else: # cannot be fused so introduce splitter # splitter - consec_chunks_splitter_generator = lambda input_id, output_ids: pash_split.make_split_file(input_id, output_ids) - out_split_ids = self.introduce_splitter(consec_chunks_splitter_generator, fan_out, fileIdGen, streaming_input) + consec_chunks_splitter_generator = ( + lambda input_id, output_ids: pash_split.make_split_file( + input_id, output_ids + ) + ) + out_split_ids = self.introduce_splitter( + consec_chunks_splitter_generator, fan_out, fileIdGen, streaming_input + ) in_mapper_ids = out_split_ids # mappers - out_mapper_ids = self.introduce_mappers(fan_out, fileIdGen, in_mapper_ids, original_cmd_invocation_with_io_vars, - parallelizer) + out_mapper_ids = self.introduce_mappers( + fan_out, + fileIdGen, + in_mapper_ids, + original_cmd_invocation_with_io_vars, + parallelizer, + ) # aggregators in_aggregator_ids = out_mapper_ids out_aggregator_id = streaming_output - self.introduce_aggregators_for_consec_chunks(fileIdGen, in_aggregator_ids, - original_cmd_invocation_with_io_vars, out_aggregator_id, parallelizer, - streaming_output) + self.introduce_aggregators_for_consec_chunks( + fileIdGen, + in_aggregator_ids, + original_cmd_invocation_with_io_vars, + out_aggregator_id, + parallelizer, + streaming_output, + ) def get_only_previous_node_and_only_previous_cmd_invocation(self, prev_nodes): - assert (len(prev_nodes) > 0) + assert len(prev_nodes) > 0 # get info about first one but also ensure that it is the only one if we fuse assert len(prev_nodes) == 1 first_pred_id = prev_nodes[0] @@ -883,7 +1071,9 @@ def get_only_previous_node_and_only_previous_cmd_invocation(self, prev_nodes): first_pred_cmd_inv = first_pred_node.cmd_invocation_with_io_vars return first_pred_node, first_pred_cmd_inv - def introduce_splitter(self, splitter_generator, fan_out, fileIdGen, streaming_input): + def introduce_splitter( + self, splitter_generator, fan_out, fileIdGen, streaming_input + ): out_split_ids = self.generate_ephemeral_edges(fileIdGen, fan_out) splitter = splitter_generator(streaming_input, out_split_ids) self.set_edge_to(streaming_input, splitter.get_id()) @@ -892,23 +1082,38 @@ def introduce_splitter(self, splitter_generator, fan_out, fileIdGen, streaming_i self.add_node(splitter) return out_split_ids - def introduce_mappers(self, fan_out, fileIdGen, in_mapper_ids, original_cmd_invocation_with_io_vars, parallelizer): + def introduce_mappers( + self, + fan_out, + fileIdGen, + in_mapper_ids, + original_cmd_invocation_with_io_vars, + parallelizer, + ): # -> [[input, aux1, aux2], [...], [...], ...] num_aux_mapper_to_aggregator = parallelizer.info_mapper_aggregator out_mapper_ids = [] - for _ in range(0,fan_out): - out_mapper_ids.append(self.generate_ephemeral_edges(fileIdGen, num_aux_mapper_to_aggregator+1)) + for _ in range(0, fan_out): + out_mapper_ids.append( + self.generate_ephemeral_edges( + fileIdGen, num_aux_mapper_to_aggregator + 1 + ) + ) # TODO: Fix that we use different ones here! 
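# Concretely, with fan_out == 3 and one auxiliary stream per mapper
# (parallelizer.info_mapper_aggregator == 1) the loop above yields a structure
# shaped like (edge ids invented):
#     out_mapper_ids == [[101, 102], [103, 104], [105, 106]]
# where each inner list is [main_output, aux_1, ...] for one mapper.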
# list of output, aux_output_1, aux_output_2, ... zip_mapper_in_out_ids = zip(in_mapper_ids, out_mapper_ids) all_mappers = [] - for (in_id, out_ids) in zip_mapper_in_out_ids: + for in_id, out_ids in zip_mapper_in_out_ids: # BEGIN: these 4 lines could be refactored to be a function in graph such that # creating end point of edges and the creation of edges is not decoupled out_id = out_ids[0] aux_out_ids = out_ids[1:] - mapper_cmd_inv = parallelizer.get_actual_mapper(original_cmd_invocation_with_io_vars, in_id, out_id, aux_out_ids) - mapper = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars(mapper_cmd_inv) + mapper_cmd_inv = parallelizer.get_actual_mapper( + original_cmd_invocation_with_io_vars, in_id, out_id, aux_out_ids + ) + mapper = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars( + mapper_cmd_inv + ) self.set_edge_to(in_id, mapper.get_id()) self.set_edge_from(out_id, mapper.get_id()) for aux_out_id in aux_out_ids: @@ -926,27 +1131,49 @@ def introduce_mappers(self, fan_out, fileIdGen, in_mapper_ids, original_cmd_invo return out_mapper_ids def introduce_unwraps(self, fileIdGen, in_unwrap_ids): - unwrap_to_commutative_mappers_ids = self.generate_ephemeral_edges(fileIdGen, len(in_unwrap_ids)) + unwrap_to_commutative_mappers_ids = self.generate_ephemeral_edges( + fileIdGen, len(in_unwrap_ids) + ) in_out_unwrap_ids = zip(in_unwrap_ids, unwrap_to_commutative_mappers_ids) for in_unwrap, out_unwrap in in_out_unwrap_ids: unwrap = r_unwrap.make_unwrap_node([in_unwrap], out_unwrap) self.add_node(unwrap) - self.set_edge_to(in_unwrap, unwrap.get_id()) # from are still (wrapped) mappers - self.set_edge_from(out_unwrap, unwrap.get_id()) # to will be set to mappers of current node + self.set_edge_to( + in_unwrap, unwrap.get_id() + ) # from are still (wrapped) mappers + self.set_edge_from( + out_unwrap, unwrap.get_id() + ) # to will be set to mappers of current node in_mapper_ids = unwrap_to_commutative_mappers_ids return in_mapper_ids - def introduce_aggregators_for_consec_chunks(self, fileIdGen, in_aggregator_ids, - original_cmd_invocation_with_io_vars, out_aggregator_id, parallelizer, - streaming_output): + def introduce_aggregators_for_consec_chunks( + self, + fileIdGen, + in_aggregator_ids, + original_cmd_invocation_with_io_vars, + out_aggregator_id, + parallelizer, + streaming_output, + ): # in_aggregator_ids: [[input, aux1, aux2, ...], [...], [...], ...] 
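# Both 2-ary aggregation paths below (custom_2_ary with or without auxiliary
# mapper outputs) hand the per-mapper streams to create_generic_aggregator_tree,
# which combines them pairwise. A toy fold that only illustrates the arity; the
# real tree may be balanced rather than left-leaning and operates on edge ids,
# not strings:
from functools import reduce

def agg(left, right):
    return f"agg({left},{right})"

print(reduce(agg, ["m0", "m1", "m2", "m3"]))  # agg(agg(agg(m0,m1),m2),m3)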
if parallelizer.info_mapper_aggregator == 0: - in_aggregator_ids = [in_ids[0] for in_ids in in_aggregator_ids] # since we get list of list back for potential aux info + in_aggregator_ids = [ + in_ids[0] for in_ids in in_aggregator_ids + ] # since we get list of list back for potential aux info aggregator_spec = parallelizer.get_aggregator_spec() - if aggregator_spec.is_aggregator_spec_concatenate() or aggregator_spec.is_aggregator_spec_custom_n_ary(): - aggregator_cmd_inv = parallelizer.get_actual_aggregator(original_cmd_invocation_with_io_vars, - in_aggregator_ids, out_aggregator_id) - aggregator = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars(aggregator_cmd_inv) + if ( + aggregator_spec.is_aggregator_spec_concatenate() + or aggregator_spec.is_aggregator_spec_custom_n_ary() + ): + aggregator_cmd_inv = parallelizer.get_actual_aggregator( + original_cmd_invocation_with_io_vars, + in_aggregator_ids, + out_aggregator_id, + ) + aggregator = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars( + aggregator_cmd_inv + ) for in_aggregator_id in in_aggregator_ids: self.set_edge_to(in_aggregator_id, aggregator.get_id()) self.set_edge_from(streaming_output, aggregator.get_id()) @@ -958,17 +1185,29 @@ def introduce_aggregators_for_consec_chunks(self, fileIdGen, in_aggregator_ids, # TODO: we simplify and assume that every mapper produces a single output for now map_in_aggregator_ids = [[id] for id in in_aggregator_ids] # TODO: turn node into cmd_invocation_with_io_vars since this is the only thing required in this function - self.create_generic_aggregator_tree(original_cmd_invocation_with_io_vars, parallelizer, map_in_aggregator_ids, out_aggregator_id, fileIdGen) + self.create_generic_aggregator_tree( + original_cmd_invocation_with_io_vars, + parallelizer, + map_in_aggregator_ids, + out_aggregator_id, + fileIdGen, + ) else: - raise Exception("aggregator kind not yet implemented") - else: # we got auxiliary information - assert(parallelizer.core_aggregator_spec.is_aggregator_spec_custom_2_ary()) + raise UnparallelizableError("aggregator kind not yet implemented for command: {}".format(original_cmd_invocation_with_io_vars.cmd_name)) + else: # we got auxiliary information + assert parallelizer.core_aggregator_spec.is_aggregator_spec_custom_2_ary() map_in_aggregator_ids = in_aggregator_ids - self.create_generic_aggregator_tree(original_cmd_invocation_with_io_vars, parallelizer, - map_in_aggregator_ids, out_aggregator_id, fileIdGen) - - - def introduce_aggregator_for_round_robin(self, out_mapper_ids, parallelizer, streaming_output): + self.create_generic_aggregator_tree( + original_cmd_invocation_with_io_vars, + parallelizer, + map_in_aggregator_ids, + out_aggregator_id, + fileIdGen, + ) + + def introduce_aggregator_for_round_robin( + self, out_mapper_ids, parallelizer, streaming_output + ): aggregator_spec = parallelizer.get_aggregator_spec() if aggregator_spec.is_aggregator_spec_concatenate(): in_aggregator_ids = out_mapper_ids @@ -985,13 +1224,10 @@ def introduce_aggregator_for_round_robin(self, out_mapper_ids, parallelizer, str # TODO: this is where the other cases for aggregators need to be added pass - - - ## Replicates an edge using tee and returns the new node_id. 
def tee_edge(self, edge_id, times, fileIdGen): ## Assert that the edge is unplugged - assert(self.edges[edge_id][2] is None) + assert self.edges[edge_id][2] is None output_fids = [fileIdGen.next_ephemeral_file_id() for _ in range(times)] output_ids = [fid.get_ident() for fid in output_fids] @@ -1005,9 +1241,9 @@ def tee_edge(self, edge_id, times, fileIdGen): self.add_from_edge(new_node_id, edge_fid) self.add_node(new_node) self.set_edge_to(edge_id, new_node_id) - + return new_node_id - + def generate_graphviz(self): ## TODO: It is unclear if importing in here (instead of in general) ## improves startup cost of the pash_runtime when not using graphviz. @@ -1022,7 +1258,7 @@ def generate_graphviz(self): dot = node.add_dot_node(dot, node_id) ## (I/O) File nodes should be boxes - dot.attr('node', shape='box') + dot.attr("node", shape="box") ## Then generate all edges and input+output files for fid, from_node, to_node in self.edges.values(): @@ -1032,7 +1268,7 @@ def generate_graphviz(self): ## TODO: We should investigate why this happens if fid.has_file_resource(): label = fid.serialize() - node_id = f'file-{str(fid.get_ident())}' + node_id = f"file-{str(fid.get_ident())}" dot.node(node_id, label) if from_node is None: @@ -1049,27 +1285,51 @@ def generate_graphviz(self): def edge_node_consistency(self): ## Check if edges and nodes are consistent for edge_id, (_, from_node_id, to_node_id) in self.edges.items(): - if (not from_node_id is None): + if not from_node_id is None: from_node = self.get_node(from_node_id) - if(not (edge_id in from_node.get_output_list())): - log("Consistency Error: Edge id:", edge_id, "is not in the node outputs:", from_node) + if not (edge_id in from_node.get_output_list()): + log( + "Consistency Error: Edge id:", + edge_id, + "is not in the node outputs:", + from_node, + ) return False - if (not to_node_id is None): + if not to_node_id is None: to_node = self.get_node(to_node_id) - if(not (edge_id in to_node.get_input_list())): - log("Consistency Error: Edge id:", edge_id, "is not in the node inputs:", to_node) + if not (edge_id in to_node.get_input_list()): + log( + "Consistency Error: Edge id:", + edge_id, + "is not in the node inputs:", + to_node, + ) return False for node_id, node in self.nodes.items(): for edge_id in node.get_input_list(): _, _, to_node_id = self.edges[edge_id] - if(not (to_node_id == node_id)): - log("Consistency Error: The to_node_id of the input_edge:", edge_id, "of the node:", node, "is equal to:", to_node_id) + if not (to_node_id == node_id): + log( + "Consistency Error: The to_node_id of the input_edge:", + edge_id, + "of the node:", + node, + "is equal to:", + to_node_id, + ) return False for edge_id in node.get_output_list(): _, from_node_id, _ = self.edges[edge_id] - if(not (from_node_id == node_id)): - log("Consistency Error: The from_node_id of the output_edge:", edge_id, "of the node:", node, "is equal to:", from_node_id) + if not (from_node_id == node_id): + log( + "Consistency Error: The from_node_id of the output_edge:", + edge_id, + "of the node:", + node, + "is equal to:", + from_node_id, + ) return False return True @@ -1078,42 +1338,65 @@ def edge_node_consistency(self): ## has at least one node, and stdin, stdout set to some non-null ## file identifiers. def valid(self): - return (len(self.nodes) > 0 and - self.edge_node_consistency() and - (not self.is_in_background() - or (self.get_stdin() is None))) - ## The following is not true. Background IRs should not have stdin, but they can have stdout. 
- # and self.get_stdout() is None))) - ## The following is not true. A DFG might not have an stdin - # or (not self.is_in_background() - # and not self.get_stdin() is None - # and not self.get_stdout() is None))) + return ( + len(self.nodes) > 0 + and self.edge_node_consistency() + and (not self.is_in_background() or (self.get_stdin() is None)) + ) + ## The following is not true. Background IRs should not have stdin, but they can have stdout. + # and self.get_stdout() is None))) + ## The following is not true. A DFG might not have an stdin + # or (not self.is_in_background() + # and not self.get_stdin() is None + # and not self.get_stdout() is None))) ## This is a function that creates a reduce tree for a given node - def create_generic_aggregator_tree(self, cmd_invocation_with_io_vars, parallelizer, input_ids_for_aggregators, out_aggregator_id, fileIdGen): + def create_generic_aggregator_tree( + self, + cmd_invocation_with_io_vars, + parallelizer, + input_ids_for_aggregators, + out_aggregator_id, + fileIdGen, + ): def function_to_get_binary_aggregator(in_ids, out_ids): if len(out_ids) == 1: - aggregator_cmd_inv = parallelizer.get_actual_aggregator(cmd_invocation_with_io_vars, in_ids, out_ids[0]) - aggregator = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars(aggregator_cmd_inv) + aggregator_cmd_inv = parallelizer.get_actual_aggregator( + cmd_invocation_with_io_vars, in_ids, out_ids[0] + ) + aggregator = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars( + aggregator_cmd_inv + ) return aggregator else: # list has been flattened ... num_input_ids = len(in_ids) - assert(num_input_ids % 2 == 0) + assert num_input_ids % 2 == 0 fst_normal_input = in_ids[0] - fst_aux_inputs_from = in_ids[1:int(num_input_ids/2)] - snd_normal_input = in_ids[int(num_input_ids/2)] - snd_aux_inputs_from = in_ids[int(num_input_ids/2)+1:] + fst_aux_inputs_from = in_ids[1 : int(num_input_ids / 2)] + snd_normal_input = in_ids[int(num_input_ids / 2)] + snd_aux_inputs_from = in_ids[int(num_input_ids / 2) + 1 :] output_to = out_ids[0] aux_outputs_to = out_ids[1:] aggregator_cmd_inv = parallelizer.get_actual_2_ary_aggregator_with_aux( - fst_normal_input, fst_aux_inputs_from, snd_normal_input, snd_aux_inputs_from, - output_to, aux_outputs_to) - aggregator = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars(aggregator_cmd_inv) + fst_normal_input, + fst_aux_inputs_from, + snd_normal_input, + snd_aux_inputs_from, + output_to, + aux_outputs_to, + ) + aggregator = DFGNode.make_simple_dfg_node_from_cmd_inv_with_io_vars( + aggregator_cmd_inv + ) return aggregator + ## The Aggregator node takes a sequence of input ids and an output id - all_aggregators, new_edges, final_output_id = self.create_reduce_tree(lambda in_ids, out_ids: function_to_get_binary_aggregator(in_ids, out_ids), - input_ids_for_aggregators, fileIdGen) + all_aggregators, new_edges, final_output_id = self.create_reduce_tree( + lambda in_ids, out_ids: function_to_get_binary_aggregator(in_ids, out_ids), + input_ids_for_aggregators, + fileIdGen, + ) ## Add the edges in the graph self.add_edges(new_edges) ## Add the merge commands in the graph @@ -1135,8 +1418,10 @@ def create_reduce_tree(self, init_func, input_ids, fileIdGen): tree = [] new_edges = [] curr_ids = input_ids - while(len(curr_ids) > 1): - new_level, curr_ids, new_fids = self.create_reduce_tree_level(init_func, curr_ids, fileIdGen) + while len(curr_ids) > 1: + new_level, curr_ids, new_fids = self.create_reduce_tree_level( + init_func, curr_ids, fileIdGen + ) tree += new_level new_edges += 
new_fids @@ -1145,15 +1430,21 @@ def create_reduce_tree(self, init_func, input_ids, fileIdGen): ## Drain the final auxiliary outputs final_auxiliary_outputs = curr_ids[0][1:] - drain_fids = [fileIdGen.next_file_id() - for final_auxiliary_output in final_auxiliary_outputs] + drain_fids = [ + fileIdGen.next_file_id() + for final_auxiliary_output in final_auxiliary_outputs + ] for drain_fid in drain_fids: - drain_fid.set_resource(FileResource(Arg.string_to_arg('/dev/null'))) + drain_fid.set_resource(FileResource(Arg.string_to_arg("/dev/null"))) new_edges.append(drain_fid) drain_ids = [fid.get_ident() for fid in drain_fids] - drain_cat_commands = [make_cat_node([final_auxiliary_output], drain_id) - for final_auxiliary_output, drain_id in zip(final_auxiliary_outputs, drain_ids)] + drain_cat_commands = [ + make_cat_node([final_auxiliary_output], drain_id) + for final_auxiliary_output, drain_id in zip( + final_auxiliary_outputs, drain_ids + ) + ] return (tree + drain_cat_commands), new_edges, final_output_id @staticmethod @@ -1161,7 +1452,7 @@ def create_reduce_tree(self, init_func, input_ids, fileIdGen): ## output file ids must be lists of lists, as the input file ids and ## the output file ids might contain auxiliary files. def create_reduce_tree_level(init_func, input_ids, fileIdGen): - if(len(input_ids) % 2 == 0): + if len(input_ids) % 2 == 0: output_ids = [] even_input_ids = input_ids else: @@ -1175,7 +1466,9 @@ def create_reduce_tree_level(init_func, input_ids, fileIdGen): new_fids += new_out_fids new_out_ids = [fid.get_ident() for fid in new_out_fids] output_ids.append(new_out_ids) - new_node = IR.create_reduce_node(init_func, even_input_ids[i:i+2], new_out_ids) + new_node = IR.create_reduce_node( + init_func, even_input_ids[i : i + 2], new_out_ids + ) level.append(new_node) return (level, output_ids, new_fids) @@ -1183,6 +1476,5 @@ def create_reduce_tree_level(init_func, input_ids, fileIdGen): ## This function creates one node of the reduce tree def create_reduce_node(init_func, input_ids, output_ids): return init_func(flatten_list(input_ids), output_ids) - # TODO: this is where we need to use our aggregator spec/node - + # TODO: this is where we need to use our aggregator spec/node diff --git a/compiler/ir_to_ast.py b/compiler/ir_to_ast.py index 033eb34d6..1e6277853 100644 --- a/compiler/ir_to_ast.py +++ b/compiler/ir_to_ast.py @@ -6,8 +6,9 @@ from parse import from_ast_objects_to_shell import config -RM_PASH_FIFOS_NAME="rm_pash_fifos" -MKFIFO_PASH_FIFOS_NAME="mkfifo_pash_fifos" +RM_PASH_FIFOS_NAME = "rm_pash_fifos" +MKFIFO_PASH_FIFOS_NAME = "mkfifo_pash_fifos" + def to_shell(ir, args): backend_start_time = datetime.now() @@ -27,9 +28,9 @@ def to_shell(ir, args): def ir2ast(ir, args): clean_up_graph = False drain_streams = False - if(args.termination == "clean_up_graph"): + if args.termination == "clean_up_graph": clean_up_graph = True - elif(args.termination == "drain_stream"): + elif args.termination == "drain_stream": drain_streams = True ## NOTE: We first need to make the main body because it might create additional ephemeral fids. 
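The create_reduce_tree / create_reduce_tree_level pair above keeps pairing adjacent streams until only one remains, carrying an odd leftover to the next level unchanged. The same halving logic in a self-contained form (plain values instead of file ids and DFG nodes; combine stands in for the binary aggregator, and which end of an odd level is carried over is an illustrative choice):

    def reduce_tree(ids, combine):
        # Pair ids two at a time per level until a single result remains.
        while len(ids) > 1:
            carry = [ids[-1]] if len(ids) % 2 == 1 else []
            paired = ids[: len(ids) - len(carry)]
            ids = [combine(paired[i], paired[i + 1]) for i in range(0, len(paired), 2)] + carry
        return ids[0]

    # e.g. reduce_tree(["a", "b", "c", "d", "e"], lambda x, y: f"agg({x},{y})")
    #      -> "agg(agg(agg(a,b),agg(c,d)),e)"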
@@ -52,8 +53,7 @@ def ir2ast(ir, args): # log("All fids:", all_fids) ## Find all the ephemeral fids and turn them to ASTs - ephemeral_fids = [fid for fid in all_fids - if fid.is_ephemeral()] + ephemeral_fids = [fid for fid in all_fids if fid.is_ephemeral()] # log("Ephemeral fids:", ephemeral_fids) @@ -67,6 +67,7 @@ def ir2ast(ir, args): return final_asts + def make_rms_f_prologue_epilogue(ephemeral_fids): asts = [] ## Create an `rm -f` for each ephemeral fid @@ -76,6 +77,7 @@ def make_rms_f_prologue_epilogue(ephemeral_fids): asts.append(command) return asts + def make_ir_prologue(ephemeral_fids) -> "list[AstNode]": asts = [] ## Create an `rm -f` for each ephemeral fid @@ -89,7 +91,7 @@ def make_ir_prologue(ephemeral_fids) -> "list[AstNode]": args = [eph_fid.to_ast()] command = make_mkfifo_ast(args) mkfifo_asts.append(command) - + defun_mkfifos = make_defun(MKFIFO_PASH_FIFOS_NAME, make_semi_sequence(mkfifo_asts)) asts.append(defun_mkfifos) @@ -102,14 +104,20 @@ def make_ir_prologue(ephemeral_fids) -> "list[AstNode]": class_asts = [to_ast_node(ast) for ast in asts] return class_asts + def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file) -> "list[AstNode]": asts = [] - if (clean_up_graph): + if clean_up_graph: ## TODO: Wait for all output nodes not just one - pids = [[standard_var_ast('!')]] - clean_up_path_script = os.path.join(config.PASH_TOP, config.config['runtime']['clean_up_graph_binary']) - com_args = [string_to_argument('source'), string_to_argument(clean_up_path_script)] + pids - if (log_file == ""): + pids = [[standard_var_ast("!")]] + clean_up_path_script = os.path.join( + config.PASH_TOP, config.config["runtime"]["clean_up_graph_binary"] + ) + com_args = [ + string_to_argument("source"), + string_to_argument(clean_up_path_script), + ] + pids + if log_file == "": com = make_command(com_args) else: redirection = redir_append_stderr_to_string_file(log_file) @@ -117,7 +125,7 @@ def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file) -> "list[AstNode] asts.append(com) else: ## Otherwise we just wait for all processes to die. 
- wait_com = make_command([string_to_argument('wait')]) + wait_com = make_command([string_to_argument("wait")]) exit_status = make_command([string_to_argument("internal_exec_status=$?")]) asts.extend([wait_com, exit_status]) @@ -125,25 +133,28 @@ def make_ir_epilogue(ephemeral_fids, clean_up_graph, log_file) -> "list[AstNode] call_rm_pash_funs = make_command([string_to_argument(RM_PASH_FIFOS_NAME)]) asts.append(call_rm_pash_funs) - ## Make the following command: + ## Make the following command: # (exit $internal_exec_status) exit_ec_ast = make_exit_ec_ast() asts.append(exit_ec_ast) - + class_asts = [to_ast_node(ast) for ast in asts] return class_asts + def make_exit_ec_ast(): - command = make_command([string_to_argument("exit"), - [make_quoted_variable("internal_exec_status")]]) + command = make_command( + [string_to_argument("exit"), [make_quoted_variable("internal_exec_status")]] + ) ast = make_subshell(command) return ast - + def make_rm_f_ast(arguments): all_args = [string_to_argument("rm"), string_to_argument("-f")] + arguments return make_command(all_args) + def make_mkfifo_ast(arguments): all_args = [string_to_argument("mkfifo")] + arguments return make_command(all_args) diff --git a/compiler/orchestrator_runtime/pash_init_setup.sh b/compiler/orchestrator_runtime/pash_init_setup.sh index 06e953481..966474a5c 100644 --- a/compiler/orchestrator_runtime/pash_init_setup.sh +++ b/compiler/orchestrator_runtime/pash_init_setup.sh @@ -13,11 +13,12 @@ export pash_output_time_flag=1 export pash_execute_flag=1 export pash_dry_run_compiler_flag=0 export pash_assert_compiler_success_flag=0 +export pash_assert_all_regions_parallelizable_flag=0 export pash_checking_log_file=0 export pash_checking_debug_level=0 export pash_avoid_pash_runtime_completion_flag=0 export pash_profile_driven_flag=1 -export pash_parallel_pipelines=0 +export pash_no_parallel_pipelines=0 export pash_daemon_communicates_through_unix_pipes_flag=0 export pash_speculative_flag=0 export show_version=0 @@ -51,6 +52,10 @@ do export pash_assert_compiler_success_flag=1 fi + if [ "--assert_all_regions_parallelizable" == "$item" ]; then + export pash_assert_all_regions_parallelizable_flag=1 + fi + if [ "--log_file" == "$item" ]; then pash_checking_log_file=1 fi @@ -67,8 +72,8 @@ do pash_checking_debug_level=1 fi - if [ "--parallel_pipelines" == "$item" ]; then - export pash_parallel_pipelines=1 + if [ "--no_parallel_pipelines" == "$item" ]; then + export pash_no_parallel_pipelines=1 fi if [ "--daemon_communicates_through_unix_pipes" == "$item" ]; then diff --git a/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh b/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh index c05faf681..e6294ec9e 100644 --- a/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh +++ b/compiler/orchestrator_runtime/pash_prepare_call_compiler.sh @@ -33,6 +33,12 @@ pash_redir_output echo "$$: (2) Before asking the daemon for compilation..." 
msg="Compile:${pash_compiled_script_file}| Variable File:${pash_runtime_shell_variables_file}| Input IR File:${pash_input_ir_file}" daemon_response=$(pash_communicate_daemon "$msg") # Blocking step, daemon will not send response until it's safe to continue +if [[ "$daemon_response" == *"not all regions are parallelizable"* ]]; then + pash_all_region_parallelizable=1 +else + pash_all_region_parallelizable=0 +fi + if [[ "$daemon_response" == *"OK:"* ]]; then pash_runtime_return_code=0 elif [ -z "$daemon_response" ]; then @@ -43,6 +49,9 @@ else pash_runtime_return_code=1 fi +# save IFS to restore after field splitting +[ -n "${IFS+x}" ] && saved_IFS=$IFS +unset IFS # Get assigned process id # We need to split the daemon response into elements of an array by # shell's field splitting. @@ -50,8 +59,23 @@ fi response_args=($daemon_response) process_id=${response_args[1]} +[ -n "${saved_IFS+x}" ] && IFS="$saved_IFS" + pash_redir_output echo "$$: (2) Compiler exited with code: $pash_runtime_return_code" -if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_assert_compiler_success_flag" -eq 1 ]; then + +## only when --assert_all_regions_parallellizable is used do we care about all regions being parallelizable +if [ "$pash_all_region_parallelizable" -ne 0 ] && [ "$pash_assert_all_regions_parallelizable_flag" -eq 1 ]; then + pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code because some regions were not parallelizable: $pash_all_region_parallelizable while assert_all_regions_parallelizable_flag was enabled! Exiting PaSh..." + exit 1 +fi + +if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_assert_all_regions_parallelizable_flag" -eq 1 ]; then + pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code: $pash_runtime_return_code while assert_all_regions_parallelizable_flag was enabled! Exiting PaSh..." + exit 1 +fi + +## for pash_assert_compiler_success_flag, exit when return code is 0 (general exception caught) and not when all regions are parallelizable +if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_all_region_parallelizable" -eq 0 ] && [ "$pash_assert_compiler_success_flag" -eq 1 ]; then pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code: $pash_runtime_return_code while assert_compiler_success was enabled! Exiting PaSh..." 
exit 1 fi diff --git a/compiler/orchestrator_runtime/speculative/pash_spec_init_setup.sh b/compiler/orchestrator_runtime/speculative/pash_spec_init_setup.sh index fe193a69d..0f692e960 100644 --- a/compiler/orchestrator_runtime/speculative/pash_spec_init_setup.sh +++ b/compiler/orchestrator_runtime/speculative/pash_spec_init_setup.sh @@ -2,6 +2,8 @@ source "$PASH_TOP/compiler/orchestrator_runtime/pash_orch_lib.sh" +export PASH_SPEC_NODE_DIRECTORY="${PASH_TMP_PREFIX}/speculative/partial_order/" + pash_spec_communicate_scheduler() { local message=$1 diff --git a/compiler/orchestrator_runtime/speculative/speculative_runtime.sh b/compiler/orchestrator_runtime/speculative/speculative_runtime.sh index b9a188484..e8f2a55f4 100644 --- a/compiler/orchestrator_runtime/speculative/speculative_runtime.sh +++ b/compiler/orchestrator_runtime/speculative/speculative_runtime.sh @@ -26,12 +26,33 @@ daemon_response=$(pash_spec_communicate_scheduler "$msg") # Blocking step, daemo ## Receive an exit code if [[ "$daemon_response" == *"OK:"* ]]; then + # save IFS to restore after field splitting + [ -n "${IFS+set}" ] && saved_IFS=$IFS + unset IFS # shellcheck disable=SC2206 response_args=($daemon_response) + [ -n "${saved_IFS+set}" ] && IFS=$saved_IFS pash_redir_output echo "$$: (2) Scheduler responded: $daemon_response" cmd_exit_code=${response_args[1]} output_variable_file=${response_args[2]} stdout_file=${response_args[3]} + + ## TODO: Restore the variables (doesn't work currently because variables are printed using `env`) + pash_redir_output echo "$$: (2) Recovering script variables from: $output_variable_file" + # source "$RUNTIME_DIR/pash_source_declare_vars.sh" "$output_variable_file" + + pash_redir_output echo "$$: (2) Recovering stdout from: $stdout_file" + cat "${stdout_file}" +elif [[ "$daemon_response" == *"UNSAFE:"* ]]; then + pash_redir_output echo "$$: (2) Scheduler responded: $daemon_response" + pash_redir_output echo "$$: (2) Executing command: $pash_speculative_command_id" + ## Execute the command. + ## KK 2023-06-01 Does `eval` work in general? We need to be precise + ## about which commands are unsafe to determine how to execute them. + cmd=$(cat "$PASH_SPEC_NODE_DIRECTORY/$pash_speculative_command_id") + ## Word splitting isn't needed since eval combines all the arguments into a single string + eval "$cmd" + cmd_exit_code=$? elif [ -z "$daemon_response" ]; then ## Trouble... Daemon crashed, rip pash_redir_output echo "$$: ERROR: (2) Scheduler crashed!" 
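The compile request assembled in pash_prepare_call_compiler.sh above is a single line of the form Compile:<script>| Variable File:<vars>| Input IR File:<ir>, which the daemon splits field by field (see __parse_compile_command in pash_compilation_server.py below). A stand-alone sketch of one way to parse it, assuming colon-free and pipe-free paths:

    def parse_compile_request(line):
        # "Compile:/tmp/out.sh| Variable File:/tmp/vars| Input IR File:/tmp/ir"
        fields = [part.split(":", 1)[1].strip() for part in line.split("|")]
        compiled_script_file, var_file, input_ir_file = fields
        return compiled_script_file, var_file, input_ir_file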
@@ -47,11 +68,5 @@ pash_redir_output echo "$$: (2) Scheduler returned exit code: ${cmd_exit_code} f pash_runtime_final_status=${cmd_exit_code} -## TODO: Restore the variables (doesn't work currently because variables are printed using `env`) -pash_redir_output echo "$$: (2) Recovering script variables from: $output_variable_file" -# source "$RUNTIME_DIR/pash_source_declare_vars.sh" "$output_variable_file" - -pash_redir_output echo "$$: (2) Recovering stdout from: $stdout_file" -cat "${stdout_file}" ## TODO: Also need to use wrap_vars maybe to `set` properly etc diff --git a/compiler/parse.py b/compiler/parse.py index 3d2bfc01b..4b680212f 100644 --- a/compiler/parse.py +++ b/compiler/parse.py @@ -12,41 +12,55 @@ import libdash.parser + ## Parses straight a shell script to an AST ## through python without calling it as an executable +INITIALIZE_LIBDASH = True def parse_shell_to_asts(input_script_path): + global INITIALIZE_LIBDASH try: - new_ast_objects = libdash.parser.parse(input_script_path) - + new_ast_objects = libdash.parser.parse(input_script_path, INITIALIZE_LIBDASH) + INITIALIZE_LIBDASH = False ## Transform the untyped ast objects to typed ones typed_ast_objects = [] - for untyped_ast, original_text, linno_before, linno_after, in new_ast_objects: - typed_ast = to_ast_node(untyped_ast) - typed_ast_objects.append((typed_ast, original_text, linno_before, linno_after)) + for ( + untyped_ast, + original_text, + linno_before, + linno_after, + ) in new_ast_objects: + typed_ast = to_ast_node(untyped_ast) + typed_ast_objects.append( + (typed_ast, original_text, linno_before, linno_after) + ) return typed_ast_objects except libdash.parser.ParsingException as e: log("Parsing error!", e) sys.exit(1) + def parse_shell_to_asts_interactive(input_script_path: str): return libdash.parser.parse(input_script_path) + def from_ast_objects_to_shell(asts): shell_list = [] for ast in asts: # log("Ast:", ast) - if(isinstance(ast, UnparsedScript)): + if isinstance(ast, UnparsedScript): shell_list.append(ast.text) else: shell_list.append(ast.pretty()) return "\n".join(shell_list) + "\n" + def from_ast_objects_to_shell_file(asts, new_shell_filename): script = from_ast_objects_to_shell(asts) - with open(new_shell_filename, 'w') as new_shell_file: + with open(new_shell_filename, "w") as new_shell_file: new_shell_file.write(script) + ## Simply wraps the string_of_arg def pash_string_of_arg(arg, quoted=False): return string_of_arg(arg, quoted) diff --git a/compiler/pash.py b/compiler/pash.py index c8fee1391..6554bcc1b 100755 --- a/compiler/pash.py +++ b/compiler/pash.py @@ -1,111 +1,73 @@ import sys import os import subprocess -import argparse -from datetime import datetime - -from shell_ast import ast_to_ast from ir import * -from parse import parse_shell_to_asts_interactive from pash_graphviz import maybe_init_graphviz_dir from preprocessor.preprocessor import preprocess from speculative import util_spec from util import * import config -import shutil +from cli import RunnerParser LOGGING_PREFIX = "PaSh: " + @logging_prefix(LOGGING_PREFIX) def main(): ## Parse arguments args, shell_name = parse_args() ## If it is interactive we need a different execution mode ## - ## The user can also ask for an interactive mode irregardless of whether pash was invoked in interactive mode. - if(len(args.input) == 0 or args.interactive): + ## The user can also ask for an interactive mode irregardless of whether pash was invoked in interactive mode. 
+ if len(args.input) == 0 or args.interactive: log("ERROR: --interactive option is not supported!", level=0) - assert(False) + assert False else: input_script_path = args.input[0] input_script_arguments = args.input[1:] ## Preprocess and execute the parsed ASTs - return_code = preprocess_and_execute_asts(input_script_path, args, input_script_arguments, shell_name) - - log("-" * 40) #log end marker + return_code = preprocess_and_execute_asts( + input_script_path, args, input_script_arguments, shell_name + ) + + log("-" * 40) # log end marker ## Return the exit code of the executed script sys.exit(return_code) -def preprocess_and_execute_asts(input_script_path, args, input_script_arguments, shell_name): + +def preprocess_and_execute_asts( + input_script_path, args, input_script_arguments, shell_name +): preprocessed_shell_script = preprocess(input_script_path, args) - if(args.output_preprocessed): + if args.output_preprocessed: log("Preprocessed script:") log(preprocessed_shell_script) - + ## Write the new shell script to a file to execute fname = ptempfile() log("Preprocessed script stored in:", fname) - with open(fname, 'w') as new_shell_file: + with open(fname, "w") as new_shell_file: new_shell_file.write(preprocessed_shell_script) - ## 4. Execute the preprocessed version of the input script - if(not args.preprocess_only): - return_code = execute_script(fname, args.command, input_script_arguments, shell_name) + if not args.preprocess_only: + return_code = execute_script( + fname, args.command, input_script_arguments, shell_name + ) else: return_code = 0 return return_code - def parse_args(): prog_name = sys.argv[0] - if 'PASH_FROM_SH' in os.environ: - prog_name = os.environ['PASH_FROM_SH'] + if "PASH_FROM_SH" in os.environ: + prog_name = os.environ["PASH_FROM_SH"] ## We need to set `+` as a prefix char too - parser = argparse.ArgumentParser(prog_name, prefix_chars='-+') - parser.add_argument("input", nargs='*', help="the script to be compiled and executed (followed by any command-line arguments") - parser.add_argument("--preprocess_only", - help="only preprocess the input script and not execute it", - action="store_true") - parser.add_argument("--output_preprocessed", - help=" output the preprocessed script", - action="store_true") - parser.add_argument("--interactive", - help="Executes the script using an interactive internal shell session (experimental)", - action="store_true") - parser.add_argument("-c", "--command", - help="Evaluate the following as a script, rather than a file", - default=None) - ## This is not the correct way to parse these, because more than one option can be given together, e.g., -ae - parser.add_argument("-a", - help="Enabling the `allexport` shell option", - action="store_true", - default=False) - parser.add_argument("+a", - help="Disabling the `allexport` shell option", - action="store_false", - default=False) - ## These two are here for compatibility with respect to bash - parser.add_argument("-v", - help="(experimental) prints shell input lines as they are read", - action="store_true") - parser.add_argument("-x", - help="(experimental) prints commands and their arguments as they execute", - action="store_true") - ## Deprecated argument... 
keeping here just to output the message - ## TODO: Do that with a custom argparse Action (KK: I tried and failed) - parser.add_argument("--expand_using_bash_mirror", - help="DEPRECATED: instead of expanding using the internal expansion code, expand using a bash mirror process (slow)", - action="store_true") - - ## Set the preprocessing mode to PaSh - parser.set_defaults(preprocess_mode='pash') - - config.add_common_arguments(parser) + parser = RunnerParser(prog_name, prefix_chars="-+") args = parser.parse_args() config.set_config_globals_from_pash_args(args) @@ -130,36 +92,33 @@ def parse_args(): log(arg_name, arg_val) log("-" * 40) - ## Print the deprecated argument - if args.expand_using_bash_mirror: - log("WARNING: Option --expand_using_bash_mirror is deprecated and is *ignored*.", level=0) - ## TODO: We might need to have a better default (like $0 of pa.sh) shell_name = "pash" if args.command is not None: fname = ptempfile() - with open(fname, 'w') as f: + with open(fname, "w") as f: f.write(args.command) ## If the shell is invoked with -c and arguments after it, then these arguments ## need to be assigned to $0, $1, $2, ... and not $1, $2, $3, ... - if(len(args.input) > 0): + if len(args.input) > 0: ## Assign $0 shell_name = args.input[0] args.input = args.input[1:] args.input = [fname] + args.input - elif (len(args.input) > 0): + elif len(args.input) > 0: shell_name = args.input[0] - return args, shell_name + def shell_env(shell_name: str): new_env = os.environ.copy() new_env["PASH_TMP_PREFIX"] = config.PASH_TMP_PREFIX new_env["pash_shell_name"] = shell_name return new_env + ## The following two functions need to correspond completely def bash_prefix_args(): subprocess_args = ["/usr/bin/env", "bash"] @@ -174,28 +133,36 @@ def bash_prefix_args(): subprocess_args.append("-x") return subprocess_args + def bash_exec_string(shell_name): flags = [] if config.pash_args.a: - flags.append('-a') + flags.append("-a") if config.pash_args.v: - flags.append('-v') + flags.append("-v") if config.pash_args.x: - flags.append('-x') + flags.append("-x") return "exec -a{} bash {} -s $@\n".format(shell_name, " ".join(flags)) + def execute_script(compiled_script_filename, command, arguments, shell_name): new_env = shell_env(shell_name) subprocess_args = bash_prefix_args() - subprocess_args += ["-c", 'source {}'.format(compiled_script_filename), shell_name] + arguments + subprocess_args += [ + "-c", + "source {}".format(compiled_script_filename), + shell_name, + ] + arguments # subprocess_args = ["/usr/bin/env", "bash", compiled_script_filename] + arguments - log("Executing:", "PASH_TMP_PREFIX={} pash_shell_name={} {}".format(config.PASH_TMP_PREFIX, - shell_name, - " ".join(subprocess_args))) + log( + "Executing:", + "PASH_TMP_PREFIX={} pash_shell_name={} {}".format( + config.PASH_TMP_PREFIX, shell_name, " ".join(subprocess_args) + ), + ) exec_obj = subprocess.run(subprocess_args, env=new_env, close_fds=False) return exec_obj.returncode + if __name__ == "__main__": main() - - diff --git a/compiler/pash_compilation_server.py b/compiler/pash_compilation_server.py index 3cbdf1a4b..9d7f6ad0a 100644 --- a/compiler/pash_compilation_server.py +++ b/compiler/pash_compilation_server.py @@ -1,20 +1,23 @@ -import argparse import signal -import traceback from threading import Thread from datetime import datetime, timedelta + # import queue +from sh_expand import env_vars_util + import config -import env_vars_util from pash_graphviz import maybe_generate_graphviz import pash_compiler from util import * from 
dspash.worker_manager import WorkersManager import server_util +from cli import BaseParser +from custom_error import * + ## -## A Daemon (not with the strict Unix sense) +## A Daemon (not with the strict Unix sense) ## that responds to requests for compilation ## @@ -23,22 +26,25 @@ def handler(signum, frame): log("Signal:", signum, "caught") shutdown() + signal.signal(signal.SIGTERM, handler) + def parse_args(): - parser = argparse.ArgumentParser(add_help=False) - config.add_common_arguments(parser) - args, unknown_args = parser.parse_known_args() + parser = BaseParser(add_help=False) + parser.add_pash_args() + args, _ = parser.parse_known_args() return args + # Initialize the daemon def init(): ## Set the logging prefix config.LOGGING_PREFIX = "Daemon: " - + args = parse_args() config.set_config_globals_from_pash_args(args) @@ -46,12 +52,11 @@ def init(): if not config.config: config.load_config(args.config_path) - pash_compiler.runtime_config = config.config['distr_planner'] + pash_compiler.runtime_config = config.config["distr_planner"] return args - ## ## This class holds information for each process id ## @@ -65,7 +70,7 @@ def __init__(self, input_ir, compiler_config, exec_time=None, start_exec_time=No def set_exec_time(self, exec_time): self.exec_time = exec_time - + def set_start_exec_time(self, start_exec_time): self.start_exec_time = start_exec_time @@ -73,19 +78,19 @@ def get_start_exec_time(self): return self.start_exec_time def __repr__(self): - return f'ProcIdInfo(InputIR:{self.input_ir}, CompConfig:{self.compiler_config}, ExecTime:{self.exec_time})' + return f"ProcIdInfo(InputIR:{self.input_ir}, CompConfig:{self.compiler_config}, ExecTime:{self.exec_time})" class Scheduler: - """ Takes care of running processes in parallel if there is no conflict. + """Takes care of running processes in parallel if there is no conflict. The scheduler relies on the fact that process will wait for a compilation response. This allows it to control wether to allow the next process to run or wait for all other process. 
Flow: - input cmd -> - | Compile -> + input cmd -> + | Compile -> 1- Try compiling the pipeline 2- Wait for any unsafe processes to finish - 3- Check compilation for success and any conficts + 3- Check compilation for success and any conficts - no side effects -> allow to run in parallel by sending a response - failed or conflict -> wait for all process to exit then run this process in unsafe mode @@ -101,7 +106,9 @@ class Scheduler: def __init__(self): self.input_resources = set() self.output_resources = set() - self.process_resources = {} # map process_id -> (input_resources, output_resources) + self.process_resources = ( + {} + ) # map process_id -> (input_resources, output_resources) self.next_id = 0 self.running_procs = 0 self.unsafe_running = False @@ -111,7 +118,7 @@ def __init__(self): self.reader_pipes_are_blocking = True self.request_processing_start_time = 0 ## TODO: Make that be a class or something - + ## A map that keeps mappings between proc_id and (input_ir, width, exec_time) self.process_id_input_ir_map = {} ## This is a map from input IRs, i.e., locations in the code, to a list of process_ids @@ -120,7 +127,9 @@ def __init__(self): def check_resources_safety(self, process_id): proc_input_resources, proc_output_resources = self.process_resources[process_id] all_proc_resources = proc_input_resources.union(proc_output_resources) - if self.output_resources.intersection(all_proc_resources) or self.input_resources.intersection(proc_output_resources): + if self.output_resources.intersection( + all_proc_resources + ) or self.input_resources.intersection(proc_output_resources): return False return True @@ -143,12 +152,12 @@ def determine_compiler_config(self, input_ir_file): ## Goal: Find the highest width that gives benefits ## ## Strategy, start trying lower widths, if the time seems to drop, keep trying lower. - ## + ## width_avgs = self.get_averages_per_width(input_ir_file) log("Width averages:", width_avgs) widths = width_avgs.keys() - - ## If we have at least 1, with a specific width, + + ## If we have at least 1, with a specific width, ## and the minimum width has the lowest average, then try one lower if len(widths) > 0: min_width = min(widths) @@ -167,7 +176,10 @@ def determine_compiler_config(self, input_ir_file): if best_width == min_width and min_width > 1: ## Divide the min_width by 2 and try again selected_width = min_width // 2 - log("Best width is the lowest width, trying with width:", selected_width) + log( + "Best width is the lowest width, trying with width:", + selected_width, + ) else: selected_width = best_width log("Best width is:", best_width, "We will keep executing with it.") @@ -198,19 +210,20 @@ def get_averages_per_width(self, input_ir_file): width_times[width].append(exec_time) except: width_times[width] = [exec_time] - + ## We have gathered all times for each width width_avgs = {} for width, exec_times in width_times.items(): width_avgs[width] = sum(exec_times) / len(exec_times) - + return width_avgs ## This adds the time measurement, or just removes the entry if there is no exec_time (for space reclamation) def handle_time_measurement(self, process_id, exec_time): - ## TODO: Could put those behind the profile_driven check too to not fill memory - assert(self.process_id_input_ir_map[process_id].exec_time is None) - + ## 2023-12-08 KK: When in parallel pipelines we receive two exits (when I tried to make it one something got stuck...) 
+ ## so this assert is not true + # assert self.process_id_input_ir_map[process_id].exec_time is None + ## If we don't have the exec time we do Nothing ## ## TODO: Consider removing past entries that have no execution time. @@ -222,8 +235,10 @@ def handle_time_measurement(self, process_id, exec_time): # log("All measurements:", self.process_id_input_ir_map) def add_proc_id_map(self, process_id, input_ir_file, compiler_config): - assert(not process_id in self.process_id_input_ir_map) - self.process_id_input_ir_map[process_id] = ProcIdInfo(input_ir_file, compiler_config) + assert not process_id in self.process_id_input_ir_map + self.process_id_input_ir_map[process_id] = ProcIdInfo( + input_ir_file, compiler_config + ) ## Add the mapping from ir to process_id try: @@ -238,14 +253,17 @@ def compile_and_add(self, compiled_script_file, var_file, input_ir_file): process_id = self.get_next_id() run_parallel = False compile_success = False + current_region_parallelizable = True variable_reading_start_time = datetime.now() # Read any shell variables files if present - vars_dict = env_vars_util.read_vars_file(var_file) + vars_dict = env_vars_util.read_vars_file(var_file, config.BASH_VERSION) config.set_vars_file(var_file, vars_dict) variable_reading_end_time = datetime.now() - print_time_delta("Variable Loading", variable_reading_start_time, variable_reading_end_time) + print_time_delta( + "Variable Loading", variable_reading_start_time, variable_reading_end_time + ) daemon_compile_start_time = datetime.now() ## TODO: Make the compiler config based on profiling data @@ -253,52 +271,94 @@ def compile_and_add(self, compiled_script_file, var_file, input_ir_file): ## Add the process_id -> input_ir mapping self.add_proc_id_map(process_id, input_ir_file, compiler_config) - ast_or_ir = pash_compiler.compile_ir( - input_ir_file, compiled_script_file, config.pash_args, compiler_config) + # check if any general exceptions are caught to report to --assert_compiler_success flag + try: + ast_or_ir = pash_compiler.compile_ir( + input_ir_file, compiled_script_file, config.pash_args, compiler_config + ) + except NotAllRegionParallelizableError: + ast_or_ir = None + current_region_parallelizable = False + daemon_compile_end_time = datetime.now() - print_time_delta("Daemon Compile", daemon_compile_start_time, daemon_compile_end_time) + print_time_delta( + "Daemon Compile", daemon_compile_start_time, daemon_compile_end_time + ) self.wait_unsafe() if ast_or_ir != None: compile_success = True - maybe_generate_graphviz(ast_or_ir, config.pash_args, name=f'dfg-{process_id}') - - - proc_input_resources = set(map(lambda out: str(out.resource) if str( - out.resource) != "None" else out, ast_or_ir.all_input_fids())) - proc_output_resources = set(map(lambda out: str(out.resource) if str( - out.resource) != "None" else out, ast_or_ir.all_output_fids())) - - self.process_resources[process_id] = (proc_input_resources, proc_output_resources) + maybe_generate_graphviz( + ast_or_ir, config.pash_args, name=f"dfg-{process_id}" + ) + + proc_input_resources = set( + map( + lambda out: str(out.resource) + if str(out.resource) != "None" + else out, + ast_or_ir.all_input_fids(), + ) + ) + proc_output_resources = set( + map( + lambda out: str(out.resource) + if str(out.resource) != "None" + else out, + ast_or_ir.all_output_fids(), + ) + ) + + self.process_resources[process_id] = ( + proc_input_resources, + proc_output_resources, + ) run_parallel = self.check_resources_safety(process_id) if run_parallel: self.input_resources = 
self.input_resources.union(proc_input_resources) - self.output_resources = self.output_resources.union(proc_output_resources) + self.output_resources = self.output_resources.union( + proc_output_resources + ) - if not run_parallel: + ## If we are not running in parallel everything has to finish first before scheduling for execution self.wait_for_all() - + else: + ## Wait if we have more pipelines running than our current limit + self.wait_until_limit(config.pash_args.parallel_pipelines_limit) + if compile_success: response = server_util.success_response( - f'{process_id} {compiled_script_file} {var_file} {input_ir_file}') + f"{process_id} {compiled_script_file} {var_file} {input_ir_file}" + ) + elif not current_region_parallelizable: + # send specified message to say current region is not parallelizable instead of general exception caught + response = server_util.error_response(f"{process_id} current region is not parallelizable; failed to compile") + self.unsafe_running = True else: - response = server_util.error_response(f'{process_id} failed to compile') + response = server_util.error_response(f"{process_id} failed to compile") self.unsafe_running = True + - ## Do not increase the running procs if assert_compiler_success is enabled + ## Do not increase the running procs if assert_all_regions_parallelizable is enabled ## and compilation failed, since nothing will run then. - if not compile_success and config.pash_args.assert_compiler_success: - pass + ## Do not increase when compile is not successful but regions are parallelizable (in the case that general exceptions are caught), + ## nothing will run in this case also + if (not compile_success and config.pash_args.assert_all_regions_parallelizable): + pass + elif (not compile_success and current_region_parallelizable and config.pash_args.assert_compiler_success): + pass else: self.running_procs += 1 ## Get the time before we start executing (roughly) to determine how much time this command execution will take command_exec_start_time = datetime.now() - self.process_id_input_ir_map[process_id].set_start_exec_time(command_exec_start_time) + self.process_id_input_ir_map[process_id].set_start_exec_time( + command_exec_start_time + ) return response def remove_process(self, process_id): @@ -306,8 +366,18 @@ def remove_process(self, process_id): if process_id in self.process_resources: del self.process_resources[process_id] # TODO: Should be improved to not rebuild inputs and outputs from scratch maybe use counters - self.input_resources = set().union(*[input_resources for input_resources, _ in self.process_resources.values()]) - self.output_resources = set().union(*[output_resources for _, output_resources in self.process_resources.values()]) + self.input_resources = set().union( + *[ + input_resources + for input_resources, _ in self.process_resources.values() + ] + ) + self.output_resources = set().union( + *[ + output_resources + for _, output_resources in self.process_resources.values() + ] + ) self.running_procs -= 1 if self.running_procs == 0: @@ -318,25 +388,38 @@ def get_next_id(self): return self.next_id def wait_for_all(self): - log("Waiting for all processes to finish. There are", self.running_procs, "processes remaining.") - while self.running_procs > 0: + log( + "Waiting for all processes to finish." + ) + self.wait_until_limit(1) + self.unsafe_running = False + + def wait_until_limit(self, limit: int): + log( + f"Waiting for less than {limit} processes to be running. 
There are", + self.running_procs, + "processes remaining.", + ) + while self.running_procs >= limit: input_cmd = self.get_input() # must be exit command or something is wrong - if (input_cmd.startswith("Exit:")): + if input_cmd.startswith("Exit:"): self.handle_exit(input_cmd) else: - raise Exception( - f"Command should be exit but it was {input_cmd}") - self.unsafe_running = False + raise Exception(f"Command should be exit but it was {input_cmd}") def handle_exit(self, input_cmd): - assert(input_cmd.startswith("Exit:")) + assert input_cmd.startswith("Exit:") process_id = int(input_cmd.split(":")[1]) - ## Get the execution time + ## Get the execution time command_finish_exec_time = datetime.now() - command_start_exec_time = self.process_id_input_ir_map[process_id].get_start_exec_time() - exec_time = (command_finish_exec_time - command_start_exec_time) / timedelta(milliseconds=1) + command_start_exec_time = self.process_id_input_ir_map[ + process_id + ].get_start_exec_time() + exec_time = (command_finish_exec_time - command_start_exec_time) / timedelta( + milliseconds=1 + ) log("Process:", process_id, "exited. Exec time was:", exec_time) self.handle_time_measurement(process_id, exec_time) self.remove_process(process_id) @@ -346,34 +429,42 @@ def handle_exit(self, input_cmd): def wait_unsafe(self): log("Unsafe running:", self.unsafe_running) if self.unsafe_running: - assert(self.running_procs == 1) + assert self.running_procs == 1 self.wait_for_all() self.unsafe_running = False def parse_and_run_cmd(self, input_cmd): - if(input_cmd.startswith("Compile")): - compiled_script_file, var_file, input_ir_file = self.__parse_compile_command( - input_cmd) - response = self.compile_and_add(compiled_script_file, var_file, input_ir_file) + if input_cmd.startswith("Compile"): + ( + compiled_script_file, + var_file, + input_ir_file, + ) = self.__parse_compile_command(input_cmd) + response = self.compile_and_add( + compiled_script_file, var_file, input_ir_file + ) request_processing_end_time = datetime.now() - print_time_delta("Request handling", self.request_processing_start_time, request_processing_end_time) + print_time_delta( + "Request handling", + self.request_processing_start_time, + request_processing_end_time, + ) ## Send output to the specific command self.respond(response) - elif (input_cmd.startswith("Exit:")): + elif input_cmd.startswith("Exit:"): self.handle_exit(input_cmd) - elif (input_cmd.startswith("Done")): + elif input_cmd.startswith("Done"): self.wait_for_all() ## We send output to the top level pash process ## to signify that we are done. 
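The admission rule the Scheduler applies (check_resources_safety above) is a readers/writers check: a new region may run alongside the already-admitted ones only if nothing it touches is currently being written and nothing it writes is currently being read. The same predicate over plain sets (illustrative names, not the Scheduler API):

    def safe_to_run_in_parallel(running_inputs, running_outputs, proc_inputs, proc_outputs):
        touched = proc_inputs | proc_outputs
        # Unsafe if a currently-written resource is touched at all,
        # or a currently-read resource would now be written.
        return not (running_outputs & touched) and not (running_inputs & proc_outputs)

    # e.g. safe_to_run_in_parallel({"in.txt"}, {"out.txt"}, {"other.txt"}, {"result.txt"}) -> True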
self.respond("All finished") self.done = True - elif (input_cmd.startswith("Daemon Start") or input_cmd == ""): + elif input_cmd.startswith("Daemon Start") or input_cmd == "": ## This happens when pa.sh first connects to daemon to see if it is on self.close_last_connection() else: - log(server_util.error_response(f'Error: Unsupported command: {input_cmd}')) - raise Exception(f'Error: Unsupported command: {input_cmd}') - + log(server_util.error_response(f"Error: Unsupported command: {input_cmd}")) + raise Exception(f"Error: Unsupported command: {input_cmd}") ## This method calls the reader to get an input def get_input(self): @@ -395,16 +486,20 @@ def __parse_compile_command(self, input): input_ir_file = components[2].split(":")[1] return compiled_script_file, var_file, input_ir_file except: - raise Exception(f'Parsing failure for line: {input}') + raise Exception(f"Parsing failure for line: {input}") def run(self): ## By default communicate through sockets, except if the user wants to do it through pipes - if (config.pash_args.daemon_communicates_through_unix_pipes): + if config.pash_args.daemon_communicates_through_unix_pipes: in_filename = os.getenv("RUNTIME_IN_FIFO") out_filename = os.getenv("RUNTIME_OUT_FIFO") - self.connection_manager = server_util.UnixPipeReader(in_filename, out_filename, self.reader_pipes_are_blocking) + self.connection_manager = server_util.UnixPipeReader( + in_filename, out_filename, self.reader_pipes_are_blocking + ) else: - self.connection_manager = server_util.SocketManager(os.getenv('DAEMON_SOCKET')) + self.connection_manager = server_util.SocketManager( + os.getenv("DAEMON_SOCKET") + ) while not self.done: # Process a single request input_cmd = self.get_input() @@ -412,17 +507,17 @@ def run(self): ## Parse the command (potentially also sending a response) self.parse_and_run_cmd(input_cmd) - + self.connection_manager.close() shutdown() - def shutdown(): ## There may be races since this is called through the signal handling log("PaSh daemon is shutting down...") log("PaSh daemon shut down successfully...") + def main(): args = init() if args.distributed_exec: @@ -433,7 +528,7 @@ def main(): scheduler = Scheduler() scheduler.run() - + if __name__ == "__main__": main() diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 3d6995471..04b5db9eb 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -1,34 +1,37 @@ -import argparse import sys import pickle import traceback from datetime import datetime -from pash_annotations.annotation_generation.datatypes.parallelizability.AggregatorKind import AggregatorKindEnum +from sh_expand import env_vars_util +from sh_expand.expand import ExpansionError import config -import env_vars_util from ir import * from ast_to_ir import compile_asts from ir_to_ast import to_shell from pash_graphviz import maybe_generate_graphviz from util import * +from custom_error import * from definitions.ir.aggregator_node import * -from definitions.ir.dfg_node import DFGNode from definitions.ir.nodes.eager import * from definitions.ir.nodes.pash_split import * -import definitions.ir.nodes.r_merge as r_merge import definitions.ir.nodes.r_split as r_split import definitions.ir.nodes.r_unwrap as r_unwrap import definitions.ir.nodes.dgsh_tee as dgsh_tee import definitions.ir.nodes.dfs_split_reader as dfs_split_reader + # Distirbuted Exec -import dspash.hdfs_utils as hdfs_utils +import dspash.hdfs_utils as hdfs_utils + +from cli import CompilerParser runtime_config = {} + + ## We want to catch all exceptions here 
so that they are logged correctly ## and not just printed to the stderr. def main(): @@ -39,6 +42,7 @@ def main(): log(traceback.format_exc()) sys.exit(1) + def main_body(): global runtime_config @@ -50,40 +54,36 @@ def main_body(): if not config.config: config.load_config(args.config_path) - runtime_config = config.config['distr_planner'] + runtime_config = config.config["distr_planner"] ## Read any shell variables files if present - vars_dict = env_vars_util.read_vars_file(args.var_file) + vars_dict = env_vars_util.read_vars_file(args.var_file, config.BASH_VERSION) config.set_vars_file(args.var_file, vars_dict) log("Input:", args.input_ir, "Compiled file:", args.compiled_script_file) ## Call the main procedure compiler_config = CompilerConfig(args.width) - ast_or_ir = compile_optimize_output_script(args.input_ir, args.compiled_script_file, args, compiler_config) + ast_or_ir = compile_optimize_output_script( + args.input_ir, args.compiled_script_file, args, compiler_config + ) maybe_generate_graphviz(ast_or_ir, args) def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("compiled_script_file", - help="the file in which to output the compiled script") - parser.add_argument("input_ir", - help="the file containing the dataflow graph to be optimized and executed") - parser.add_argument("--var_file", - help="determines the path of a file containing all shell variables.", - default=None) - config.add_common_arguments(parser) - args, unknown_args = parser.parse_known_args() + parser = CompilerParser() + args, _ = parser.parse_known_args() return args + ## TODO: Add more fields from args in this class CompilerConfig: def __init__(self, width): self.width = width - + def __repr__(self): - return f'CompilerConfig(Width:{self.width})' + return f"CompilerConfig(Width:{self.width})" + def compile_ir(ir_filename, compiled_script_file, args, compiler_config): """ @@ -91,61 +91,82 @@ def compile_ir(ir_filename, compiled_script_file, args, compiler_config): """ ret = None try: - ret = compile_optimize_output_script(ir_filename, compiled_script_file, args, compiler_config) + ret = compile_optimize_output_script( + ir_filename, compiled_script_file, args, compiler_config + ) + except ExpansionError as e: + log("WARNING: Exception caught because some region(s) are not expandable and therefore unparallelizable:", e) + raise NotAllRegionParallelizableError() + except UnparallelizableError as e: + log("WARNING: Exception caught because some region(s) are unparallelizable:", e) + raise NotAllRegionParallelizableError() + # log(traceback.format_exc()) # uncomment for exact trace report (PaSh user should see informative messages for unparellizable regions) + except (AdjLineNotImplementedError, NotImplementedError) as e: + log("WARNING: Exception caught because some part is not implemented:", e) + log(traceback.format_exc()) except Exception as e: log("WARNING: Exception caught:", e) - # traceback.print_exc() + log(traceback.format_exc()) return ret -def compile_optimize_output_script(ir_filename, compiled_script_file, args, compiler_config): + +def compile_optimize_output_script( + ir_filename, compiled_script_file, args, compiler_config +): global runtime_config - + ret = None ## Load the df_region from a file candidate_df_region = load_df_region(ir_filename) - + ## Compile it - optimized_ast_or_ir = compile_optimize_df_region(candidate_df_region, args, compiler_config) + optimized_ast_or_ir = compile_optimize_df_region( + candidate_df_region, args, compiler_config + ) ## Call the backend 
that executes the optimized dataflow graph ## TODO: Should never be the case for now. This is obsolete. - assert(not runtime_config['distr_backend']) + assert not runtime_config["distr_backend"] ## If the candidate DF region was indeed a DF region then we have an IR ## which should be translated to a parallel script. - if(isinstance(optimized_ast_or_ir, IR)): + if isinstance(optimized_ast_or_ir, IR): if args.distributed_exec: ir_filename = ptempfile() - script_to_execute = f"$PASH_TOP/compiler/dspash/remote_exec_graph.sh {ir_filename}\n" + script_to_execute = ( + f"$PASH_TOP/compiler/dspash/remote_exec_graph.sh {ir_filename}\n" + ) ## This might not be needed anymore (since the output script is output anyway) ## TODO: This is probably useless, remove maybe_log_optimized_script(script_to_execute, args) with open(ir_filename, "wb") as f: - obj = (optimized_ast_or_ir, config.config['shell_variables']) + obj = (optimized_ast_or_ir, config.config["shell_variables"]) pickle.dump(obj, f) else: script_to_execute = to_shell(optimized_ast_or_ir, args) - + log("Optimized script saved in:", compiled_script_file) with open(compiled_script_file, "w") as f: f.write(script_to_execute) - + ret = optimized_ast_or_ir else: - raise Exception("Script failed to compile!") - + raise UnparallelizableError("Script failed to compile!") + return ret + def load_df_region(ir_filename): - log("Retrieving candidate DF region: {} ... ".format(ir_filename), end='') + log("Retrieving candidate DF region: {} ... ".format(ir_filename), end="") with open(ir_filename, "rb") as ir_file: candidate_df_region = pickle.load(ir_file) log("Done!") return candidate_df_region + def compile_optimize_df_region(df_region, args, compiler_config): ## Compile the candidate DF regions compilation_start_time = datetime.now() @@ -154,7 +175,7 @@ def compile_optimize_df_region(df_region, args, compiler_config): print_time_delta("Compilation", compilation_start_time, compilation_end_time) ## Optimize all the IRs that can be optimized - if(args.no_optimize): + if args.no_optimize: optimized_asts_and_irs = asts_and_irs else: optimized_asts_and_irs = optimize_irs(asts_and_irs, args, compiler_config) @@ -167,28 +188,30 @@ def compile_optimize_df_region(df_region, args, compiler_config): ## ## TODO: This might bite us with the quick-abort. ## It might complicate things having a script whose half is compiled to a graph and its other half not. - assert(len(optimized_asts_and_irs) == 1) + assert len(optimized_asts_and_irs) == 1 optimized_ast_or_ir = optimized_asts_and_irs[0] - + return optimized_ast_or_ir + def maybe_log_optimized_script(script_to_execute, args): ## TODO: Merge this write with the one below. Maybe even move this logic in `pash_runtime.sh` ## Output the optimized shell script for inspection - if(args.output_optimized): - output_script_path = runtime_config['optimized_script_filename'] + if args.output_optimized: + output_script_path = runtime_config["optimized_script_filename"] with open(output_script_path, "w") as output_script_file: log("Optimized script:") log(script_to_execute) output_script_file.write(script_to_execute) + def compile_candidate_df_region(candidate_df_region, config): ## This is for the files in the IR fileIdGen = FileIdGen() - + ## If the candidate DF region is not from the top level then ## it won't be a list and thus we need to make it into a list to compile it. 
- if(not isinstance(candidate_df_region, list)): + if not isinstance(candidate_df_region, list): candidate_df_region = [candidate_df_region] ## Compile the asts @@ -199,6 +222,7 @@ def compile_candidate_df_region(candidate_df_region, config): return compiled_asts + ## TODO: Switch args to compiler_config def optimize_irs(asts_and_irs, args, compiler_config): global runtime_config @@ -207,25 +231,28 @@ def optimize_irs(asts_and_irs, args, compiler_config): optimized_asts_and_irs = [] for ast_or_ir in asts_and_irs: - if(isinstance(ast_or_ir, IR)): + if isinstance(ast_or_ir, IR): ## Assert that the graph that was returned from compilation is valid - assert(ast_or_ir.valid()) + assert ast_or_ir.valid() # log(ir_node) # with cProfile.Profile() as pr: - distributed_graph = choose_and_apply_parallelizing_transformations(ast_or_ir, compiler_config.width, - runtime_config['batch_size'], - args.r_split_batch_size) + distributed_graph = choose_and_apply_parallelizing_transformations( + ast_or_ir, + compiler_config.width, + runtime_config["batch_size"], + args.r_split_batch_size, + ) # pr.print_stats() # Eagers are added in remote notes when using distributed exec - if(not args.no_eager and not args.distributed_exec): + if not args.no_eager and not args.distributed_exec: eager_distributed_graph = add_eager_nodes(distributed_graph) else: eager_distributed_graph = distributed_graph ## Assert that the graph stayed valid after all transformations - assert(eager_distributed_graph.valid()) + assert eager_distributed_graph.valid() ## Print statistics of output nodes print_graph_statistics(eager_distributed_graph) @@ -248,30 +275,37 @@ def print_graph_statistics(graph): log("Eager nodes:", len(eager_nodes)) -def choose_and_apply_parallelizing_transformations(graph, fan_out, batch_size, r_split_batch_size): +def choose_and_apply_parallelizing_transformations( + graph, fan_out, batch_size, r_split_batch_size +): parallelizer_map = choose_parallelizing_transformations(graph) - apply_parallelizing_transformations(graph, parallelizer_map, fan_out, batch_size, - r_split_batch_size) + apply_parallelizing_transformations( + graph, parallelizer_map, fan_out, batch_size, r_split_batch_size + ) return graph -def choose_parallelizing_transformations(graph): # shall return map +def choose_parallelizing_transformations(graph): # shall return map source_node_ids = graph.source_nodes() parallelizer_map = {} workset = source_node_ids visited = set() # We apply a modified BFS such that we ensure that we know which parallelizer was chosen for all previous nodes # and assume that the decision for any subsequent node will exploit any potential synergy effects - while (len(workset) > 0): + while len(workset) > 0: curr_id = workset.pop(0) - assert(isinstance(curr_id, int)) - all_previous_nodes_visited = all(prev in visited for prev in graph.get_previous_nodes(curr_id)) + assert isinstance(curr_id, int) + all_previous_nodes_visited = all( + prev in visited for prev in graph.get_previous_nodes(curr_id) + ) if not all_previous_nodes_visited: workset.append(curr_id) elif not curr_id in visited: next_node_ids = graph.get_next_nodes(curr_id) workset += next_node_ids - parallelizer_map[curr_id] = choose_parallelizing_transformation(curr_id, graph) + parallelizer_map[curr_id] = choose_parallelizing_transformation( + curr_id, graph + ) visited.add(curr_id) return parallelizer_map @@ -280,29 +314,41 @@ def choose_parallelizing_transformations(graph): # shall return map ## 1. The round robin ## 2. 
The round robin after having performed unwrap (not sure why this is the second priority) ## 3. The consecutive chunks -## -## TODO: In the future, we could develop more complex strategies -def choose_parallelizing_transformation(curr_id, graph): # shall return map entry +## +## TODO: In the future, we could develop more complex strategies +def choose_parallelizing_transformation(curr_id, graph): # shall return map entry curr = graph.get_node(curr_id) - list_all_parallelizers_in_priority = [curr.get_option_implemented_round_robin_parallelizer(), - curr.get_option_implemented_round_robin_with_unwrap_parallelizer(), - curr.get_option_implemented_consecutive_chunks_parallelizer()] - return next((item for item in list_all_parallelizers_in_priority if item is not None), None) - - -def apply_parallelizing_transformations(graph, parallelizer_map, fan_out, batch_size, r_split_batch_size): + list_all_parallelizers_in_priority = [ + curr.get_option_implemented_round_robin_parallelizer(), + curr.get_option_implemented_round_robin_with_unwrap_parallelizer(), + curr.get_option_implemented_consecutive_chunks_parallelizer(), + ] + return next( + (item for item in list_all_parallelizers_in_priority if item is not None), None + ) + + +def apply_parallelizing_transformations( + graph, parallelizer_map, fan_out, batch_size, r_split_batch_size +): fileIdGen = graph.get_file_id_gen() - node_id_non_none_parallelizer_list = [(node_id, parallelizer) for (node_id, parallelizer) in parallelizer_map.items() - if parallelizer is not None] - for (node_id, parallelizer) in node_id_non_none_parallelizer_list: - graph.apply_parallelization_to_node(node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size) + node_id_non_none_parallelizer_list = [ + (node_id, parallelizer) + for (node_id, parallelizer) in parallelizer_map.items() + if parallelizer is not None + ] + for node_id, parallelizer in node_id_non_none_parallelizer_list: + graph.apply_parallelization_to_node( + node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size + ) + def split_hdfs_cat_input(hdfs_cat, next_node, graph, fileIdGen): """ Replaces hdfs cat with a cat per block, each cat uses has an HDFSResource input fid Returns: A normal Cat that merges the blocks (will be removed when parallizing next_node) """ - assert(isinstance(hdfs_cat, HDFSCat)) + assert isinstance(hdfs_cat, HDFSCat) ## At the moment this only works for nodes that have one standard input. 
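The traversal in `choose_parallelizing_transformations` above is a modified BFS: a node is only decided once all of its predecessors have been decided (otherwise it is re-queued), and for each node the first available parallelizer in a fixed priority order wins. A self-contained sketch of that scheduling idea on a toy adjacency-dict graph, not PaSh's `IR` class:

def choose_in_dependency_order(preds, succs, sources, options_per_node):
    """Visit nodes only after all predecessors were visited; pick the
    first non-None option per node (priority order), mirroring the
    modified BFS in choose_parallelizing_transformations."""
    chosen = {}
    workset = list(sources)
    visited = set()
    while workset:
        curr = workset.pop(0)
        if any(p not in visited for p in preds.get(curr, [])):
            workset.append(curr)          # not ready yet: re-queue
        elif curr not in visited:
            workset += succs.get(curr, [])
            options = options_per_node.get(curr, [])
            chosen[curr] = next((o for o in options if o is not None), None)
            visited.add(curr)
    return chosen

# Toy diamond graph: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3
preds = {1: [0], 2: [0], 3: [1, 2]}
succs = {0: [1, 2], 1: [3], 2: [3]}
print(choose_in_dependency_order(preds, succs, [0],
      {0: ["rr"], 1: [None, "cc"], 2: ["rr_unwrap"], 3: [None, None]}))
# {0: 'rr', 1: 'cc', 2: 'rr_unwrap', 3: None}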
if len(next_node.get_standard_inputs()) != 1: @@ -315,9 +361,11 @@ def split_hdfs_cat_input(hdfs_cat, next_node, graph, fileIdGen): # Create a cat command per file block file_config = hdfs_utils.get_file_config(hdfs_filepath) - dummy_config_path = ptempfile() # Dummy config file, should be updated by workers + dummy_config_path = ptempfile() # Dummy config file, should be updated by workers for split_num, block in enumerate(file_config.blocks): - resource = DFSSplitResource(file_config.dumps(), dummy_config_path, split_num, block.hosts) + resource = DFSSplitResource( + file_config.dumps(), dummy_config_path, split_num, block.hosts + ) block_fid = fileIdGen.next_file_id() block_fid.set_resource(resource) graph.add_edge(block_fid) @@ -327,7 +375,12 @@ def split_hdfs_cat_input(hdfs_cat, next_node, graph, fileIdGen): output_ids.append(output_fid.get_ident()) graph.add_edge(output_fid) - split_reader_node = dfs_split_reader.make_dfs_split_reader_node([block_fid.get_ident()], output_fid.get_ident(), split_num, config.HDFS_PREFIX) + split_reader_node = dfs_split_reader.make_dfs_split_reader_node( + [block_fid.get_ident()], + output_fid.get_ident(), + split_num, + config.HDFS_PREFIX, + ) graph.add_node(split_reader_node) # Remove the HDFS Cat command as it's not used anymore @@ -341,7 +394,6 @@ def split_hdfs_cat_input(hdfs_cat, next_node, graph, fileIdGen): return new_merger - ## This functions adds an eager on a given edge. def add_eager(eager_input_id, graph, fileIdGen): new_fid = fileIdGen.next_ephemeral_file_id() @@ -355,7 +407,7 @@ def add_eager(eager_input_id, graph, fileIdGen): ## Modify the next node inputs to be the new inputs next_node_id = graph.edges[eager_input_id][2] - if(not next_node_id is None): + if not next_node_id is None: next_node = graph.get_node(next_node_id) next_node.replace_edge(eager_input_id, new_id) graph.set_edge_to(new_id, next_node_id) @@ -372,12 +424,16 @@ def add_eager_nodes(graph): fileIdGen = graph.get_file_id_gen() ## Get the next nodes - workset = [node for source_node_id in source_node_ids for node in graph.get_next_nodes(source_node_id)] + workset = [ + node + for source_node_id in source_node_ids + for node in graph.get_next_nodes(source_node_id) + ] visited = set() - while (len(workset) > 0): + while len(workset) > 0: curr_id = workset.pop(0) curr = graph.get_node(curr_id) - if (not curr_id in visited): + if not curr_id in visited: visited.add(curr_id) next_node_ids = graph.get_next_nodes(curr_id) workset += next_node_ids @@ -386,7 +442,7 @@ def add_eager_nodes(graph): ## Add eager nodes if the node has more than one input curr_input_ids = graph.get_node_input_ids(curr_id) - if (len(curr_input_ids) > 1): + if len(curr_input_ids) > 1: ## TODO: If we know that a command reads its inputs in a list, ## then we might not need to put an eager on its first input. ## Note: This cannot be done for `sort -m` so we need to know in the @@ -394,23 +450,23 @@ def add_eager_nodes(graph): for curr_input_id in curr_input_ids: _fid, from_node, to_node = graph.edges[curr_input_id] - assert(to_node == curr_id) + assert to_node == curr_id ## If the edge is an input edge, then we don't want to put eager. 
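`add_eager` above splices a buffering node into an existing edge: it creates a fresh ephemeral edge, routes the eager node from the old edge to the new one, and re-points the downstream node's input. A rough sketch of that splice on a dict-based edge table; the real `FileIdGen`/`make_eager`/`set_edge_to` API is simplified away:

def add_buffer_node(edges, nodes, edge_id, new_edge_id):
    """Insert a buffering ("eager") node on edge_id, mirroring add_eager.
    edges: {edge_id: [resource, from_node_id, to_node_id]}
    nodes: {node_id: {"inputs": [...], "outputs": [...]}}"""
    buf_node = f"eager_{new_edge_id}"
    _resource, _from_node, to_node = edges[edge_id]

    # The buffer node consumes the old edge and produces the new one.
    nodes[buf_node] = {"inputs": [edge_id], "outputs": [new_edge_id]}
    edges[new_edge_id] = [None, buf_node, to_node]
    edges[edge_id][2] = buf_node

    # Re-point the downstream node's input to the new edge.
    if to_node is not None:
        ins = nodes[to_node]["inputs"]
        nodes[to_node]["inputs"] = [new_edge_id if i == edge_id else i for i in ins]

edges = {7: ["file.txt", "cat_1", "sort_2"]}
nodes = {"cat_1": {"inputs": [], "outputs": [7]},
         "sort_2": {"inputs": [7], "outputs": []}}
add_buffer_node(edges, nodes, 7, new_edge_id=1000)
print(edges[7], edges[1000], nodes["sort_2"]["inputs"])
# ['file.txt', 'cat_1', 'eager_1000'] [None, 'eager_1000', 'sort_2'] [1000]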
- if(not from_node is None): + if not from_node is None: add_eager(curr_input_id, graph, fileIdGen) - if(isinstance(curr, Split)): + if isinstance(curr, Split): eager_input_ids = curr.get_output_list()[:-1] for edge_id in eager_input_ids: add_eager(edge_id, graph, fileIdGen) - ## Add an eager after r_unwrap - if(isinstance(curr, r_unwrap.RUnwrap)): + ## Add an eager after r_unwrap + if isinstance(curr, r_unwrap.RUnwrap): eager_input_id = curr.get_output_list()[0] add_eager(eager_input_id, graph, fileIdGen) ## Add an eager after r_split - if(isinstance(curr, r_split.RSplit)): + if isinstance(curr, r_split.RSplit): eager_input_ids = curr.get_output_list() for edge_id in eager_input_ids: add_eager(edge_id, graph, fileIdGen) diff --git a/compiler/pash_graphviz.py b/compiler/pash_graphviz.py index 425a00df4..70ad53909 100644 --- a/compiler/pash_graphviz.py +++ b/compiler/pash_graphviz.py @@ -1,34 +1,37 @@ - import os from ir import * from util import * ## Ensure that PASH_TMP_PREFIX is set by pa.sh -assert(not os.getenv('PASH_TIMESTAMP') is None) -PASH_TIMESTAMP = os.getenv('PASH_TIMESTAMP') -DIR_NAME = f'pash_graphviz_{PASH_TIMESTAMP}' +assert not os.getenv("PASH_TIMESTAMP") is None +PASH_TIMESTAMP = os.getenv("PASH_TIMESTAMP") +DIR_NAME = f"pash_graphviz_{PASH_TIMESTAMP}" + def maybe_init_graphviz_dir(args): if not args.graphviz == "no": init_graphviz_dir(args) + def init_graphviz_dir(args): graphviz_dir_path = os.path.join(args.graphviz_dir, DIR_NAME) try: os.mkdir(graphviz_dir_path) except: - print(f'Error: Graphviz dir:{graphviz_dir_path} could not be created!') + print(f"Error: Graphviz dir:{graphviz_dir_path} could not be created!") exit(1) - + log("Created graphviz dir:", graphviz_dir_path) -def maybe_generate_graphviz(ir: IR, args, name='dfg'): + +def maybe_generate_graphviz(ir: IR, args, name="dfg"): if not args.graphviz == "no": generate_graphviz(ir, args, name=name) -def generate_graphviz(ir: IR, args, name='dfg'): + +def generate_graphviz(ir: IR, args, name="dfg"): ## TODO: It is unclear if importing in here (instead of in general) ## improves startup cost of the pash_runtime when not using graphviz. import graphviz diff --git a/compiler/pash_runtime.sh b/compiler/pash_runtime.sh index e5e6c70e5..4bdd0c8a9 100755 --- a/compiler/pash_runtime.sh +++ b/compiler/pash_runtime.sh @@ -103,21 +103,14 @@ else ## Invoke the compiler and make any necessary preparations source "$RUNTIME_DIR/pash_prepare_call_compiler.sh" - function run_parallel() { - trap inform_daemon_exit SIGTERM SIGINT EXIT - export SCRIPT_TO_EXECUTE="$pash_script_to_execute" - source "$RUNTIME_DIR/pash_restore_state_and_execute.sh" - inform_daemon_exit - } - ## Check if there are traps set, and if so do not execute in parallel ## TODO: This might be an overkill but is conservative traps_set=$(trap) pash_redir_output echo "$$: (2) Traps set: $traps_set" # Don't fork if compilation failed. The script might have effects on the shell state. 
if [ "$pash_runtime_return_code" -ne 0 ] || - ## If parallel pipelines is not enabled we shouldn't fork - [ "$pash_parallel_pipelines" -eq 0 ] || + ## If parallel pipelines is disabled using a flag we shouldn't fork + [ "$pash_no_parallel_pipelines" -eq 1 ] || ## If parallel pipelines is explicitly disabled (e.g., due to context), no forking [ "$pash_disable_parallel_pipelines" -eq 1 ] || ## If traps are set, no forking @@ -147,6 +140,12 @@ else pash_redir_output echo "$$: (5) BaSh script exited with ec: $pash_runtime_final_status" else + function run_parallel() { + trap inform_daemon_exit SIGTERM SIGINT EXIT + export SCRIPT_TO_EXECUTE="$pash_script_to_execute" + source "$RUNTIME_DIR/pash_restore_state_and_execute.sh" + inform_daemon_exit + } # Should we redirect errors aswell? # TODO: capturing the return state here isn't completely correct. run_parallel "$@" <&0 & diff --git a/compiler/preprocessor/preprocessor.py b/compiler/preprocessor/preprocessor.py index d44a5e0fd..817aeaf84 100644 --- a/compiler/preprocessor/preprocessor.py +++ b/compiler/preprocessor/preprocessor.py @@ -1,49 +1,64 @@ -import argparse from datetime import datetime import os import config -from shell_ast import ast_to_ast -from ir import FileIdGen +from shell_ast import transformation_options, ast_to_ast from parse import parse_shell_to_asts, from_ast_objects_to_shell from util import * import server_util from speculative import util_spec +from cli import PreprocessorParser LOGGING_PREFIX = "PaSh Preprocessor: " + @logging_prefix(LOGGING_PREFIX) def preprocess(input_script_path, args): ## 1. Execute the POSIX shell parser that returns the AST in JSON preprocessing_parsing_start_time = datetime.now() ast_objects = parse_shell_to_asts(input_script_path) preprocessing_parsing_end_time = datetime.now() - print_time_delta("Preprocessing -- Parsing", preprocessing_parsing_start_time, preprocessing_parsing_end_time) + print_time_delta( + "Preprocessing -- Parsing", + preprocessing_parsing_start_time, + preprocessing_parsing_end_time, + ) ## 2. Preprocess ASTs by replacing possible candidates for compilation ## with calls to the PaSh runtime. preprocessing_pash_start_time = datetime.now() preprocessed_asts = preprocess_asts(ast_objects, args) preprocessing_pash_end_time = datetime.now() - print_time_delta("Preprocessing -- PaSh", preprocessing_pash_start_time, preprocessing_pash_end_time) + print_time_delta( + "Preprocessing -- PaSh", + preprocessing_pash_start_time, + preprocessing_pash_end_time, + ) ## 3. 
Translate the new AST back to shell syntax preprocessing_unparsing_start_time = datetime.now() preprocessed_shell_script = from_ast_objects_to_shell(preprocessed_asts) - + preprocessing_unparsing_end_time = datetime.now() - print_time_delta("Preprocessing -- Unparsing", preprocessing_unparsing_start_time, preprocessing_unparsing_end_time) + print_time_delta( + "Preprocessing -- Unparsing", + preprocessing_unparsing_start_time, + preprocessing_unparsing_end_time, + ) return preprocessed_shell_script def preprocess_asts(ast_objects, args): - trans_mode = ast_to_ast.TransformationType(args.preprocess_mode) - if trans_mode is ast_to_ast.TransformationType.SPECULATIVE: - trans_options = ast_to_ast.SpeculativeTransformationState(mode=trans_mode, - po_file=args.partial_order_file) + trans_mode = transformation_options.TransformationType(args.preprocess_mode) + if trans_mode is transformation_options.TransformationType.SPECULATIVE: + trans_options = transformation_options.SpeculativeTransformationState( + po_file=args.partial_order_file + ) util_spec.initialize(trans_options) + elif trans_mode is transformation_options.TransformationType.AIRFLOW: + trans_options = transformation_options.AirflowTransformationState() else: - trans_options = ast_to_ast.TransformationState(mode=trans_mode) + trans_options = transformation_options.TransformationState() ## Preprocess ASTs by replacing AST regions with calls to PaSh's runtime. ## Then the runtime will do the compilation and optimization with additional @@ -52,40 +67,22 @@ def preprocess_asts(ast_objects, args): ## Let the scheduler know that we are done with the partial_order file ## TODO: We could stream the partial_order_file to the scheduler - if trans_mode is ast_to_ast.TransformationType.SPECULATIVE: + if trans_mode is transformation_options.TransformationType.SPECULATIVE: ## First complete the partial_order file util_spec.serialize_partial_order(trans_options) ## Then inform the scheduler that it can read it unix_socket_file = os.getenv("PASH_SPEC_SCHEDULER_SOCKET") - msg = util_spec.scheduler_server_init_po_msg(trans_options.get_partial_order_file()) + msg = util_spec.scheduler_server_init_po_msg( + trans_options.get_partial_order_file() + ) server_util.unix_socket_send_and_forget(unix_socket_file, msg) return preprocessed_asts -## -## This is the command line interface for the preprocessor -## -def main(): - parser = argparse.ArgumentParser() - config.add_general_config_arguments(parser) - - subparsers = parser.add_subparsers(help='sub-command help') - # create the parser for the "a" command - parser_pash = subparsers.add_parser('pash', help='Preprocess the script so that it can be run with PaSh') - config.add_common_arguments(parser_pash) - parser_pash.add_argument("input", help="the script to be preprocessed") - parser_pash.set_defaults(preprocess_mode='pash') - - # create the parser for the "b" command - parser_spec = subparsers.add_parser('spec', help='Preprocess the script so that it can be run with speculation') - parser_spec.add_argument("input", help="the script to be preprocessed") - - ## TODO: When we better integrate, this should be automatically set. 
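`preprocess_asts` above now selects a transformation-state class from the preprocessing mode (speculative, airflow, or the default PaSh mode). A small sketch of that selection; the stand-in classes, their simplified constructors, and the "airflow" enum value are assumptions, since the real definitions live in shell_ast/transformation_options.py:

from enum import Enum

class TransformationType(Enum):
    PASH = "pash"
    SPECULATIVE = "spec"
    AIRFLOW = "airflow"      # assumed value; not shown in this diff

# Stand-ins for the classes in shell_ast.transformation_options.
class TransformationState: ...
class SpeculativeTransformationState(TransformationState):
    def __init__(self, po_file): self.po_file = po_file
class AirflowTransformationState(TransformationState): ...

def make_transformation_state(preprocess_mode, partial_order_file=None):
    """Pick the state object that drives preprocessing, as preprocess_asts does."""
    mode = TransformationType(preprocess_mode)
    if mode is TransformationType.SPECULATIVE:
        return SpeculativeTransformationState(po_file=partial_order_file)
    if mode is TransformationType.AIRFLOW:
        return AirflowTransformationState()
    return TransformationState()

print(type(make_transformation_state("spec", "/tmp/po")).__name__)
# SpeculativeTransformationState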
- parser_spec.add_argument("partial_order_file", help="the file to store the partial order (currently just a sequence)") - parser_spec.set_defaults(preprocess_mode='spec') - +def main(): + parser = PreprocessorParser() args = parser.parse_args() config.set_config_globals_from_pash_args(args) @@ -98,5 +95,6 @@ def main(): preprocessed_shell_script = preprocess(args.input, args) print(preprocessed_shell_script) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/compiler/server_util.py b/compiler/server_util.py index 0bee98d3e..c50db3a50 100644 --- a/compiler/server_util.py +++ b/compiler/server_util.py @@ -4,15 +4,17 @@ import config from util import log + def success_response(string): - return f'OK: {string}\n' + return f"OK: {string}\n" def error_response(string): - return f'ERROR: {string}\n' + return f"ERROR: {string}\n" + class UnixPipeReader: - def __init__(self, in_filename, out_filename, blocking = True): + def __init__(self, in_filename, out_filename, blocking=True): self.in_filename = in_filename self.out_filename = out_filename self.buffer = "" @@ -35,7 +37,6 @@ def get_next_cmd(self): cmd = self.get_next_cmd_aux() return cmd - def get_next_cmd_aux(self): """ This method return depends on the reading mode. In blocking mode this method will @@ -46,13 +47,15 @@ def get_next_cmd_aux(self): input_buffer = "" if self.buffer: # Don't wait on fin if cmd buffer isn't empty - log("Reader buffer isn't empty. Using it instead of reading new data for the next command") + log( + "Reader buffer isn't empty. Using it instead of reading new data for the next command" + ) input_buffer = self.buffer else: log("Reader buffer is empty. Reading new data from input fifo") if self.blocking: with open(self.in_filename) as fin: - # This seems to be necessary for reading the full data. + # This seems to be necessary for reading the full data. 
# It seems like slower/smaller machines might not read the full data in one read while True: data = fin.read() @@ -64,7 +67,7 @@ def get_next_cmd_aux(self): log("Input buffer:", input_buffer) if "\n" in input_buffer: - cmd, rest = input_buffer.split("\n", 1) # split on the first \n only + cmd, rest = input_buffer.split("\n", 1) # split on the first \n only self.buffer = rest else: cmd = input_buffer @@ -83,7 +86,6 @@ def respond(self, message): fout.flush() fout.close() - ## This method doesn't do anything for unix pipe reader since we always read and write ## to and from the same fifos def close_last_connection(self): @@ -99,18 +101,16 @@ def unix_socket_send_and_forget(socket_file: str, msg: str): try: sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) sock.connect(socket_file) - msg_with_newline = msg + '\n' - byte_msg = msg_with_newline.encode('utf-8') + msg_with_newline = msg + "\n" + byte_msg = msg_with_newline.encode("utf-8") sock.sendall(byte_msg) data = sock.recv(config.SOCKET_BUF_SIZE) - str_data = data.decode('utf-8') + str_data = data.decode("utf-8") ## There should be no response on these messages - assert(len(str_data) == 0) + assert len(str_data) == 0 finally: log("Sent message:", msg, "to server.", level=1) sock.close() - - ## TODO: Instead of this, think of using a standard SocketServer @@ -137,28 +137,27 @@ def __init__(self, socket_addr: str): log("SocketManager: Created socket") self.sock.bind(server_address) - log("SocketManager: Successfully bound to socket") + log("SocketManager: Successfully bound to socket") ## TODO: Check if we need to configure the backlog - self.sock.listen() - log("SocketManager: Listenting on socket") + self.sock.listen() + log("SocketManager: Listenting on socket") ## Connection stack self.connections = [] - def get_next_cmd(self): connection, client_address = self.sock.accept() data = connection.recv(self.buf_size) ## TODO: This could be avoided for efficiency - str_data = data.decode('utf-8') + str_data = data.decode("utf-8") log("Received data:", str_data) ## TODO: Lift this requirement if needed ## ## We need to ensure that we read a command at once or the command was empty (only relevant in the first invocation) - assert(str_data.endswith("\n") or str_data == "") - + assert str_data.endswith("\n") or str_data == "" + self.connections.append(connection) return str_data @@ -166,7 +165,7 @@ def get_next_cmd(self): ## In the case of the UnixPipes, we don't have any state management here ## since all reads/writes go to/from the same fifos def respond(self, message): - bytes_message = message.encode('utf-8') + bytes_message = message.encode("utf-8") self.connections[-1].sendall(bytes_message) self.close_last_connection() diff --git a/compiler/shell_ast/ast_to_ast.py b/compiler/shell_ast/ast_to_ast.py index 7af0828c7..b1fe71054 100644 --- a/compiler/shell_ast/ast_to_ast.py +++ b/compiler/shell_ast/ast_to_ast.py @@ -1,146 +1,26 @@ -from enum import Enum -import copy -import pickle +""" +AST to AST transformation -import config -from env_var_names import * -from shell_ast.ast_util import * -from shasta.ast_node import ast_match -from shasta.json_to_ast import to_ast_node -from parse import from_ast_objects_to_shell -from speculative import util_spec - -## There are two types of ast_to_ast transformations -class TransformationType(Enum): - PASH = 'pash' - SPECULATIVE = 'spec' - -## Use this object to pass options inside the preprocessing -## trasnformation. 
-class TransformationState: - def __init__(self, mode: TransformationType): - self.mode = mode - self.node_counter = 0 - self.loop_counter = 0 - self.loop_contexts = [] - - def get_mode(self): - return self.mode - - ## Node id related - def get_next_id(self): - new_id = self.node_counter - self.node_counter += 1 - return new_id - - def get_current_id(self): - return self.node_counter - 1 - - def get_number_of_ids(self): - return self.node_counter - - ## Loop id related - def get_next_loop_id(self): - new_id = self.loop_counter - self.loop_counter += 1 - return new_id - - def get_current_loop_context(self): - ## We want to copy that - return self.loop_contexts[:] - - def get_current_loop_id(self): - if len(self.loop_contexts) == 0: - return None - else: - return self.loop_contexts[0] - - def enter_loop(self): - new_loop_id = self.get_next_loop_id() - self.loop_contexts.insert(0, new_loop_id) - return new_loop_id - - def exit_loop(self): - self.loop_contexts.pop(0) - - -## TODO: Turn it into a Transformation State class, and make a subclass for -## each of the two transformations. It is important for it to be state, because -## it will need to be passed around while traversing the tree. -class SpeculativeTransformationState(TransformationState): - def __init__(self, mode: TransformationType, po_file: str): - super().__init__(mode) - assert(self.mode is TransformationType.SPECULATIVE) - self.partial_order_file = po_file - self.partial_order_edges = [] - self.partial_order_node_loop_contexts = {} - - def get_partial_order_file(self): - assert(self.mode is TransformationType.SPECULATIVE) - return self.partial_order_file - - def add_edge(self, from_id: int, to_id: int): - self.partial_order_edges.append((from_id, to_id)) - - def get_all_edges(self): - return self.partial_order_edges - - def add_node_loop_context(self, node_id: int, loop_contexts): - self.partial_order_node_loop_contexts[node_id] = loop_contexts - - def get_all_loop_contexts(self): - return self.partial_order_node_loop_contexts - - -## -## Preprocessing -## - -## The preprocessing pass replaces all _candidate_ dataflow regions with -## calls to PaSh's runtime to let it establish if they are actually dataflow -## regions. The pass serializes all candidate dataflow regions: -## - A list of ASTs if at the top level or -## - an AST subtree if at a lower level -## -## The PaSh runtime then deserializes the(m, compiles them (if safe) and optimizes them. 
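The `TransformationState` removed above keeps counters plus a stack of loop contexts: `enter_loop` pushes a fresh loop id, `exit_loop` pops it, and the innermost id is what gets handed to the runtime. This behaviour moves into shell_ast/transformation_options.py in this PR; a compact sketch of the same bookkeeping, reduced to the loop-related parts:

class LoopContextTracker:
    """Counters and a loop-context stack, as in the moved TransformationState."""
    def __init__(self):
        self.node_counter = 0
        self.loop_counter = 0
        self.loop_contexts = []          # innermost loop id first

    def get_next_id(self):
        new_id = self.node_counter
        self.node_counter += 1
        return new_id

    def enter_loop(self):
        new_loop_id = self.loop_counter
        self.loop_counter += 1
        self.loop_contexts.insert(0, new_loop_id)
        return new_loop_id

    def exit_loop(self):
        self.loop_contexts.pop(0)

    def get_current_loop_id(self):
        return self.loop_contexts[0] if self.loop_contexts else None

t = LoopContextTracker()
t.enter_loop(); t.enter_loop()
print(t.get_current_loop_id(), t.loop_contexts)   # 1 [1, 0]
t.exit_loop()
print(t.get_current_loop_id())                    # 0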
- -preprocess_cases = { - "Pipe": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_pipe(ast_node, trans_options, last_object=last_object)), - "Command": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_command(ast_node, trans_options, last_object=last_object)), - "Redir": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_redir(ast_node, trans_options, last_object=last_object)), - "Background": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_background(ast_node, trans_options, last_object=last_object)), - "Subshell": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_subshell(ast_node, trans_options, last_object=last_object)), - "For": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_for(ast_node, trans_options, last_object=last_object)), - "While": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_while(ast_node, trans_options, last_object=last_object)), - "Defun": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_defun(ast_node, trans_options, last_object=last_object)), - "Semi": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_semi(ast_node, trans_options, last_object=last_object)), - "Or": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_or(ast_node, trans_options, last_object=last_object)), - "And": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_and(ast_node, trans_options, last_object=last_object)), - "Not": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_not(ast_node, trans_options, last_object=last_object)), - "If": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_if(ast_node, trans_options, last_object=last_object)), - "Case": (lambda trans_options, last_object: - lambda ast_node: preprocess_node_case(ast_node, trans_options, last_object=last_object)) -} +The preprocessing pass replaces all _candidate_ dataflow regions with +calls to PaSh's runtime to let it establish if they are actually dataflow +regions. The pass serializes all candidate dataflow regions: +- A list of ASTs if at the top level or +- an AST subtree if at a lower level +The PaSh runtime then deserializes the(m, compiles them (if safe) and optimizes them. +""" +from env_var_names import * +from shell_ast.ast_util import * +from shell_ast.preprocess_ast_cases import preprocess_node +from shell_ast.transformation_options import AbstractTransformationState -## Replace candidate dataflow AST regions with calls to PaSh's runtime. -def replace_ast_regions(ast_objects, trans_options): +def replace_ast_regions(ast_objects, trans_options: AbstractTransformationState): + """ + Replace candidate dataflow AST regions with calls to PaSh's runtime. + """ preprocessed_asts = [] candidate_dataflow_region = [] last_object = False @@ -150,12 +30,12 @@ def replace_ast_regions(ast_objects, trans_options): ## If we are working on the last object we need to keep that in mind when replacing. ## ## The last df-region should not be executed in parallel no matter what (to not lose its exit code.) - if (i == len(ast_objects) - 1): + if i == len(ast_objects) - 1: # log("Last object") last_object = True ast, original_text, _linno_before, _linno_after = ast_object - assert(isinstance(ast, AstNode)) + assert isinstance(ast, AstNode) ## Goals: This transformation can approximate in several directions. ## 1. Not replacing a candidate dataflow region. 
@@ -174,487 +54,87 @@ def replace_ast_regions(ast_objects, trans_options): ## then the second output is true. ## - If the next AST needs to be replaced too (e.g. if the current one is a background) ## then the third output is true - preprocessed_ast_object = preprocess_node(ast, trans_options, last_object=last_object) + preprocessed_ast_object = preprocess_node( + ast, trans_options, last_object=last_object + ) ## If the dataflow region is not maximal then it implies that the whole ## AST should be replaced. - assert(not preprocessed_ast_object.is_non_maximal() - or preprocessed_ast_object.should_replace_whole_ast()) - + assert ( + not preprocessed_ast_object.is_non_maximal() + or preprocessed_ast_object.should_replace_whole_ast() + ) + ## If the whole AST needs to be replaced then it implies that ## something will be replaced - assert(not preprocessed_ast_object.should_replace_whole_ast() - or preprocessed_ast_object.will_anything_be_replaced()) + assert ( + not preprocessed_ast_object.should_replace_whole_ast() + or preprocessed_ast_object.will_anything_be_replaced() + ) ## If it isn't maximal then we just add it to the candidate - if(preprocessed_ast_object.is_non_maximal()): - candidate_dataflow_region.append((preprocessed_ast_object.ast, - original_text)) + if preprocessed_ast_object.is_non_maximal(): + candidate_dataflow_region.append( + (preprocessed_ast_object.ast, original_text) + ) else: ## If the current candidate dataflow region is non-empty ## it means that the previous AST was in the background so ## the current one has to be included in the process no matter what - if (len(candidate_dataflow_region) > 0): - candidate_dataflow_region.append((preprocessed_ast_object.ast, - original_text)) + if len(candidate_dataflow_region) > 0: + candidate_dataflow_region.append( + (preprocessed_ast_object.ast, original_text) + ) ## Since the current one is maximal (or not wholy replaced) ## we close the candidate. - dataflow_region_asts, dataflow_region_lines = unzip(candidate_dataflow_region) + dataflow_region_asts, dataflow_region_lines = unzip( + candidate_dataflow_region + ) dataflow_region_text = join_original_text_lines(dataflow_region_lines) - replaced_ast = replace_df_region(dataflow_region_asts, trans_options, - ast_text=dataflow_region_text, disable_parallel_pipelines=last_object) + replaced_ast = trans_options.replace_df_region( + dataflow_region_asts, + ast_text=dataflow_region_text, + disable_parallel_pipelines=last_object, + ) candidate_dataflow_region = [] preprocessed_asts.append(replaced_ast) else: - if(preprocessed_ast_object.should_replace_whole_ast()): - replaced_ast = replace_df_region([preprocessed_ast_object.ast], trans_options, - ast_text=original_text, disable_parallel_pipelines=last_object) + if preprocessed_ast_object.should_replace_whole_ast(): + replaced_ast = trans_options.replace_df_region( + [preprocessed_ast_object.ast], + ast_text=original_text, + disable_parallel_pipelines=last_object, + ) preprocessed_asts.append(replaced_ast) else: ## In this case, it is possible that no replacement happened, ## meaning that we can simply return the original parsed text as it was. 
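The loop in `replace_ast_regions` accumulates non-maximal candidates (e.g., background ASTs) and only closes a region when a maximal AST arrives or the script ends, with the final region always closed with parallel pipelines disabled so its exit code is preserved. A stripped-down sketch of that accumulate-and-flush control flow, using plain strings and booleans instead of preprocessed ASTs:

def group_regions(items):
    """items: list of (ast, is_non_maximal). Returns the closed regions, each as
    (list_of_asts, disable_parallel_pipelines) -- the shape of the decisions that
    replace_ast_regions feeds into replace_df_region."""
    regions, candidate = [], []
    for i, (ast, non_maximal) in enumerate(items):
        last = (i == len(items) - 1)
        if non_maximal:
            candidate.append(ast)              # keep accumulating (e.g. background)
        else:
            candidate.append(ast)
            regions.append((candidate, last))  # close the region here
            candidate = []
    if candidate:                              # close a trailing open region
        regions.append((candidate, True))      # never parallelize the last one
    return regions

print(group_regions([("a &", True), ("b", False), ("c &", True)]))
# [(['a &', 'b'], False), (['c &'], True)]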
- if(preprocessed_ast_object.will_anything_be_replaced() or original_text is None): + if ( + preprocessed_ast_object.will_anything_be_replaced() + or original_text is None + ): preprocessed_asts.append(preprocessed_ast_object.ast) else: preprocessed_asts.append(UnparsedScript(original_text)) ## Close the final dataflow region - if(len(candidate_dataflow_region) > 0): + if len(candidate_dataflow_region) > 0: dataflow_region_asts, dataflow_region_lines = unzip(candidate_dataflow_region) dataflow_region_text = join_original_text_lines(dataflow_region_lines) - replaced_ast = replace_df_region(dataflow_region_asts, trans_options, - ast_text=dataflow_region_text, disable_parallel_pipelines=True) + replaced_ast = trans_options.replace_df_region( + dataflow_region_asts, + ast_text=dataflow_region_text, + disable_parallel_pipelines=True, + ) candidate_dataflow_region = [] preprocessed_asts.append(replaced_ast) return preprocessed_asts -## This function joins original unparsed shell source in a safe way + +## This function joins original unparsed shell source in a safe way ## so as to deal with the case where some of the text is None (e.g., in case of stdin parsing). def join_original_text_lines(shell_source_lines_or_none): if any([text_or_none is None for text_or_none in shell_source_lines_or_none]): return None else: return "\n".join(shell_source_lines_or_none) - -def preprocess_node(ast_object, trans_options, last_object=False): - global preprocess_cases - return ast_match(ast_object, preprocess_cases, trans_options, last_object) - -## This preprocesses the AST node and also replaces it if it needs replacement . -## It is called by constructs that cannot be included in a dataflow region. -def preprocess_close_node(ast_object, trans_options, last_object=False): - preprocessed_ast_object = preprocess_node(ast_object, trans_options, last_object=last_object) - preprocessed_ast = preprocessed_ast_object.ast - should_replace_whole_ast = preprocessed_ast_object.should_replace_whole_ast() - if(should_replace_whole_ast): - final_ast = replace_df_region([preprocessed_ast], trans_options, - disable_parallel_pipelines=last_object) - something_replaced = True - else: - final_ast = preprocessed_ast - something_replaced = preprocessed_ast_object.will_anything_be_replaced() - return final_ast, something_replaced - -def preprocess_node_pipe(ast_node, trans_options, last_object=False): - ## A pipeline is *always* a candidate dataflow region. - ## Q: Is that true? - - ## TODO: Preprocess the internals of the pipe to allow - ## for mutually recursive calls to PaSh. - ## - ## For example, if a command in the pipe has a command substitution - ## in one of its arguments then we would like to call our runtime - ## there instead of - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=True, - non_maximal=ast_node.is_background, - last_ast=last_object) - return preprocessed_ast_object - -## TODO: Complete this -def preprocess_node_command(ast_node, trans_options, last_object=False): - ## TODO: Preprocess the internals of the pipe to allow - ## for mutually recursive calls to PaSh. - ## - ## For example, if a command in the pipe has a command substitution - ## in one of its arguments then we would like to call our runtime - ## there instead of - - ## If there are no arguments, the command is just an - ## assignment (Q: or just redirections?) 
- if(len(ast_node.arguments) == 0): - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=False, - last_ast=last_object) - return preprocessed_ast_object - - ## This means we have a command. Commands are always candidate dataflow - ## regions. - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=True, - non_maximal=False, - last_ast=last_object) - return preprocessed_ast_object - -# Background of (linno * t * redirection list) -## TODO: It might be possible to actually not close the inner node but rather apply the redirections on it -def preprocess_node_redir(ast_node, trans_options, last_object=False): - preprocessed_node, something_replaced = preprocess_close_node(ast_node.node, trans_options, - last_object=last_object) - ast_node.node = preprocessed_node - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=something_replaced, - last_ast=last_object) - return preprocessed_ast_object - -## TODO: Is that correct? Also, this should probably affect `semi`, `and`, and `or` -def preprocess_node_background(ast_node, trans_options, last_object=False): - ## A background node is *always* a candidate dataflow region. - ## Q: Is that true? - - ## TODO: Preprocess the internals of the background to allow - ## for mutually recursive calls to PaSh. - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=True, - non_maximal=True, - last_ast=last_object) - return preprocessed_ast_object - -## TODO: We can actually preprocess the underlying node and then -## return its characteristics above. However, we would need -## to add a field in the IR that a node runs in a subshell -## (which would have implications on how the backend outputs it). -## -## e.g. a subshell node should also be output as a subshell in the backend. -## FIXME: This might not just be suboptimal, but also wrong. -def preprocess_node_subshell(ast_node, trans_options, last_object=False): - preprocessed_body, something_replaced = preprocess_close_node(ast_node.body, trans_options, - last_object=last_object) - ast_node.body = preprocessed_body - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=something_replaced, - last_ast=last_object) - return preprocessed_ast_object - -## TODO: For all of the constructs below, think whether we are being too conservative - -## TODO: This is not efficient at all since it calls the PaSh runtime everytime the loop is entered. -## We have to find a way to improve that. -def preprocess_node_for(ast_node, trans_options, last_object=False): - ## If we are in a loop, we push the loop identifier into the loop context - loop_id = trans_options.enter_loop() - preprocessed_body, something_replaced = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - - ## TODO: Then send this iteration identifier when talking to the spec scheduler - ## TODO: After running checks put this behind a check to only run under speculation - - ## Create a new variable that tracks loop iterations - var_name = loop_iter_var(loop_id) - export_node = make_export_var_constant_string(var_name, '0') - increment_node = make_increment_var(var_name) - - ## Also store the whole sequence of loop iters in a file - all_loop_ids = trans_options.get_current_loop_context() - - ## export pash_loop_iters="$pash_loop_XXX_iter $pash_loop_YYY_iter ..." 
- save_loop_iters_node = export_pash_loop_iters_for_current_context(all_loop_ids) - - ## Prepend the increment in the body - ast_node.body = make_typed_semi_sequence( - [to_ast_node(increment_node), - to_ast_node(save_loop_iters_node), - copy.deepcopy(preprocessed_body)]) - - ## We pop the loop identifier from the loop context. - ## - ## KK 2023-04-27: Could this exit happen before the replacement leading to wrong - ## results? I think not because we use the _close_node preprocessing variant. - ## A similar issue might happen for while - trans_options.exit_loop() - - ## reset the loop iters after we exit the loop - out_of_loop_loop_ids = trans_options.get_current_loop_context() - reset_loop_iters_node = export_pash_loop_iters_for_current_context(out_of_loop_loop_ids) - - ## Prepend the export in front of the loop - # new_node = ast_node - new_node = make_typed_semi_sequence( - [to_ast_node(export_node), - ast_node, - to_ast_node(reset_loop_iters_node)]) - # print(new_node) - - preprocessed_ast_object = PreprocessedAST(new_node, - replace_whole=False, - non_maximal=False, - something_replaced=something_replaced, - last_ast=last_object) - - return preprocessed_ast_object - -def preprocess_node_while(ast_node, trans_options, last_object=False): - ## If we are in a loop, we push the loop identifier into the loop context - trans_options.enter_loop() - - preprocessed_test, sth_replaced_test = preprocess_close_node(ast_node.test, trans_options, last_object=last_object) - preprocessed_body, sth_replaced_body = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - ast_node.test = preprocessed_test - ast_node.body = preprocessed_body - something_replaced = sth_replaced_test or sth_replaced_body - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=something_replaced, - last_ast=last_object) - - ## We pop the loop identifier from the loop context. - trans_options.exit_loop() - return preprocessed_ast_object - -## This is the same as the one for `For` -def preprocess_node_defun(ast_node, trans_options, last_object=False): - ## TODO: For now we don't want to compile function bodies - # preprocessed_body = preprocess_close_node(ast_node.body) - # ast_node.body = preprocessed_body - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=False, - last_ast=last_object) - return preprocessed_ast_object - -## TODO: If the preprocessed is not maximal we actually need to combine it with the one on the right. -def preprocess_node_semi(ast_node, trans_options, last_object=False): - # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) - ## - ## TODO: Is it valid that only the right one is considered the last command? 
- preprocessed_left, sth_replaced_left = preprocess_close_node(ast_node.left_operand, trans_options, last_object=False) - preprocessed_right, sth_replaced_right = preprocess_close_node(ast_node.right_operand, trans_options, last_object=last_object) - ast_node.left_operand = preprocessed_left - ast_node.right_operand = preprocessed_right - sth_replaced = sth_replaced_left or sth_replaced_right - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=sth_replaced, - last_ast=last_object) - return preprocessed_ast_object - -## TODO: Make sure that what is inside an `&&`, `||`, `!` (and others) does not run in parallel_pipelines -## since we need its exit code. -def preprocess_node_and(ast_node, trans_options, last_object=False): - # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) - preprocessed_left, sth_replaced_left = preprocess_close_node(ast_node.left_operand, trans_options, last_object=last_object) - preprocessed_right, sth_replaced_right = preprocess_close_node(ast_node.right_operand, trans_options, last_object=last_object) - ast_node.left_operand = preprocessed_left - ast_node.right_operand = preprocessed_right - sth_replaced = sth_replaced_left or sth_replaced_right - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=sth_replaced, - last_ast=last_object) - return preprocessed_ast_object - -def preprocess_node_or(ast_node, trans_options, last_object=False): - # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) - preprocessed_left, sth_replaced_left = preprocess_close_node(ast_node.left_operand, trans_options, last_object=last_object) - preprocessed_right, sth_replaced_right = preprocess_close_node(ast_node.right_operand, trans_options, last_object=last_object) - ast_node.left_operand = preprocessed_left - ast_node.right_operand = preprocessed_right - sth_replaced = sth_replaced_left or sth_replaced_right - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=sth_replaced, - last_ast=last_object) - return preprocessed_ast_object - -def preprocess_node_not(ast_node, trans_options, last_object=False): - # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left) - preprocessed_body, sth_replaced = preprocess_close_node(ast_node.body, trans_options, last_object=last_object) - ast_node.body = preprocessed_body - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=sth_replaced, - last_ast=last_object) - return preprocessed_ast_object - - -def preprocess_node_if(ast_node, trans_options, last_object=False): - # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) - preprocessed_cond, sth_replaced_cond = preprocess_close_node(ast_node.cond, trans_options, last_object=last_object) - preprocessed_then, sth_replaced_then = preprocess_close_node(ast_node.then_b, trans_options, last_object=last_object) - preprocessed_else, sth_replaced_else = preprocess_close_node(ast_node.else_b, trans_options, last_object=last_object) - ast_node.cond = preprocessed_cond - ast_node.then_b = preprocessed_then - ast_node.else_b = preprocessed_else - sth_replaced = sth_replaced_cond or sth_replaced_then or sth_replaced_else - preprocessed_ast_object 
= PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=sth_replaced, - last_ast=last_object) - return preprocessed_ast_object - -def preprocess_case(case, trans_options, last_object=False): - preprocessed_body, sth_replaced = preprocess_close_node(case["cbody"], trans_options, last_object=last_object) - case["cbody"] = preprocessed_body - return case, sth_replaced - -def preprocess_node_case(ast_node, trans_options, last_object=False): - preprocessed_cases_replaced = [preprocess_case(case, trans_options, last_object=last_object) for case in ast_node.cases] - preprocessed_cases, sth_replaced_cases = list(zip(*preprocessed_cases_replaced)) - ast_node.cases = preprocessed_cases - preprocessed_ast_object = PreprocessedAST(ast_node, - replace_whole=False, - non_maximal=False, - something_replaced=any(sth_replaced_cases), - last_ast=last_object) - return preprocessed_ast_object - - -## TODO: I am a little bit confused about how compilation happens. -## Does it happen bottom up or top down: i.e. when we first encounter an occurence -## do we recurse in it and then compile from the leaf, or just compile the surface? - - - -## Replaces IR subtrees with a command that calls them (more -## precisely, a command that calls a python script to call them). -## -## Note: The traversal that replace_irs does, is exactly the same as -## the one that is done by compile_node. Both of these functions -## transform nodes of type t to something else. -## -## TODO: For now this just replaces the IRs starting from the ourside -## one first, but it should start from the bottom up to handle -## recursive IRs. - -## This function serializes a candidate df_region in a file, and in its place, -## it adds a command that calls our distribution planner with the name of the -## saved file. -## -## If we are need to disable parallel pipelines, e.g., if we are in the context of an if, -## or if we are in the end of a script, then we set a variable. -def replace_df_region(asts, trans_options, disable_parallel_pipelines=False, ast_text=None) -> AstNode: - transformation_mode = trans_options.get_mode() - if transformation_mode is TransformationType.PASH: - ir_filename = ptempfile() - - ## Serialize the node in a file - with open(ir_filename, "wb") as ir_file: - pickle.dump(asts, ir_file) - - ## Serialize the candidate df_region asts back to shell - ## so that the sequential script can be run in parallel to the compilation. - sequential_script_file_name = ptempfile() - text_to_output = get_shell_from_ast(asts, ast_text=ast_text) - ## However, if we have the original ast text, then we can simply output that. - with open(sequential_script_file_name, "w") as script_file: - script_file.write(text_to_output) - replaced_node = make_call_to_pash_runtime(ir_filename, sequential_script_file_name, disable_parallel_pipelines) - elif transformation_mode is TransformationType.SPECULATIVE: - text_to_output = get_shell_from_ast(asts, ast_text=ast_text) - ## Generate an ID - df_region_id = trans_options.get_next_id() - - ## Get the current loop id and save it so that the runtime knows - ## which loop it is in. - loop_id = trans_options.get_current_loop_id() - - ## Determine its predecessors - ## TODO: To make this properly work, we should keep some state - ## in the AST traversal to be able to determine predecessors. 
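In the speculative branch above, each replaced dataflow region is issued a fresh id and, for now, a single predecessor, namely the previously issued id (the TODO notes that real predecessor tracking needs more traversal state). A tiny sketch of that id and partial-order bookkeeping:

class PartialOrderBuilder:
    """Issue region ids and record naive straight-line predecessor edges,
    as the speculative branch of replace_df_region does today."""
    def __init__(self):
        self.next_id = 0
        self.edges = []                      # (from_id, to_id) pairs

    def new_region(self):
        df_region_id = self.next_id
        self.next_id += 1
        predecessors = [] if df_region_id == 0 else [df_region_id - 1]
        for pred in predecessors:
            self.edges.append((pred, df_region_id))
        return df_region_id, predecessors

po = PartialOrderBuilder()
for _ in range(3):
    po.new_region()
print(po.edges)   # [(0, 1), (1, 2)]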
- if df_region_id == 0: - predecessors = [] - else: - predecessors = [df_region_id - 1] - ## Write to a file indexed by its ID - util_spec.save_df_region(text_to_output, trans_options, df_region_id, predecessors) - ## TODO: Add an entry point to spec through normal PaSh - replaced_node = make_call_to_spec_runtime(df_region_id, loop_id) - else: - ## Unreachable - assert(False) - - return to_ast_node(replaced_node) - - -def get_shell_from_ast(asts, ast_text=None) -> str: - ## If we don't have the original ast text, we need to unparse the ast - if (ast_text is None): - text_to_output = from_ast_objects_to_shell(asts) - else: - text_to_output = ast_text - return text_to_output - - -## -## Code that constructs the preprocessed ASTs -## - - -## This function makes a command that calls the pash runtime -## together with the name of the file containing an IR. Then the -## pash runtime should read from this file and continue -## execution. -## -## TODO: At the moment this is written in python but it is in essense a simple shell script. -## Is it possible to make it be a simple string instead of manually creating the AST? -## -## (MAYBE) TODO: The way I did it, is by calling the parser once, and seeing -## what it returns. Maybe it would make sense to call the parser on -## the fly to have a cleaner implementation here? -def make_call_to_pash_runtime(ir_filename, sequential_script_file_name, - disable_parallel_pipelines) -> AstNode: - - ## Disable parallel pipelines if we are in the last command of the script. - ## ``` - ## pash_disable_parallel_pipelines=1 - ## ``` - if(disable_parallel_pipelines): - assignments = [["pash_disable_parallel_pipelines", - string_to_argument("1")]] - else: - assignments = [["pash_disable_parallel_pipelines", - string_to_argument("0")]] - assignments.append(["pash_sequential_script_file", - string_to_argument(sequential_script_file_name)]) - assignments.append(["pash_input_ir_file", - string_to_argument(ir_filename)]) - - ## Call the runtime - arguments = [string_to_argument("source"), - string_to_argument(config.RUNTIME_EXECUTABLE)] - runtime_node = make_command(arguments, - assignments=assignments) - return runtime_node - -## TODO: Make that an actual call to the spec runtime -def make_call_to_spec_runtime(command_id: int, loop_id) -> AstNode: - assignments = [["pash_spec_command_id", - string_to_argument(str(command_id))]] - if loop_id is None: - loop_id_str = "" - else: - loop_id_str = str(loop_id) - - assignments.append(["pash_spec_loop_id", - string_to_argument(loop_id_str)]) - - ## Call the runtime - arguments = [string_to_argument("source"), - string_to_argument(config.RUNTIME_EXECUTABLE)] - ## Pass all relevant argument to the planner - runtime_node = make_command(arguments, - assignments=assignments) - - return runtime_node diff --git a/compiler/shell_ast/ast_util.py b/compiler/shell_ast/ast_util.py index 57529904f..4f695328c 100644 --- a/compiler/shell_ast/ast_util.py +++ b/compiler/shell_ast/ast_util.py @@ -1,4 +1,3 @@ - from env_var_names import * from shasta.ast_node import * from shasta.json_to_ast import * @@ -7,8 +6,10 @@ ## This class is used by the preprocessor in ast_to_ir class PreprocessedAST: - def __init__(self, ast, replace_whole, non_maximal, something_replaced=True, last_ast=False): - assert(isinstance(ast, AstNode)) + def __init__( + self, ast, replace_whole, non_maximal, something_replaced=True, last_ast=False + ): + assert isinstance(ast, AstNode) self.ast = ast self.replace_whole = replace_whole self.non_maximal = non_maximal @@ -20,13 
+21,14 @@ def should_replace_whole_ast(self): def is_non_maximal(self): return self.non_maximal - + def will_anything_be_replaced(self): return self.something_replaced def is_last_ast(self): return self.last_ast + ## This class represents text that was not modified at all by preprocessing, and therefore does not ## need to be unparsed. class UnparsedScript: @@ -38,99 +40,127 @@ def __init__(self, text): ## Pattern matching for the AST ## + def check_if_ast_is_supported(construct, arguments, **kwargs): return + def format_args(args): formatted_args = [format_arg_chars(arg_chars) for arg_chars in args] return formatted_args + def format_arg_chars(arg_chars): chars = [format_arg_char(arg_char) for arg_char in arg_chars] return "".join(chars) + def format_arg_char(arg_char: ArgChar) -> str: return arg_char.format() + def string_to_carg_char_list(string: str) -> "list[CArgChar]": ret = [CArgChar(ord(char)) for char in string] return ret + def string_to_arguments(string): return [string_to_argument(word) for word in string.split(" ")] + def string_to_argument(string): ret = [char_to_arg_char(char) for char in string] return ret + def concat_arguments(arg1, arg2): ## Arguments are simply `arg_char list` and therefore can just be concatenated return arg1 + arg2 + ## FIXME: This is certainly not complete. It is used to generate the ## AST for the call to the distributed planner. It only handles simple ## characters def char_to_arg_char(char): - return ['C' , ord(char)] + return ["C", ord(char)] + def escaped_char(char): - return ['E' , ord(char)] + return ["E", ord(char)] + def standard_var_ast(string): return make_kv("V", ["Normal", False, string, []]) + def make_arith(arg): - return make_kv("A", arg) + return make_kv("A", arg) + def make_quoted_variable(string): return make_kv("Q", [standard_var_ast(string)]) + def quote_arg(arg): return make_kv("Q", arg) + def redir_append_stderr_to_string_file(string): - return make_kv("File",["Append",2,string_to_argument(string)]) + return make_kv("File", ["Append", 2, string_to_argument(string)]) + def redir_stdout_to_file(arg): - return make_kv("File",["To", 1, arg]) + return make_kv("File", ["To", 1, arg]) + def redir_file_to_stdin(arg): - return make_kv("File",["From", 0, arg]) + return make_kv("File", ["From", 0, arg]) -def make_background(body, redirections=[]): + +def make_background(body, redirections=None): + redirections = [] if redirections is None else redirections lineno = 0 node = make_kv("Background", [lineno, body, redirections]) return node + def make_backquote(node): node = make_kv("B", node) return node -def make_subshell(body, redirections=[]): + +def make_subshell(body, redirections=None): + redirections = [] if redirections is None else redirections lineno = 0 node = make_kv("Subshell", [lineno, body, redirections]) return node -def make_command(arguments, redirections=[], assignments=[]): + +def make_command(arguments, redirections=None, assignments=None): + redirections = [] if redirections is None else redirections + assignments = [] if assignments is None else assignments lineno = 0 node = make_kv("Command", [lineno, assignments, arguments, redirections]) return node + def make_nop(): return make_command([string_to_argument(":")]) + def make_assignment(var, value): lineno = 0 - assignment=(var, value) - assignments=[assignment] + assignment = (var, value) + assignments = [assignment] node = make_kv("Command", [lineno, assignments, [], []]) return node + def make_semi_sequence(asts): - if(len(asts) == 0): + if len(asts) == 0: return 
make_nop() - if(len(asts) == 1): + if len(asts) == 1: return asts[0] else: acc = asts[-1] @@ -140,35 +170,41 @@ def make_semi_sequence(asts): acc = make_kv("Semi", [ast, acc]) return acc + def make_defun(name, body): lineno = 0 node = make_kv("Defun", [lineno, name, body]) return node + ## ## Make some nodes ## + def make_export_var_constant_string(var_name: str, value: str): node = make_export_var(var_name, string_to_argument(value)) return node + def make_export_var(var_name: str, arg_char_list): ## An argument is an arg_char_list - arg1 = string_to_argument(f'{var_name}=') - arguments = [string_to_argument("export"), - concat_arguments(arg1, arg_char_list)] + arg1 = string_to_argument(f"{var_name}=") + arguments = [string_to_argument("export"), concat_arguments(arg1, arg_char_list)] ## Pass all relevant argument to the planner node = make_command(arguments) return node + def export_pash_loop_iters_for_current_context(all_loop_ids: "list[int]"): if len(all_loop_ids) > 0: iter_var_names = [loop_iter_var(loop_id) for loop_id in all_loop_ids] - iter_vars = [standard_var_ast(iter_var_name) for iter_var_name in iter_var_names] + iter_vars = [ + standard_var_ast(iter_var_name) for iter_var_name in iter_var_names + ] concatted_vars = [iter_vars[0]] for iter_var in iter_vars[1:]: - concatted_vars.append(char_to_arg_char('-')) + concatted_vars.append(char_to_arg_char("-")) concatted_vars.append(iter_var) quoted_vars = [quote_arg(concatted_vars)] else: @@ -182,46 +218,46 @@ def export_pash_loop_iters_for_current_context(all_loop_ids: "list[int]"): def make_unset_var(var_name: str): ## An argument is an arg_char_list - arguments = [string_to_argument("unset"), - string_to_argument(var_name)] + arguments = [string_to_argument("unset"), string_to_argument(var_name)] ## Pass all relevant argument to the planner node = make_command(arguments) return node + def make_increment_var(var_name: str): - arg = string_to_argument(f'{var_name}+1') + arg = string_to_argument(f"{var_name}+1") arith_expr = make_arith(arg) - assignments = [[var_name, - [arith_expr]]] + assignments = [[var_name, [arith_expr]]] node = make_command([], assignments=assignments) return node + def make_echo_ast(argument, var_file_path): nodes = [] ## Source variables if present - if(not var_file_path is None): + if not var_file_path is None: arguments = [string_to_argument("source"), string_to_argument(var_file_path)] line_number = 0 - node = make_kv('Command', [line_number, [], arguments, []]) + node = make_kv("Command", [line_number, [], arguments, []]) nodes.append(node) ## Reset the exit status - variable_arg = make_kv('V', ['Normal', "false", 'pash_previous_exit_status', []]) + variable_arg = make_kv("V", ["Normal", "false", "pash_previous_exit_status", []]) arguments = [string_to_argument("exit"), [variable_arg]] - exit_node = make_kv('Command', [0, [], arguments, []]) - node = make_kv('Subshell', [0, exit_node, []]) + exit_node = make_kv("Command", [0, [], arguments, []]) + node = make_kv("Subshell", [0, exit_node, []]) nodes.append(node) ## Reset the input arguments - variable_arg = make_kv('V', ['Normal', "false", 'pash_input_args', []]) + variable_arg = make_kv("V", ["Normal", "false", "pash_input_args", []]) arguments = [string_to_argument("set"), string_to_argument("--"), [variable_arg]] - set_node = make_kv('Command', [0, [], arguments, []]) + set_node = make_kv("Command", [0, [], arguments, []]) nodes.append(set_node) arguments = [string_to_argument("echo"), string_to_argument("-n"), argument] line_number = 0 - node = 
make_kv('Command', [line_number, [], arguments, []]) + node = make_kv("Command", [line_number, [], arguments, []]) nodes.append(node) - return nodes \ No newline at end of file + return nodes diff --git a/compiler/shell_ast/preprocess_ast_cases.py b/compiler/shell_ast/preprocess_ast_cases.py new file mode 100644 index 000000000..3ba32f584 --- /dev/null +++ b/compiler/shell_ast/preprocess_ast_cases.py @@ -0,0 +1,479 @@ +import copy + +from shell_ast.ast_util import * +from shell_ast.transformation_options import AbstractTransformationState +from shasta.ast_node import AstNode + + +def preprocess_node( + ast_node: AstNode, + trans_options: AbstractTransformationState, + last_object: bool, +) -> PreprocessedAST: + """ + Preprocesses an AstNode. Given an AstNode of any type, it will appropriately + dispatch a preprocessor for the specificy node type + + Parameters: + ast_node (AstNode): The AstNode to parse + trans_options (AbstractTransformationState): + A concrete transformation state instance corresponding to the output target + last_object (bool): Flag for whether this is the last AstNode + + Returns: + PreprocessedAst: the preprocessed version of the original AstNode + + Note: + For preprocess_node to dispatch the right function, the function being + called must follow the convention "preprocess_node_" + """ + node_name = type(ast_node).NodeName.lower() + preprocess_fn = globals().get(f"preprocess_node_{node_name}") + if preprocess_fn is None: + raise KeyError(f"Could not find appropriate preprocessor for {node_name}") + return preprocess_fn(ast_node, trans_options, last_object) + + +## This preprocesses the AST node and also replaces it if it needs replacement . +## It is called by constructs that cannot be included in a dataflow region. +def preprocess_close_node( + ast_node: AstNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + preprocessed_ast_object = preprocess_node( + ast_node, trans_options, last_object=last_object + ) + preprocessed_ast = preprocessed_ast_object.ast + should_replace_whole_ast = preprocessed_ast_object.should_replace_whole_ast() + if should_replace_whole_ast: + final_ast = trans_options.replace_df_region( + asts=[preprocessed_ast], disable_parallel_pipelines=last_object + ) + something_replaced = True + else: + final_ast = preprocessed_ast + something_replaced = preprocessed_ast_object.will_anything_be_replaced() + return final_ast, something_replaced + + +## TODO: I am a little bit confused about how compilation happens. +## Does it happen bottom up or top down: i.e. when we first encounter an occurence +## do we recurse in it and then compile from the leaf, or just compile the surface? + +## Replaces IR subtrees with a command that calls them (more +## precisely, a command that calls a python script to call them). +## +## Note: The traversal that replace_irs does, is exactly the same as +## the one that is done by compile_node. Both of these functions +## transform nodes of type t to something else. +## +## TODO: For now this just replaces the IRs starting from the ourside +## one first, but it should start from the bottom up to handle +## recursive IRs. + +## This function serializes a candidate df_region in a file, and in its place, +## it adds a command that calls our distribution planner with the name of the +## saved file. +## +## If we are need to disable parallel pipelines, e.g., if we are in the context of an if, +## or if we are in the end of a script, then we set a variable. 
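# Editor's note: a minimal, self-contained sketch of the NodeName-based dispatch that
# preprocess_node above relies on. DemoPipeNode and its handler are stand-ins invented
# for illustration; the real code dispatches on shasta AstNode subclasses such as PipeNode.
class DemoPipeNode:
    NodeName = "DemoPipe"


def preprocess_node_demopipe(ast_node, trans_options, last_object):
    return f"would preprocess a {type(ast_node).NodeName} node (last_object={last_object})"


def dispatch_demo(ast_node, trans_options=None, last_object=False):
    node_name = type(ast_node).NodeName.lower()
    handler = globals().get(f"preprocess_node_{node_name}")
    if handler is None:
        raise KeyError(f"Could not find appropriate preprocessor for {node_name}")
    return handler(ast_node, trans_options, last_object)


# dispatch_demo(DemoPipeNode()) -> "would preprocess a DemoPipe node (last_object=False)"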
+ + +def preprocess_node_pipe( + ast_node: PipeNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + ## A pipeline is *always* a candidate dataflow region. + ## Q: Is that true? + + ## TODO: Preprocess the internals of the pipe to allow + ## for mutually recursive calls to PaSh. + ## + ## For example, if a command in the pipe has a command substitution + ## in one of its arguments then we would like to call our runtime + ## there instead of + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=True, + non_maximal=ast_node.is_background, + last_ast=last_object, + ) + return preprocessed_ast_object + + +## TODO: Complete this +def preprocess_node_command( + ast_node: CommandNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + ## TODO: Preprocess the internals of the pipe to allow + ## for mutually recursive calls to PaSh. + ## + ## For example, if a command in the pipe has a command substitution + ## in one of its arguments then we would like to call our runtime + ## there instead of + + ## If there are no arguments, the command is just an + ## assignment (Q: or just redirections?) + if len(ast_node.arguments) == 0: + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=False, + last_ast=last_object, + ) + return preprocessed_ast_object + + ## This means we have a command. Commands are always candidate dataflow + ## regions. + preprocessed_ast_object = PreprocessedAST( + ast_node, replace_whole=True, non_maximal=False, last_ast=last_object + ) + return preprocessed_ast_object + + +# Background of (linno * t * redirection list) +## TODO: It might be possible to actually not close the inner node but rather apply the redirections on it +def preprocess_node_redir( + ast_node: RedirNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + preprocessed_node, something_replaced = preprocess_close_node( + ast_node.node, trans_options, last_object=last_object + ) + ast_node.node = preprocessed_node + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=something_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +## TODO: Is that correct? Also, this should probably affect `semi`, `and`, and `or` +def preprocess_node_background( + ast_node: BackgroundNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + ## A background node is *always* a candidate dataflow region. + ## Q: Is that true? + + ## TODO: Preprocess the internals of the background to allow + ## for mutually recursive calls to PaSh. + preprocessed_ast_object = PreprocessedAST( + ast_node, replace_whole=True, non_maximal=True, last_ast=last_object + ) + return preprocessed_ast_object + + +## TODO: We can actually preprocess the underlying node and then +## return its characteristics above. However, we would need +## to add a field in the IR that a node runs in a subshell +## (which would have implications on how the backend outputs it). +## +## e.g. a subshell node should also be output as a subshell in the backend. +## FIXME: This might not just be suboptimal, but also wrong. 
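# Editor's note: a small sketch of how the cases above differ only in the flags of the
# PreprocessedAST they return, assuming PreprocessedAST is exported by shell_ast.ast_util
# (as the star-import at the top of this file suggests). The string arguments stand in
# for real shasta AST nodes and are for illustration only.
from shell_ast.ast_util import PreprocessedAST

pipeline = PreprocessedAST("a | b", replace_whole=True, non_maximal=False, last_ast=False)
background = PreprocessedAST("c &", replace_whole=True, non_maximal=True, last_ast=False)
assignment = PreprocessedAST(
    "x=1", replace_whole=False, non_maximal=False, something_replaced=False, last_ast=False
)
# Only the first two are candidate dataflow regions, so only they can later be swapped
# for a call to the PaSh runtime by preprocess_close_node / replace_df_region.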
+def preprocess_node_subshell( + ast_node: SubshellNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + preprocessed_body, something_replaced = preprocess_close_node( + ast_node.body, trans_options, last_object=last_object + ) + ast_node.body = preprocessed_body + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=something_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +## TODO: For all of the constructs below, think whether we are being too conservative + + +## TODO: This is not efficient at all since it calls the PaSh runtime everytime the loop is entered. +## We have to find a way to improve that. +def preprocess_node_for( + ast_node: ForNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + ## If we are in a loop, we push the loop identifier into the loop context + loop_id = trans_options.enter_loop() + preprocessed_body, something_replaced = preprocess_close_node( + ast_node.body, trans_options, last_object=last_object + ) + + ## TODO: Then send this iteration identifier when talking to the spec scheduler + ## TODO: After running checks put this behind a check to only run under speculation + + ## Create a new variable that tracks loop iterations + var_name = loop_iter_var(loop_id) + export_node = make_export_var_constant_string(var_name, "0") + increment_node = make_increment_var(var_name) + + ## Also store the whole sequence of loop iters in a file + all_loop_ids = trans_options.get_current_loop_context() + + ## export pash_loop_iters="$pash_loop_XXX_iter $pash_loop_YYY_iter ..." + save_loop_iters_node = export_pash_loop_iters_for_current_context(all_loop_ids) + + ## Prepend the increment in the body + ast_node.body = make_typed_semi_sequence( + [ + to_ast_node(increment_node), + to_ast_node(save_loop_iters_node), + copy.deepcopy(preprocessed_body), + ] + ) + + ## We pop the loop identifier from the loop context. + ## + ## KK 2023-04-27: Could this exit happen before the replacement leading to wrong + ## results? I think not because we use the _close_node preprocessing variant. 
+ ## A similar issue might happen for while + trans_options.exit_loop() + + ## reset the loop iters after we exit the loop + out_of_loop_loop_ids = trans_options.get_current_loop_context() + reset_loop_iters_node = export_pash_loop_iters_for_current_context( + out_of_loop_loop_ids + ) + + ## Prepend the export in front of the loop + # new_node = ast_node + new_node = make_typed_semi_sequence( + [to_ast_node(export_node), ast_node, to_ast_node(reset_loop_iters_node)] + ) + # print(new_node) + + preprocessed_ast_object = PreprocessedAST( + new_node, + replace_whole=False, + non_maximal=False, + something_replaced=something_replaced, + last_ast=last_object, + ) + + return preprocessed_ast_object + + +def preprocess_node_while( + ast_node: WhileNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + ## If we are in a loop, we push the loop identifier into the loop context + trans_options.enter_loop() + + preprocessed_test, sth_replaced_test = preprocess_close_node( + ast_node.test, trans_options, last_object=last_object + ) + preprocessed_body, sth_replaced_body = preprocess_close_node( + ast_node.body, trans_options, last_object=last_object + ) + ast_node.test = preprocessed_test + ast_node.body = preprocessed_body + something_replaced = sth_replaced_test or sth_replaced_body + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=something_replaced, + last_ast=last_object, + ) + + ## We pop the loop identifier from the loop context. + trans_options.exit_loop() + return preprocessed_ast_object + + +## This is the same as the one for `For` +def preprocess_node_defun( + ast_node: DefunNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + ## TODO: For now we don't want to compile function bodies + # preprocessed_body = preprocess_close_node(ast_node.body) + # ast_node.body = preprocessed_body + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=False, + last_ast=last_object, + ) + return preprocessed_ast_object + + +## TODO: If the preprocessed is not maximal we actually need to combine it with the one on the right. +def preprocess_node_semi( + ast_node: SemiNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) + ## + ## TODO: Is it valid that only the right one is considered the last command? + preprocessed_left, sth_replaced_left = preprocess_close_node( + ast_node.left_operand, trans_options, last_object + ) + preprocessed_right, sth_replaced_right = preprocess_close_node( + ast_node.right_operand, trans_options, last_object=last_object + ) + ast_node.left_operand = preprocessed_left + ast_node.right_operand = preprocessed_right + sth_replaced = sth_replaced_left or sth_replaced_right + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=sth_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +## TODO: Make sure that what is inside an `&&`, `||`, `!` (and others) does not run in parallel_pipelines +## since we need its exit code. 
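# Editor's note: a rough sketch of what the loop instrumentation above amounts to,
# assuming compiler/ is on PYTHONPATH and that loop_iter_var(0) yields something like
# "pash_loop_0_iter" (the exact name is only implied by the pash_loop_XXX_iter comment,
# so treat it as illustrative).
from shell_ast.ast_util import make_export_var_constant_string, make_increment_var

var_name = "pash_loop_0_iter"
export_node = make_export_var_constant_string(var_name, "0")  # export pash_loop_0_iter=0
increment_node = make_increment_var(var_name)                 # pash_loop_0_iter=$((pash_loop_0_iter+1))

# preprocess_node_for then roughly rewrites `for x in ...; do body; done` as:
#   export pash_loop_0_iter=0
#   for x in ...; do
#       pash_loop_0_iter=$((pash_loop_0_iter+1))
#       export pash_loop_iters="$pash_loop_0_iter"   # current loop context
#       body                                         # (already preprocessed)
#   done
#   export pash_loop_iters=...                       # reset to the enclosing loop context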
+def preprocess_node_and( + ast_node: AndNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) + preprocessed_left, sth_replaced_left = preprocess_close_node( + ast_node.left_operand, trans_options, last_object=last_object + ) + preprocessed_right, sth_replaced_right = preprocess_close_node( + ast_node.right_operand, trans_options, last_object=last_object + ) + ast_node.left_operand = preprocessed_left + ast_node.right_operand = preprocessed_right + sth_replaced = sth_replaced_left or sth_replaced_right + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=sth_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +def preprocess_node_or( + ast_node: OrNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) + preprocessed_left, sth_replaced_left = preprocess_close_node( + ast_node.left_operand, trans_options, last_object=last_object + ) + preprocessed_right, sth_replaced_right = preprocess_close_node( + ast_node.right_operand, trans_options, last_object=last_object + ) + ast_node.left_operand = preprocessed_left + ast_node.right_operand = preprocessed_right + sth_replaced = sth_replaced_left or sth_replaced_right + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=sth_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +def preprocess_node_not( + ast_node: NotNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left) + preprocessed_body, sth_replaced = preprocess_close_node( + ast_node.body, trans_options, last_object=last_object + ) + ast_node.body = preprocessed_body + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=sth_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +def preprocess_node_if( + ast_node: IfNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + # preprocessed_left, should_replace_whole_ast, is_non_maximal = preprocess_node(ast_node.left, irFileGen, config) + preprocessed_cond, sth_replaced_cond = preprocess_close_node( + ast_node.cond, trans_options, last_object=last_object + ) + preprocessed_then, sth_replaced_then = preprocess_close_node( + ast_node.then_b, trans_options, last_object=last_object + ) + preprocessed_else, sth_replaced_else = preprocess_close_node( + ast_node.else_b, trans_options, last_object=last_object + ) + ast_node.cond = preprocessed_cond + ast_node.then_b = preprocessed_then + ast_node.else_b = preprocessed_else + sth_replaced = sth_replaced_cond or sth_replaced_then or sth_replaced_else + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=sth_replaced, + last_ast=last_object, + ) + return preprocessed_ast_object + + +def preprocess_case( + case, trans_options: AbstractTransformationState, last_object: bool +): + preprocessed_body, sth_replaced = preprocess_close_node( + case["cbody"], trans_options, last_object=last_object + ) + case["cbody"] = preprocessed_body + 
return case, sth_replaced + + +def preprocess_node_case( + ast_node: CaseNode, + trans_options: AbstractTransformationState, + last_object: bool = False, +): + preprocessed_cases_replaced = [ + preprocess_case(case, trans_options, last_object=last_object) + for case in ast_node.cases + ] + preprocessed_cases, sth_replaced_cases = list(zip(*preprocessed_cases_replaced)) + ast_node.cases = preprocessed_cases + preprocessed_ast_object = PreprocessedAST( + ast_node, + replace_whole=False, + non_maximal=False, + something_replaced=any(sth_replaced_cases), + last_ast=last_object, + ) + return preprocessed_ast_object diff --git a/compiler/shell_ast/transformation_options.py b/compiler/shell_ast/transformation_options.py new file mode 100644 index 000000000..0d5221c2c --- /dev/null +++ b/compiler/shell_ast/transformation_options.py @@ -0,0 +1,217 @@ +from abc import ABC, abstractmethod +from enum import Enum +import pickle + +from shell_ast.ast_util import * +from shasta.json_to_ast import to_ast_node +from speculative import util_spec +from parse import from_ast_objects_to_shell + + +## There are two types of ast_to_ast transformations +class TransformationType(Enum): + PASH = "pash" + SPECULATIVE = "spec" + AIRFLOW = "airflow" + + +class AbstractTransformationState(ABC): + def __init__(self): + self._node_counter = 0 + self._loop_counter = 0 + self._loop_contexts = [] + + def get_mode(self): + return TransformationType.PASH + + ## Node id related + def get_next_id(self): + new_id = self._node_counter + self._node_counter += 1 + return new_id + + def get_current_id(self): + return self._node_counter - 1 + + def get_number_of_ids(self): + return self._node_counter + + ## Loop id related + def get_next_loop_id(self): + new_id = self._loop_counter + self._loop_counter += 1 + return new_id + + def get_current_loop_context(self): + ## We want to copy that + return self._loop_contexts[:] + + def get_current_loop_id(self): + if len(self._loop_contexts) == 0: + return None + else: + return self._loop_contexts[0] + + def enter_loop(self): + new_loop_id = self.get_next_loop_id() + self._loop_contexts.insert(0, new_loop_id) + return new_loop_id + + def exit_loop(self): + self._loop_contexts.pop(0) + + @abstractmethod + def replace_df_region( + self, asts, disable_parallel_pipelines=False, ast_text=None + ) -> AstNode: + pass + + +## Use this object to pass options inside the preprocessing +## trasnformation. +class TransformationState(AbstractTransformationState): + def replace_df_region( + self, asts, disable_parallel_pipelines=False, ast_text=None + ) -> AstNode: + ir_filename = ptempfile() + + ## Serialize the node in a file + with open(ir_filename, "wb") as ir_file: + pickle.dump(asts, ir_file) + + ## Serialize the candidate df_region asts back to shell + ## so that the sequential script can be run in parallel to the compilation. + sequential_script_file_name = ptempfile() + text_to_output = get_shell_from_ast(asts, ast_text=ast_text) + ## However, if we have the original ast text, then we can simply output that. + with open(sequential_script_file_name, "w") as script_file: + script_file.write(text_to_output) + replaced_node = TransformationState.make_call_to_pash_runtime( + ir_filename, sequential_script_file_name, disable_parallel_pipelines + ) + + return to_ast_node(replaced_node) + + ## This function makes a command that calls the pash runtime + ## together with the name of the file containing an IR. Then the + ## pash runtime should read from this file and continue + ## execution. 
+ ## + ## TODO: At the moment this is written in python but it is in essense a simple shell script. + ## Is it possible to make it be a simple string instead of manually creating the AST? + ## + ## (MAYBE) TODO: The way I did it, is by calling the parser once, and seeing + ## what it returns. Maybe it would make sense to call the parser on + ## the fly to have a cleaner implementation here? + @staticmethod + def make_call_to_pash_runtime( + ir_filename, sequential_script_file_name, disable_parallel_pipelines + ) -> AstNode: + ## Disable parallel pipelines if we are in the last command of the script. + ## ``` + ## pash_disable_parallel_pipelines=1 + ## ``` + if disable_parallel_pipelines: + assignments = [["pash_disable_parallel_pipelines", string_to_argument("1")]] + else: + assignments = [["pash_disable_parallel_pipelines", string_to_argument("0")]] + assignments.append( + [ + "pash_sequential_script_file", + string_to_argument(sequential_script_file_name), + ] + ) + assignments.append(["pash_input_ir_file", string_to_argument(ir_filename)]) + + ## Call the runtime + arguments = [ + string_to_argument("source"), + string_to_argument(config.RUNTIME_EXECUTABLE), + ] + runtime_node = make_command(arguments, assignments=assignments) + return runtime_node + + +## TODO: Turn it into a Transformation State class, and make a subclass for +## each of the two transformations. It is important for it to be state, because +## it will need to be passed around while traversing the tree. +class SpeculativeTransformationState(AbstractTransformationState): + def __init__(self, po_file: str): + self.partial_order_file = po_file + self.partial_order_edges = [] + self.partial_order_node_loop_contexts = {} + + def replace_df_region( + self, asts, disable_parallel_pipelines=False, ast_text=None + ) -> AstNode: + text_to_output = get_shell_from_ast(asts, ast_text=ast_text) + ## Generate an ID + df_region_id = self.get_next_id() + + ## Get the current loop id and save it so that the runtime knows + ## which loop it is in. + loop_id = self.get_current_loop_id() + + ## Determine its predecessors + ## TODO: To make this properly work, we should keep some state + ## in the AST traversal to be able to determine predecessors. 
+ if df_region_id == 0: + predecessors = [] + else: + predecessors = [df_region_id - 1] + ## Write to a file indexed by its ID + util_spec.save_df_region(text_to_output, self, df_region_id, predecessors) + ## TODO: Add an entry point to spec through normal PaSh + replaced_node = SpeculativeTransformationState.make_call_to_spec_runtime( + df_region_id, loop_id + ) + return to_ast_node(replaced_node) + + def get_partial_order_file(self): + return self.partial_order_file + + def add_edge(self, from_id: int, to_id: int): + self.partial_order_edges.append((from_id, to_id)) + + def get_all_edges(self): + return self.partial_order_edges + + def add_node_loop_context(self, node_id: int, loop_contexts): + self.partial_order_node_loop_contexts[node_id] = loop_contexts + + def get_all_loop_contexts(self): + return self.partial_order_node_loop_contexts + + ## TODO: Make that an actual call to the spec runtime + @staticmethod + def make_call_to_spec_runtime(command_id: int, loop_id) -> AstNode: + assignments = [["pash_spec_command_id", string_to_argument(str(command_id))]] + if loop_id is None: + loop_id_str = "" + else: + loop_id_str = str(loop_id) + + assignments.append(["pash_spec_loop_id", string_to_argument(loop_id_str)]) + + ## Call the runtime + arguments = [ + string_to_argument("source"), + string_to_argument(config.RUNTIME_EXECUTABLE), + ] + ## Pass all relevant argument to the planner + runtime_node = make_command(arguments, assignments=assignments) + + return runtime_node + + +class AirflowTransformationState(TransformationState): + pass + + +def get_shell_from_ast(asts, ast_text=None) -> str: + ## If we don't have the original ast text, we need to unparse the ast + if ast_text is None: + text_to_output = from_ast_objects_to_shell(asts) + else: + text_to_output = ast_text + return text_to_output diff --git a/compiler/speculative/util_spec.py b/compiler/speculative/util_spec.py index 7783832fe..c117e4c6f 100644 --- a/compiler/speculative/util_spec.py +++ b/compiler/speculative/util_spec.py @@ -1,4 +1,3 @@ - import os import config @@ -8,6 +7,7 @@ ## This file contains utility functions useful for the speculative execution component ## + def initialize(trans_options) -> None: ## Make the directory that contains the files in the partial order dir_path = partial_order_directory() @@ -15,24 +15,31 @@ def initialize(trans_options) -> None: # ## Initialize the po file # initialize_po_file(trans_options, dir_path) + def partial_order_directory() -> str: - return f'{config.PASH_TMP_PREFIX}/speculative/partial_order/' + return f"{config.PASH_TMP_PREFIX}/speculative/partial_order/" + def partial_order_file_path(): - return f'{config.PASH_TMP_PREFIX}/speculative/partial_order_file' + return f"{config.PASH_TMP_PREFIX}/speculative/partial_order_file" + def initialize_po_file(trans_options, dir_path) -> None: ## Initializae the partial order file - with open(trans_options.get_partial_order_file(), 'w') as f: - f.write(f'# Partial order files path:\n') - f.write(f'{dir_path}\n') + with open(trans_options.get_partial_order_file(), "w") as f: + f.write(f"# Partial order files path:\n") + f.write(f"{dir_path}\n") + def scheduler_server_init_po_msg(partial_order_file: str) -> str: - return f'Init:{partial_order_file}' + return f"Init:{partial_order_file}" + ## TODO: To support partial orders, we need to pass some more context here, ## i.e., the connections of this node. Now it assumes we have a sequence. 
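# Editor's note: a self-contained illustration of the sequential assumption above.
# With predecessors = [df_region_id - 1], preprocessing df regions 0..3 yields a simple
# chain, which serialize_edge below renders one edge per line.
edges = []
for df_region_id in range(4):
    predecessors = [] if df_region_id == 0 else [df_region_id - 1]
    for predecessor in predecessors:
        edges.append((predecessor, df_region_id))

serialized = "".join(f"{from_id} -> {to_id}\n" for from_id, to_id in edges)
# serialized == "0 -> 1\n1 -> 2\n2 -> 3\n"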
-def save_df_region(text_to_output: str, trans_options, df_region_id: int, predecessor_ids: int) -> None: +def save_df_region( + text_to_output: str, trans_options, df_region_id: int, predecessor_ids: int +) -> None: ## To support loops we also need to associate nodes with their surrounding loops current_loop_context = trans_options.get_current_loop_context() log("Df region:", df_region_id, "loop context:", current_loop_context) @@ -41,7 +48,7 @@ def save_df_region(text_to_output: str, trans_options, df_region_id: int, predec trans_options.add_node_loop_context(df_region_id, current_loop_context) # Save df_region as text in its own file - df_region_path = f'{partial_order_directory()}/{df_region_id}' + df_region_path = f"{partial_order_directory()}/{df_region_id}" with open(df_region_path, "w") as f: f.write(text_to_output) @@ -50,21 +57,24 @@ def save_df_region(text_to_output: str, trans_options, df_region_id: int, predec trans_options.add_edge(predecessor, df_region_id) - ## TODO: Figure out a way to put all serialization/deserialization of messages ## and parsing/unparsing in a specific module. + ## TODO: Move serialization to a partial_order_file.py def serialize_edge(from_id: int, to_id: int) -> str: - return f'{from_id} -> {to_id}\n' + return f"{from_id} -> {to_id}\n" + def serialize_number_of_nodes(number_of_ids: int) -> str: - return f'{number_of_ids}\n' + return f"{number_of_ids}\n" + def serialize_loop_context(node_id: int, loop_contexts) -> str: ## Galaxy brain serialization loop_contexts_str = ",".join([str(loop_ctx) for loop_ctx in loop_contexts]) - return f'{node_id}-loop_ctx-{loop_contexts_str}\n' + return f"{node_id}-loop_ctx-{loop_contexts_str}\n" + ## TODO: Eventually we might want to retrieve the number_of_ids from trans_options def save_number_of_nodes(trans_options): @@ -73,6 +83,7 @@ def save_number_of_nodes(trans_options): with open(partial_order_file_path, "a") as po_file: po_file.write(serialize_number_of_nodes(number_of_ids)) + def save_loop_contexts(trans_options): loop_context_dict = trans_options.get_all_loop_contexts() log("Loop context dict:", loop_context_dict) @@ -82,6 +93,7 @@ def save_loop_contexts(trans_options): loop_ctx = loop_context_dict[node_id] po_file.write(serialize_loop_context(node_id, loop_ctx)) + def serialize_partial_order(trans_options): ## Initialize the po file dir_path = partial_order_directory() diff --git a/compiler/util.py b/compiler/util.py index 2c131e0f7..4406a6dcb 100644 --- a/compiler/util.py +++ b/compiler/util.py @@ -2,30 +2,34 @@ import functools import logging from typing import Optional, TypeVar, Union, List, Any + TType = TypeVar("TType") import os import sys import config import tempfile + def flatten_list(lst): return [item for sublist in lst for item in sublist] + def unzip(lst): - res = [[ i for i, j in lst ], - [ j for i, j in lst ]] + res = [[i for i, j in lst], [j for i, j in lst]] return res + def pad(lst, index): - if(index >= len(lst)): + if index >= len(lst): lst += [None] * (index + 1 - len(lst)) return lst + def print_time_delta(prefix, start_time, end_time): ## Always output time in the log. 
time_difference = (end_time - start_time) / timedelta(milliseconds=1) ## If output_time flag is set, log the time - if (config.OUTPUT_TIME): + if config.OUTPUT_TIME: log("{} time:".format(prefix), time_difference, " ms", level=0) else: log("{} time:".format(prefix), time_difference, " ms") @@ -41,17 +45,21 @@ def wrapper(*args, **kwargs): result = func(*args, **kwargs) config.LOGGING_PREFIX = old_prefix return result + return wrapper + return decorator + ## This is a wrapper for prints -def log(*args, end='\n', level=1): +def log(*args, end="\n", level=1): ## If the debug logging level is at least ## as high as this log message. ## TODO: Allow all levels if level >= 1: concatted_args = " ".join([str(a) for a in list(args)]) - logging.info(f'{config.LOGGING_PREFIX} {concatted_args}') + logging.info(f"{config.LOGGING_PREFIX} {concatted_args}") + def ptempfile(): fd, name = tempfile.mkstemp(dir=config.PASH_TMP_PREFIX) @@ -59,21 +67,27 @@ def ptempfile(): os.close(fd) return name -def return_empty_list_if_none_else_itself(arg: Optional[TType]) -> Union[TType, List[Any]]: #list always empty + +def return_empty_list_if_none_else_itself( + arg: Optional[TType], +) -> Union[TType, List[Any]]: # list always empty if arg is None: return [] else: return arg + def return_default_if_none_else_itself(arg: Optional[TType], default: TType) -> TType: if arg is None: return default else: return arg + ## This function gets a key and a value from the ast json format def get_kv(dic): return (dic[0], dic[1]) + def make_kv(key, val): return [key, val] diff --git a/evaluation/tests/interface_tests/run.sh b/evaluation/tests/interface_tests/run.sh index e0cd53cf1..0aa93edec 100755 --- a/evaluation/tests/interface_tests/run.sh +++ b/evaluation/tests/interface_tests/run.sh @@ -4,7 +4,7 @@ export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject- # time: print real in seconds, to simplify parsing bash="bash" -pash="$PASH_TOP/pa.sh --parallel_pipelines --profile_driven" +pash="$PASH_TOP/pa.sh --profile_driven" output_dir="$PASH_TOP/evaluation/tests/interface_tests/output" rm -rf "$output_dir" @@ -321,6 +321,12 @@ test_redir_dup() $shell redir-dup.sh } +test_IFS() +{ + local shell=$1 + $shell test-IFS.sh +} + ## We run all tests composed with && to exit on the first that fails if [ "$#" -eq 0 ]; then run_test test1 @@ -365,6 +371,7 @@ if [ "$#" -eq 0 ]; then run_test test_star run_test test_env_vars run_test test_redir_dup + run_test test_IFS else for testname in $@ do diff --git a/evaluation/tests/interface_tests/test-IFS.sh b/evaluation/tests/interface_tests/test-IFS.sh new file mode 100644 index 000000000..ecc977f80 --- /dev/null +++ b/evaluation/tests/interface_tests/test-IFS.sh @@ -0,0 +1,5 @@ +IFS=/ +curr_dir=/test1/test2/test3/test4 +for name in $curr_dir; do + echo "$name" +done diff --git a/evaluation/tests/test_evaluation_scripts.sh b/evaluation/tests/test_evaluation_scripts.sh index b3c6731de..519365886 100755 --- a/evaluation/tests/test_evaluation_scripts.sh +++ b/evaluation/tests/test_evaluation_scripts.sh @@ -47,12 +47,11 @@ n_inputs=( if [ "$EXPERIMENTAL" -eq 1 ]; then configurations=( - # "" # Commenting this out since the tests take a lot of time to finish - "--parallel_pipelines" + "" ) else configurations=( - "--parallel_pipelines --profile_driven" + "--profile_driven" ) fi @@ -190,7 +189,7 @@ execute_tests() { } execute_tests "" "${script_microbenchmarks[@]}" -execute_tests "--assert_compiler_success" "${pipeline_microbenchmarks[@]}" +execute_tests 
"--assert_all_regions_parallelizable" "${pipeline_microbenchmarks[@]}" #cat ${results_time} | sed 's/,/./' > /tmp/a #cat /tmp/a | sed 's/@/,/' > ${results_time} diff --git a/pa.sh b/pa.sh index 30922f029..609dad786 100755 --- a/pa.sh +++ b/pa.sh @@ -31,6 +31,9 @@ then exit fi +## get bash version for pash +export PASH_BASH_VERSION="${BASH_VERSINFO[@]:0:3}" + ## Create a temporary directory where PaSh can use for temporary files and logs export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/" diff --git a/requirements.txt b/requirements.txt index 09124b40f..4fee3bebe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ graphviz libdash -pash-annotations==0.2.0 +pash-annotations==0.2.2 shasta==0.1.0 -sh-expand +sh-expand>=0.1.6 diff --git a/runtime/wait_for_output_and_sigpipe_rest.sh b/runtime/wait_for_output_and_sigpipe_rest.sh index a56bfb597..5974da51c 100755 --- a/runtime/wait_for_output_and_sigpipe_rest.sh +++ b/runtime/wait_for_output_and_sigpipe_rest.sh @@ -15,7 +15,7 @@ export internal_exec_status=$? # This value may contains multiple pids as a whitespace-separated string, and # we must split it as multiple pids by shell's field splitting. # shellcheck disable=SC2086 -(> /dev/null 2>&1 kill -SIGPIPE $pids_to_kill || true) +(unset IFS; > /dev/null 2>&1 kill -SIGPIPE $pids_to_kill || true) ## ## Old way of waiting, very inefficient. diff --git a/scripts/test_eval/logparser.py b/scripts/test_eval/logparser.py index ea3a82872..a48e78cf1 100644 --- a/scripts/test_eval/logparser.py +++ b/scripts/test_eval/logparser.py @@ -6,19 +6,20 @@ DEFAULT_LOG_FOLDER = "tmp_log/" + class LogParser: """ A class used to parse the pa.sh log files - All parse_* methods return a dataframe of only the files parsed in this call. + All parse_* methods return a dataframe of only the files parsed in this call. Use get_df for all parsed files across multible calls to parse_*. Methods: parse_file: parses a log file parse_folder: parses log files in a folder parse_log: parses a given log string - get_df: returns a comprehensive dataframe of every - log parsed (using any of the functions above) + get_df: returns a comprehensive dataframe of every + log parsed (using any of the functions above) during the function lifetime. 
Dataframe columns: @@ -48,24 +49,44 @@ class LogParser: def __init__(self, df=None): self.df = df if df else pd.DataFrame() - - def parse_log(self, log: str)->pd.DataFrame: + + def parse_log(self, log: str) -> pd.DataFrame: """ Parses a pa.sh log with path file_path Return: A single entry pandas dataframe, or None if failed """ - - border = "-"*40 + + border = "-" * 40 argslog, pashlog, timelog = log.split(border) - args_of_interest = set(["input", "width", "output_time", "no_eager", "r_split", "r_split_batch_size", "IN", "dgsh_tee"]) + args_of_interest = set( + [ + "input", + "width", + "output_time", + "no_eager", + "r_split", + "r_split_batch_size", + "IN", + "dgsh_tee", + ] + ) parsed_args = self.__parse_args__(argslog, args_of_interest) - tags_of_interest = set(["Execution time", "Backend time", "Compilation time", "Preprocessing time", "Eager nodes", "Compiler exited with code"]) + tags_of_interest = set( + [ + "Execution time", + "Backend time", + "Compilation time", + "Preprocessing time", + "Eager nodes", + "Compiler exited with code", + ] + ) parsed_log = self.__parse_pash_log__(pashlog, tags_of_interest) - #can be empty + # can be empty parsed_time = self.__parse_time_log__(timelog) if not parsed_args["input"]: @@ -77,23 +98,23 @@ def parse_log(self, log: str)->pd.DataFrame: split_type = "r-split" if parsed_args["r_split"] else "auto-split" data = { - #From Args - "test_name" : test_name, + # From Args + "test_name": test_name, "IN": os.path.basename(parsed_args["IN"]), - "split_type" : split_type, - "no_eager" : parsed_args["no_eager"], + "split_type": split_type, + "no_eager": parsed_args["no_eager"], "width": int(parsed_args["width"]), "r_split_batch_size": int(parsed_args["r_split_batch_size"]), "dgsh_tee": parsed_args["dgsh_tee"], - #From pash log + # From pash log "exec_time": parsed_log["Execution time"], "backend_time": parsed_log["Backend time"], "compilation_time": parsed_log["Compilation time"], "preprocess_time": parsed_log["Preprocessing time"], "eager_nodes": int(parsed_log["Eager nodes"]), - "compiler_exit" : parsed_log["Compiler exited with code"], - #From time - "gnu_real": parsed_time["gnu_real"], + "compiler_exit": parsed_log["Compiler exited with code"], + # From time + "gnu_real": parsed_time["gnu_real"], "gnu_usr": parsed_time["user"], "gnu_sys": parsed_time["sys"], "cpu%": parsed_time["cpu%"], @@ -103,12 +124,13 @@ def parse_log(self, log: str)->pd.DataFrame: "minor_pagefaults": int(parsed_time["minor_pagefaults"]), } - #update local and global df + # update local and global df df = df.append(data, ignore_index=True) self.df = self.df.append(data, ignore_index=True) return df - def parse_file(self, log_file: str)->pd.DataFrame: + + def parse_file(self, log_file: str) -> pd.DataFrame: """ Parses a pa.sh log with path file_path Return: @@ -120,11 +142,10 @@ def parse_file(self, log_file: str)->pd.DataFrame: df = self.parse_log(log) return df except: - print("failed to parse", log_file) - return pd.DataFrame() - + print("failed to parse", log_file) + return pd.DataFrame() - def parse_folder(self, path: str)->pd.DataFrame: + def parse_folder(self, path: str) -> pd.DataFrame: """ Parses all valid files ending with .log in the path directory. 
Params: @@ -132,12 +153,14 @@ def parse_folder(self, path: str)->pd.DataFrame: Return: pandas dataframe with all parsed logs """ - log_files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".log")] + log_files = [ + os.path.join(path, f) for f in os.listdir(path) if f.endswith(".log") + ] ret_df = pd.DataFrame() for log_file in log_files: - df = self.parse_file(log_file) - ret_df = ret_df.append(df, ignore_index=True) - + df = self.parse_file(log_file) + ret_df = ret_df.append(df, ignore_index=True) + return ret_df def get_df(self): @@ -149,7 +172,7 @@ def get_df(self): def __parse_args__(self, args: str, args_of_interest): lines = args.split("\n") - args_dict = {i:False for i in args_of_interest} + args_dict = {i: False for i in args_of_interest} for line in lines: try: arg, val = line.split(" ") @@ -161,11 +184,11 @@ def __parse_args__(self, args: str, args_of_interest): except: continue return args_dict - - def __parse_pash_log__(self, args: str, tags_of_interest) : + + def __parse_pash_log__(self, args: str, tags_of_interest): lines = args.split("\n") - log_dict = {i:0 for i in tags_of_interest} - + log_dict = {i: 0 for i in tags_of_interest} + for line in lines: try: tag, val = line.split(": ") @@ -180,50 +203,69 @@ def __parse_pash_log__(self, args: str, tags_of_interest) : def __parse_time_log__(self, timelog: str): data_start = timelog.find("Command being timed: ") - time_data = timelog[data_start: ] + time_data = timelog[data_start:] - lines = [line.split(": ")[1] for line in time_data.replace("\t", "").split("\n")[:-1]] + lines = [ + line.split(": ")[1] for line in time_data.replace("\t", "").split("\n")[:-1] + ] if len(lines) < 23: - lines = [False]*23 + lines = [False] * 23 data = { - "command" : lines[0], - "user" : lines[1], - "sys" : lines[2], - "cpu%" : lines[3], - "gnu_real" : lines[4], - "max_resident" : lines[9], + "command": lines[0], + "user": lines[1], + "sys": lines[2], + "cpu%": lines[3], + "gnu_real": lines[4], + "max_resident": lines[9], "average_resident": lines[10], - "major_pagefaults" : lines[11], - "minor_pagefaults" : lines[12], - "exit_status" : lines[22] + "major_pagefaults": lines[11], + "minor_pagefaults": lines[12], + "exit_status": lines[22], } return data -#can be used in case we only can parse the time (default commands) + +# can be used in case we only can parse the time (default commands) def process_gnu_time(time_data): data_start = time_data.find("Command being timed: ") - time_data = time_data[data_start: ] - lines = [line.split(": ")[1] for line in time_data.replace("\t", "").split("\n")[:-1]] + time_data = time_data[data_start:] + lines = [ + line.split(": ")[1] for line in time_data.replace("\t", "").split("\n")[:-1] + ] data = { - "command" : lines[0], - "user" : lines[1], - "sys" : lines[2], - "cpu%" : lines[3], - "gnu_real" : lines[4], - "max_resident" : lines[9], + "command": lines[0], + "user": lines[1], + "sys": lines[2], + "cpu%": lines[3], + "gnu_real": lines[4], + "max_resident": lines[9], "average_resident": lines[10], - "major_pagefault" : lines[11], - "minor_pagefault" : lines[12], - "exit_status" : lines[22] + "major_pagefault": lines[11], + "minor_pagefault": lines[12], + "exit_status": lines[22], } return data -if __name__ == '__main__': - #sample execution + +if __name__ == "__main__": + # sample execution log_parser = LogParser() - #can pass folder name in first argument + # can pass folder name in first argument if len(argv) > 1: df = log_parser.parse_folder(argv[1]) else: df = 
log_parser.parse_folder(DEFAULT_LOG_FOLDER) - print(log_parser.get_df()[["test_name", "IN", "r_split_batch_size", "no_eager", "split_type", "exec_time", "cpu%", "width"]].to_string(index = False)) \ No newline at end of file + print( + log_parser.get_df()[ + [ + "test_name", + "IN", + "r_split_batch_size", + "no_eager", + "split_type", + "exec_time", + "cpu%", + "width", + ] + ].to_string(index=False) + ) diff --git a/scripts/test_eval/tester.py b/scripts/test_eval/tester.py index fca5e8825..901d127ac 100644 --- a/scripts/test_eval/tester.py +++ b/scripts/test_eval/tester.py @@ -5,43 +5,76 @@ import pandas as pd import uuid -GIT_TOP_CMD = [ 'git', 'rev-parse', '--show-toplevel', '--show-superproject-working-tree'] -if 'PASH_TOP' in os.environ: - PASH_TOP = os.environ['PASH_TOP'] +GIT_TOP_CMD = [ + "git", + "rev-parse", + "--show-toplevel", + "--show-superproject-working-tree", +] +if "PASH_TOP" in os.environ: + PASH_TOP = os.environ["PASH_TOP"] else: - PASH_TOP = run(GIT_TOP_CMD, stdout=PIPE, stderr=PIPE, universal_newlines=True).stdout.rstrip() + PASH_TOP = run( + GIT_TOP_CMD, stdout=PIPE, stderr=PIPE, universal_newlines=True + ).stdout.rstrip() + class Tests(LogParser): - def __init__(self, in_file = None, batch_sz = 100000): + def __init__(self, in_file=None, batch_sz=100000): self.in_file = in_file self.batch_sz = str(batch_sz) self.log_parser = LogParser() def time(self, command, env, stdout=PIPE): - time_command = ["/usr/bin/time" , "-v", "bash"] + time_command = ["/usr/bin/time", "-v", "bash"] time_command.extend(command) - result = run(time_command, stdout=PIPE, universal_newlines=True, stdin=None, stderr=PIPE, env=env) + result = run( + time_command, + stdout=PIPE, + universal_newlines=True, + stdin=None, + stderr=PIPE, + env=env, + ) return result def get_df(self): return self.log_parser.get_df() - def run_test(self, test_path, width = 2, r_split=False, batch_size=None, in_file=None, no_eager=False, dgsh_tee=False, log_folder=DEFAULT_LOG_FOLDER): - if in_file==None: + def run_test( + self, + test_path, + width=2, + r_split=False, + batch_size=None, + in_file=None, + no_eager=False, + dgsh_tee=False, + log_folder=DEFAULT_LOG_FOLDER, + ): + if in_file == None: in_file = self.in_file - + new_env = os.environ.copy() if in_file == None: in_file = self.in_file new_env["IN"] = in_file new_env["PASH_TOP"] = PASH_TOP - - command = [f"{PASH_TOP}/pa.sh", test_path, "--output_time", f"-w {width}", "-d 1"] + + command = [ + f"{PASH_TOP}/pa.sh", + test_path, + "--output_time", + f"-w {width}", + "-d 1", + ] if r_split: command.append("--r_split") - batch_size = str(batch_size) if batch_size else self.batch_sz #str(int(os.path.getsize(in_file)/90)) + batch_size = ( + str(batch_size) if batch_size else self.batch_sz + ) # str(int(os.path.getsize(in_file)/90)) command.append("--r_split_batch_size") command.append(batch_size) if no_eager: @@ -50,43 +83,77 @@ def run_test(self, test_path, width = 2, r_split=False, batch_size=None, in_file command.append("--dgsh_tee") result = self.time(command, new_env) - - #add IN file to log + + # add IN file to log result.stderr = f"IN {in_file}\n" + result.stderr - #write stderr to log_file if provided + # write stderr to log_file if provided log_file = self.__get_log_file__(test_path, log_folder) - with open(log_file, 'w') as f: + with open(log_file, "w") as f: f.write(result.stderr) - if result.returncode != 0: print(f"failed running: {test_path}") if log_file: print(f"log in {log_file}") - + df = self.log_parser.parse_log(result.stderr) return result, df - 
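# Editor's note: a usage sketch for the run_test wrapper above; the test script path and
# input file are placeholders, not taken from the source, and running this actually
# invokes pa.sh on the given script.
tester = Tests(in_file="/tmp/input.txt", batch_sz=100000)
result, df = tester.run_test(
    f"{PASH_TOP}/evaluation/tests/hello-world.sh",  # hypothetical test script
    width=2,
    r_split=True,
)
print(df[["test_name", "exec_time", "width"]].to_string(index=False))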
#Run provided tests in folder x with the env files - def run_folder_tests(self, tests, folder, width = 2, r_split=False, batch_size=None, in_file=None, no_eager=False, dgsh_tee=False, log_folder=None): + # Run provided tests in folder x with the env files + def run_folder_tests( + self, + tests, + folder, + width=2, + r_split=False, + batch_size=None, + in_file=None, + no_eager=False, + dgsh_tee=False, + log_folder=None, + ): pass - #run a list of tests, each test should be the full path of .sh file - #if log_folder provided it generates unique name for each log - def run_test_list(self, tests, width = 2, r_split=False, batch_size=None, in_file=None, no_eager=False, dgsh_tee=False, log_folder=DEFAULT_LOG_FOLDER): + # run a list of tests, each test should be the full path of .sh file + # if log_folder provided it generates unique name for each log + def run_test_list( + self, + tests, + width=2, + r_split=False, + batch_size=None, + in_file=None, + no_eager=False, + dgsh_tee=False, + log_folder=DEFAULT_LOG_FOLDER, + ): df = pd.DataFrame() for test in tests: - result, dfnew = self.run_test(test, width, r_split, batch_size, in_file, no_eager, dgsh_tee, log_folder) + result, dfnew = self.run_test( + test, + width, + r_split, + batch_size, + in_file, + no_eager, + dgsh_tee, + log_folder, + ) df = df.append(dfnew, ignore_index=True) return df - + def __get_log_file__(self, test_path, log_folder): if not os.path.exists(log_folder): os.makedirs(log_folder, exist_ok=True) - temp_filename = os.path.basename(test_path).replace(".sh", "") + "_" + str(uuid.uuid4()) + ".log" + temp_filename = ( + os.path.basename(test_path).replace(".sh", "") + + "_" + + str(uuid.uuid4()) + + ".log" + ) log_file = os.path.join(log_folder, temp_filename) - return log_file \ No newline at end of file + return log_file diff --git a/scripts/ws-client.py b/scripts/ws-client.py index b0f44a933..56a4aabcf 100644 --- a/scripts/ws-client.py +++ b/scripts/ws-client.py @@ -5,58 +5,77 @@ from websocket import create_connection -RESULT_POLLING_FREQUENCY=60 +RESULT_POLLING_FREQUENCY = 60 + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("-b", "--target_branch", - help="the target branch to fork and run the tests on") - parser.add_argument("-c", "--target_commit", - help="the target commit to checkout to run the tests on") - parser.add_argument("-m", "--mode", - help="the execution mode. `run` runs and waits until the results are there, `wait` just waits, and `check` just returns the current task", - choices=['run', 'wait', 'check'], - default='run') + parser.add_argument( + "-b", "--target_branch", help="the target branch to fork and run the tests on" + ) + parser.add_argument( + "-c", + "--target_commit", + help="the target commit to checkout to run the tests on", + ) + parser.add_argument( + "-m", + "--mode", + help="the execution mode. 
`run` runs and waits until the results are there, `wait` just waits, and `check` just returns the current task", + choices=["run", "wait", "check"], + default="run", + ) args = parser.parse_args() return args + def issue_test_run(websocket, target_commit, target_branch): - run_tests_req_data = {"cmd": {"job": "issue", - "benchmark": "CORRECTNESS", - "commit": target_commit, - "branch": target_branch, - }} - msg = json.dumps(run_tests_req_data) + run_tests_req_data = { + "cmd": { + "job": "issue", + "benchmark": "CORRECTNESS", + "commit": target_commit, + "branch": target_branch, + } + } + msg = json.dumps(run_tests_req_data) websocket.send(msg) - print("POSIX Tests request made for branch:", target_branch, "and commit:", target_commit, file=sys.stderr) + print( + "POSIX Tests request made for branch:", + target_branch, + "and commit:", + target_commit, + file=sys.stderr, + ) + def fetch_runs(websocket): - data = {"cmd": {"job": "/fetch_runs", - "count": 50}} - msg = json.dumps(data) + data = {"cmd": {"job": "/fetch_runs", "count": 50}} + msg = json.dumps(data) # print("Sending:", msg, file=sys.stderr) websocket.send(msg) # print("Sent!", file=sys.stderr) res = websocket.recv() - runs_data = json.loads(res) + runs_data = json.loads(res) return runs_data + def current_task(websocket): data = {"cmd": {"job": "/current_task"}} - msg = json.dumps(data) + msg = json.dumps(data) # print("Sending:", msg, file=sys.stderr) websocket.send(msg) # print("Sent!", file=sys.stderr) res = websocket.recv() - res_data = json.loads(res) + res_data = json.loads(res) return res_data + def wait_for_result(websocket, target_commit): found = False sleep_duration = RESULT_POLLING_FREQUENCY while not found: - ## Fetch all runs runs_data = fetch_runs(websocket) result_rows = runs_data["data"]["rows"] @@ -96,7 +115,7 @@ def wait_for_result(websocket, target_commit): if args.mode == "run": ## Issue the POSIX tests requests issue_test_run(ws, target_commit, target_branch) - + ## ## Wait until we have the POSIX test results ## @@ -106,4 +125,4 @@ def wait_for_result(websocket, target_commit): print(result_row) -ws.close() \ No newline at end of file +ws.close()
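# Editor's note: a minimal driver for the helpers above. The websocket URL is a
# placeholder (the real endpoint is whatever the script's own create_connection call
# uses); the commit and branch values are hypothetical, and wait_for_result is assumed
# to return the matching run row once it appears.
from websocket import create_connection

ws = create_connection("ws://localhost:8080")  # placeholder endpoint
issue_test_run(ws, target_commit="abc1234", target_branch="my-feature-branch")
result_row = wait_for_result(ws, "abc1234")    # polls fetch_runs() until the commit shows up
print(result_row)
ws.close()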