From f1bd730158e5071cbb4640877a4ceef7a5ac2b0f Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 01:36:22 +0200 Subject: [PATCH 01/10] Implement algorithm for faster Jacobian's using hSAD/forward accumulation --- opty/utils.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/opty/utils.py b/opty/utils.py index 7d73473..03e21f9 100644 --- a/opty/utils.py +++ b/opty/utils.py @@ -11,9 +11,12 @@ from distutils.ccompiler import new_compiler from distutils.errors import CompileError from distutils.sysconfig import customize_compiler +from collections import Counter +from timeit import default_timer as timer import numpy as np import sympy as sm +from sympy.utilities.iterables import numbered_symbols try: plt = sm.external.import_module('matplotlib.pyplot', __import__kwargs={'fromlist': ['']}, @@ -24,6 +27,123 @@ catch=(RuntimeError,)) +def forward_jacobian(expr, wrt): + + def add_to_cache(node): + if node in expr_to_replacement_cache: + replacement_symbol = expr_to_replacement_cache[node] + return replacement_symbol, replacement_to_reduced_expr_cache[replacement_symbol] + elif node in replacement_to_reduced_expr_cache: + return node, replacement_to_reduced_expr_cache[node] + elif isinstance(node, sm.Tuple): + return None, None + elif not node.free_symbols: + return node, node + + replacement_symbol = replacement_symbols.__next__() + replaced_subexpr = node.xreplace(expr_to_replacement_cache) + replacement_to_reduced_expr_cache[replacement_symbol] = replaced_subexpr + expr_to_replacement_cache[node] = replacement_symbol + return replacement_symbol, replaced_subexpr + + if not isinstance(expr, sm.ImmutableDenseMatrix): + msg = ( + 'The forward Jacobian differentiation algorithm can only be used ' + 'to differentiate a single matrix expression at a time.' + ) + raise NotImplementedError(msg) + elif expr.shape[1] != 1: + msg = 'Can only compute the Jacobian for column matrices.' + raise NotImplementedError(msg) + elif not isinstance(wrt, sm.ImmutableDenseMatrix) or wrt.shape[1] != 1: + msg = ( + 'The forward Jacobian differentiation algorithm can compute ' + 'Jacobians with respect to column matrices.' + ) + + replacement_symbols = numbered_symbols( + prefix='_z', + cls=sm.Symbol, + exclude=expr.free_symbols, + ) + + expr_to_replacement_cache = {} + replacement_to_reduced_expr_cache = {} + + print('Adding expression nodes to cache...') + start = timer() + for node in sm.postorder_traversal(expr.args[2], keys=True): + if isinstance(node, sm.ImmutableDenseMatrix): + break + add_to_cache(node) + finish = timer() + print(f'Completed in {finish - start:.2f}s') + + reduced_matrix = node.xreplace(expr_to_replacement_cache) + reduced_exprs = [reduced_matrix] + replacements = list(replacement_to_reduced_expr_cache.items()) + + partial_derivative_mapping = {} + absolute_derivative_mapping = {} + for i, wrt_symbol in enumerate(wrt.args[2]): + absolute_derivative = [sm.S.Zero] * len(wrt) + absolute_derivative[i] = sm.S.One + absolute_derivative_mapping[wrt_symbol] = sm.ImmutableDenseMatrix([absolute_derivative]) + new_replacements_mapping = {} + + print('Differentiating expression nodes...') + start = timer() + zeros = sm.ImmutableDenseMatrix.zeros(1, len(wrt)) + for symbol, subexpr in replacements: + free_symbols = subexpr.free_symbols + absolute_derivative = zeros + for free_symbol in free_symbols: + replacement_symbol, partial_derivative = add_to_cache(subexpr.diff(free_symbol)) + absolute_derivative += partial_derivative * absolute_derivative_mapping.get(free_symbol, zeros) + absolute_derivative_mapping[symbol] = sm.ImmutableDenseMatrix([[add_to_cache(a)[0] for a in absolute_derivative]]) + + replaced_jacobian = sm.ImmutableDenseMatrix.vstack(*[absolute_derivative_mapping[e] for e in reduced_matrix]) + finish = timer() + print(f'Completed in {finish - start:.2f}s') + + print('Determining required replacements...') + start = timer() + required_replacement_symbols = set() + stack = [entry for entry in replaced_jacobian if entry.free_symbols] + while stack: + entry = stack.pop() + if entry in required_replacement_symbols or entry in wrt: + continue + children = list(replacement_to_reduced_expr_cache.get(entry, entry).free_symbols) + stack.extend(children) + required_replacement_symbols.add(entry) + finish = timer() + print(f'Completed in {finish - start:.2f}s') + + required_replacements_dense = { + replacement_symbol: replaced_subexpr + for replacement_symbol, replaced_subexpr in replacement_to_reduced_expr_cache.items() + if replacement_symbol in required_replacement_symbols + } + + counter = Counter(replaced_jacobian.free_symbols) + for replaced_subexpr in required_replacements_dense.values(): + counter.update(replaced_subexpr.free_symbols) + + print('Substituting required replacements...') + required_replacements = {} + unrequired_replacements = {} + for replacement_symbol, replaced_subexpr in required_replacements_dense.items(): + if isinstance(replaced_subexpr, sm.Symbol) or counter[replacement_symbol] == 1: + unrequired_replacements[replacement_symbol] = replaced_subexpr.xreplace(unrequired_replacements) + else: + required_replacements[replacement_symbol] = replaced_subexpr.xreplace(unrequired_replacements) + finish = timer() + print(f'Completed in {finish - start:.2f}s') + + return (list(required_replacements.items()), [replaced_jacobian.xreplace(unrequired_replacements)]) + + def building_docs(): if 'READTHEDOCS' in os.environ: return True From 02d9b0ea04ca8f8bb03a40a23acbc84b36816fe7 Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 01:36:55 +0200 Subject: [PATCH 02/10] Make ufuncify_matrix compatible with pre-cse'd expressions --- opty/utils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/opty/utils.py b/opty/utils.py index 03e21f9..05c0ec7 100644 --- a/opty/utils.py +++ b/opty/utils.py @@ -350,7 +350,14 @@ def ufuncify_matrix(args, expr, const=None, tmp_dir=None, parallel=False): # not sure if this current version counts sequentially. global module_counter - matrix_size = expr.shape[0] * expr.shape[1] + if hasattr(expr, 'shape'): + num_rows = expr.shape[0] + num_cols = expr.shape[1] + else: + num_rows = expr[1][0].shape[0] + num_cols = expr[1][0].shape[1] + + matrix_size = num_rows * num_cols file_prefix_base = 'ufuncify_matrix' file_prefix = '{}_{}'.format(file_prefix_base, module_counter) @@ -377,8 +384,8 @@ def ufuncify_matrix(args, expr, const=None, tmp_dir=None, parallel=False): d = {'routine_name': 'eval_matrix', 'file_prefix': file_prefix, 'matrix_output_size': matrix_size, - 'num_rows': expr.shape[0], - 'num_cols': expr.shape[1]} + 'num_rows': num_rows, + 'num_cols': num_cols} if parallel: if openmp_installed(): @@ -400,9 +407,12 @@ def ufuncify_matrix(args, expr, const=None, tmp_dir=None, parallel=False): d['compile_args'] = "" d['link_args'] = "" - matrix_sym = sm.MatrixSymbol('matrix', expr.shape[0], expr.shape[1]) + matrix_sym = sm.MatrixSymbol('matrix', num_rows, num_cols) - sub_exprs, simple_mat = sm.cse(expr, sm.numbered_symbols('z_')) + if isinstance(expr, tuple) and len(expr) == 2: + sub_exprs, simple_mat = expr + else: + sub_exprs, simple_mat = sm.cse(expr, sm.numbered_symbols('z_')) sub_expr_code = '\n'.join(['double ' + sm.ccode(sub_expr[1], sub_expr[0]) for sub_expr in sub_exprs]) From 963d80024c73b016a81ff927c752d47dbcdc8b8f Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 01:37:24 +0200 Subject: [PATCH 03/10] Refactor to use forward_jacobian, add logging --- opty/direct_collocation.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/opty/direct_collocation.py b/opty/direct_collocation.py index 94ed801..d0c4bbb 100644 --- a/opty/direct_collocation.py +++ b/opty/direct_collocation.py @@ -15,7 +15,7 @@ import_kwargs={'fromlist': ['']}, catch=(RuntimeError,)) -from .utils import ufuncify_matrix, parse_free, _optional_plt_dep +from .utils import ufuncify_matrix, parse_free, _optional_plt_dep, forward_jacobian __all__ = ['Problem', 'ConstraintCollocator'] @@ -115,7 +115,9 @@ def __init__(self, obj, obj_grad, *args, **kwargs): self.obj = obj self.obj_grad = obj_grad self.con = self.collocator.generate_constraint_function() + print('Constraint function generated.') self.con_jac = self.collocator.generate_jacobian_function() + print('Jacobian function generated.') self.con_jac_rows, self.con_jac_cols = \ self.collocator.jacobian_indices() @@ -1261,19 +1263,31 @@ def _gen_multi_arg_con_jac_func(self): # This creates a matrix with all of the symbolic partial derivatives # necessary to compute the full Jacobian. - symbolic_partials = self.discrete_eom.jacobian(wrt) + print('Starting the Jacobian differentiation.') + # symbolic_partials = self.discrete_eom.jacobian(wrt) + discrete_eom_matrix = sm.ImmutableDenseMatrix(self.discrete_eom) + wrt_matrix = sm.ImmutableDenseMatrix([list(wrt)]) + symbolic_partials = forward_jacobian(discrete_eom_matrix, wrt_matrix) + print('Jacobian differentiation finished.') # This generates a numerical function that evaluates the matrix of # partial derivatives. This function returns the non-zero elements # needed to build the sparse constraint Jacobian. + print('Starting the Jacobian compilation.') eval_partials = ufuncify_matrix(args, symbolic_partials, const=constant_syms + (h_sym,), tmp_dir=self.tmp_dir, parallel=self.parallel) + print('Jacobian compilation finished.') + if isinstance(symbolic_partials, tuple) and len(symbolic_partials) == 2: + num_rows = symbolic_partials[1][0].shape[0] + num_cols = symbolic_partials[1][0].shape[1] + else: + num_rows = symbolic_partials.shape[0] + num_cols = symbolic_partials.shape[1] result = np.empty((self.num_collocation_nodes - 1, - symbolic_partials.shape[0] * - symbolic_partials.shape[1])) + num_rows * num_cols)) def constraints_jacobian(state_values, specified_values, parameter_values, interval_value): @@ -1443,11 +1457,13 @@ def constraints(free): def generate_constraint_function(self): """Returns a function which evaluates the constraints given the array of free optimization variables.""" + print('Generating constraint function.') self._gen_multi_arg_con_func() return self._wrap_constraint_funcs(self._multi_arg_con_func, 'con') def generate_jacobian_function(self): """Returns a function which evaluates the Jacobian of the constraints given the array of free optimization variables.""" + print('Generating jacobian function.') self._gen_multi_arg_con_jac_func() return self._wrap_constraint_funcs(self._multi_arg_con_jac_func, 'jac') From e72393e27a39440ac764f44f4108f30434bc1264 Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 02:29:35 +0200 Subject: [PATCH 04/10] Optimize performance of postorder traversal for caching nodes --- opty/utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/opty/utils.py b/opty/utils.py index 05c0ec7..66d3c3d 100644 --- a/opty/utils.py +++ b/opty/utils.py @@ -72,15 +72,20 @@ def add_to_cache(node): print('Adding expression nodes to cache...') start = timer() - for node in sm.postorder_traversal(expr.args[2], keys=True): - if isinstance(node, sm.ImmutableDenseMatrix): - break - add_to_cache(node) + replacements, reduced_exprs = sm.cse(expr.args[2], replacement_symbols) + for replacement_symbol, reduced_subexpr in replacements: + replaced_subexpr = reduced_subexpr.xreplace(expr_to_replacement_cache) + replacement_to_reduced_expr_cache[replacement_symbol] = replaced_subexpr + expr_to_replacement_cache[reduced_subexpr] = replacement_symbol + for node in sm.postorder_traversal(reduced_subexpr): + _ = add_to_cache(node) + for reduced_expr in reduced_exprs: + for node in reduced_expr: + _ = add_to_cache(node) finish = timer() print(f'Completed in {finish - start:.2f}s') - reduced_matrix = node.xreplace(expr_to_replacement_cache) - reduced_exprs = [reduced_matrix] + reduced_matrix = sm.ImmutableDenseMatrix(reduced_exprs).xreplace(expr_to_replacement_cache) replacements = list(replacement_to_reduced_expr_cache.items()) partial_derivative_mapping = {} @@ -89,7 +94,6 @@ def add_to_cache(node): absolute_derivative = [sm.S.Zero] * len(wrt) absolute_derivative[i] = sm.S.One absolute_derivative_mapping[wrt_symbol] = sm.ImmutableDenseMatrix([absolute_derivative]) - new_replacements_mapping = {} print('Differentiating expression nodes...') start = timer() @@ -115,7 +119,9 @@ def add_to_cache(node): if entry in required_replacement_symbols or entry in wrt: continue children = list(replacement_to_reduced_expr_cache.get(entry, entry).free_symbols) - stack.extend(children) + for child in children: + if child not in required_replacement_symbols and child not in wrt: + stack.append(child) required_replacement_symbols.add(entry) finish = timer() print(f'Completed in {finish - start:.2f}s') From 6a295f5b7b1bcbcfb90db4febd6fc377f18c1cca Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 13:01:38 +0200 Subject: [PATCH 05/10] Add missing raise NotImplementedError --- opty/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/opty/utils.py b/opty/utils.py index 66d3c3d..93503d4 100644 --- a/opty/utils.py +++ b/opty/utils.py @@ -60,6 +60,7 @@ def add_to_cache(node): 'The forward Jacobian differentiation algorithm can compute ' 'Jacobians with respect to column matrices.' ) + raise NotImplementedError replacement_symbols = numbered_symbols( prefix='_z', From 5ceb7a1dd10313584eb98e6d45bd76112b93459f Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 13:05:29 +0200 Subject: [PATCH 06/10] Ensure wrt is column matrix --- opty/direct_collocation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opty/direct_collocation.py b/opty/direct_collocation.py index d0c4bbb..e66a368 100644 --- a/opty/direct_collocation.py +++ b/opty/direct_collocation.py @@ -1267,7 +1267,7 @@ def _gen_multi_arg_con_jac_func(self): # symbolic_partials = self.discrete_eom.jacobian(wrt) discrete_eom_matrix = sm.ImmutableDenseMatrix(self.discrete_eom) wrt_matrix = sm.ImmutableDenseMatrix([list(wrt)]) - symbolic_partials = forward_jacobian(discrete_eom_matrix, wrt_matrix) + symbolic_partials = forward_jacobian(discrete_eom_matrix, wrt_matrix.T) print('Jacobian differentiation finished.') # This generates a numerical function that evaluates the matrix of From 2f287a1ea14d0e312b19ba639577dae43177fc09 Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 15:56:57 +0200 Subject: [PATCH 07/10] Unpin Cython --- .github/workflows/run-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 1929a3a..c3996b8 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -33,7 +33,7 @@ jobs: conda update -q conda conda create -q -n test-env python=${{ matrix.python-version }} conda activate test-env - conda install -y -q sympy 'cython<3.0' scipy cyipopt pytest pytest-cov coverage sphinx matplotlib-base openmp + conda install -y -q sympy cython scipy cyipopt pytest pytest-cov coverage sphinx matplotlib-base openmp conda info -a conda list python setup.py install From 9bc3e8cd4623dce4bc1db070c3644476dfe34d4a Mon Sep 17 00:00:00 2001 From: Sam Brockie Date: Fri, 25 Aug 2023 16:09:05 +0200 Subject: [PATCH 08/10] Add Python 3.10 to matrix, use libmamba solver --- .github/workflows/run-tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index c3996b8..a25a1ab 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.6, 3.7, 3.8, 3.9, "3.10"] steps: - uses: actions/checkout@v2 @@ -31,6 +31,8 @@ jobs: conda config --set always_yes yes --set changeps1 no conda config --add channels conda-forge conda update -q conda + conda install -n base conda-libmamba-solver + conda config --set solver libmamba conda create -q -n test-env python=${{ matrix.python-version }} conda activate test-env conda install -y -q sympy cython scipy cyipopt pytest pytest-cov coverage sphinx matplotlib-base openmp From 4e58fd1768c6a9b6ab9acaa9dc6ead4de548bc5b Mon Sep 17 00:00:00 2001 From: "Jason K. Moore" Date: Sat, 2 Sep 2023 21:44:33 +0200 Subject: [PATCH 09/10] Use pycompilation if installed. --- opty/utils.py | 58 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/opty/utils.py b/opty/utils.py index 93503d4..ad71e7a 100644 --- a/opty/utils.py +++ b/opty/utils.py @@ -26,6 +26,8 @@ import_kwargs={'fromlist': ['']}, catch=(RuntimeError,)) +pycompilation = sm.external.import_module('pycompilation') + def forward_jacobian(expr, wrt): @@ -461,27 +463,43 @@ def ufuncify_matrix(args, expr, const=None, tmp_dir=None, parallel=False): files[d['file_prefix'] + '.pyx'] = _cython_template.format(**d) files[d['file_prefix'] + '_setup.py'] = _setup_template.format(**d) - workingdir = os.getcwd() - os.chdir(codedir) + if pycompilation: + sources = [ + (d['file_prefix'] + '_h.h', files[d['file_prefix'] + '_h.h']), + (d['file_prefix'] + '_c.c', files[d['file_prefix'] + '_c.c']), + (d['file_prefix'] + '.pyx', files[d['file_prefix'] + '.pyx']), + ] - try: - sys.path.append(codedir) - for filename, code in files.items(): - with open(filename, 'w') as f: - f.write(code) - cmd = [sys.executable, d['file_prefix'] + '_setup.py', 'build_ext', - '--inplace'] - subprocess.call(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) - cython_module = importlib.import_module(d['file_prefix']) - finally: - module_counter += 1 - sys.path.remove(codedir) - os.chdir(workingdir) - if tmp_dir is None: - # NOTE : I can't figure out how to get rmtree to work on Windows, - # so I don't delete the directory on Windows. - if sys.platform != "win32": - shutil.rmtree(codedir) + options = ['warn', 'pic'] + if parallel: + options += ['openmp'] + + cython_module = pycompilation.compile_link_import_strings( + sources, options=options, std='c99', logger=True, + include_dirs=[np.get_include()], build_dir=codedir) + else: + workingdir = os.getcwd() + os.chdir(codedir) + + try: + sys.path.append(codedir) + for filename, code in files.items(): + with open(filename, 'w') as f: + f.write(code) + cmd = [sys.executable, d['file_prefix'] + '_setup.py', 'build_ext', + '--inplace'] + subprocess.call(cmd, stderr=subprocess.STDOUT, + stdout=subprocess.PIPE) + cython_module = importlib.import_module(d['file_prefix']) + finally: + module_counter += 1 + sys.path.remove(codedir) + os.chdir(workingdir) + if tmp_dir is None: + # NOTE : I can't figure out how to get rmtree to work on Windows, + # so I don't delete the directory on Windows. + if sys.platform != "win32": + shutil.rmtree(codedir) return getattr(cython_module, d['routine_name'] + '_loop') From 3423090f8d9de6c4feb2bdd660a9b909051fc52c Mon Sep 17 00:00:00 2001 From: "Jason K. Moore" Date: Sun, 3 Sep 2023 06:37:59 +0200 Subject: [PATCH 10/10] Drop use of header file, pycompilation now works. --- opty/utils.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/opty/utils.py b/opty/utils.py index ad71e7a..fbd1196 100644 --- a/opty/utils.py +++ b/opty/utils.py @@ -232,7 +232,6 @@ def parse_free(free, n, q, N): _c_template = """\ #include -#include "{file_prefix}_h.h" void {routine_name}(double matrix[{matrix_output_size}], {input_args}) {{ @@ -240,18 +239,13 @@ def parse_free(free, n, q, N): }} """ -_h_template = """\ -void {routine_name}(double matrix[{matrix_output_size}], {input_args}); -""" - _cython_template = """\ import numpy as np from cython.parallel import prange cimport numpy as np cimport cython -cdef extern from "{file_prefix}_h.h"{head_gil}: - void {routine_name}(double matrix[{matrix_output_size}], {input_args}) +cdef extern void c_{routine_name} "{routine_name}" (double matrix[{matrix_output_size}], {input_args}){head_gil} @cython.boundscheck(False) @cython.wraparound(False) @@ -262,7 +256,7 @@ def {routine_name}_loop(np.ndarray[np.double_t, ndim=2] matrix, {numpy_typed_inp cdef int i for i in {loop_sig}: - {routine_name}(&matrix[i, 0], {indexed_input_args}) + c_{routine_name}(&matrix[i, 0], {indexed_input_args}) return matrix.reshape(n, {num_rows}, {num_cols}) """ @@ -459,23 +453,21 @@ def ufuncify_matrix(args, expr, const=None, tmp_dir=None, parallel=False): files = {} files[d['file_prefix'] + '_c.c'] = _c_template.format(**d) - files[d['file_prefix'] + '_h.h'] = _h_template.format(**d) files[d['file_prefix'] + '.pyx'] = _cython_template.format(**d) files[d['file_prefix'] + '_setup.py'] = _setup_template.format(**d) if pycompilation: sources = [ - (d['file_prefix'] + '_h.h', files[d['file_prefix'] + '_h.h']), (d['file_prefix'] + '_c.c', files[d['file_prefix'] + '_c.c']), (d['file_prefix'] + '.pyx', files[d['file_prefix'] + '.pyx']), ] - options = ['warn', 'pic'] + options = [] if parallel: options += ['openmp'] cython_module = pycompilation.compile_link_import_strings( - sources, options=options, std='c99', logger=True, + sources, options=options, std='c99', include_dirs=[np.get_include()], build_dir=codedir) else: workingdir = os.getcwd()