From e5690487f2ae0f02648083acb14ff4e98cc0fd87 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 28 Feb 2024 13:25:07 -0600 Subject: [PATCH 01/41] WIP bazel --- WORKSPACE.bazel | 12 +++++- bazel/third_party/softblas/BUILD.bazel | 0 bazel/third_party/softblas/softblas.BUILD | 52 +++++++++++++++++++++++ pkg/noun/BUILD.bazel | 1 + pkg/noun/jets/q.h | 2 + pkg/noun/jets/tree.c | 7 +++ pkg/noun/jets/w.h | 2 + 7 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 bazel/third_party/softblas/BUILD.bazel create mode 100644 bazel/third_party/softblas/softblas.BUILD diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index d3c4848fd1..5553f2357c 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -291,6 +291,14 @@ versioned_http_archive( version = "2.14", ) +versioned_http_archive( + name = "softblas", + build_file = "//bazel/third_party/softblas:softblas.BUILD", + # sha256 = "", + url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", + version = "bd637fdb23ac4ebd4048eb546633262ddf647b18", +) + versioned_http_archive( name = "softfloat", build_file = "//bazel/third_party/softfloat:softfloat.BUILD", @@ -354,10 +362,10 @@ versioned_http_archive( versioned_http_archive( name = "zlib", build_file = "//bazel/third_party/zlib:zlib.BUILD", - sha256 = "ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e", + sha256 = "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23", strip_prefix = "zlib-{version}", url = "https://www.zlib.net/zlib-{version}.tar.gz", - version = "1.3", + version = "1.3.1", ) # diff --git a/bazel/third_party/softblas/BUILD.bazel b/bazel/third_party/softblas/BUILD.bazel new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bazel/third_party/softblas/softblas.BUILD b/bazel/third_party/softblas/softblas.BUILD new file mode 100644 index 0000000000..8d669a7f77 --- /dev/null +++ b/bazel/third_party/softblas/softblas.BUILD @@ -0,0 +1,52 @@ +# FILEPATH: 
/home/neal/lagoon/vere/bazel/third_party/softblas/softblas.BUILD + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +cc_library( + name = "softblas", + visibility = ["//visibility:public"], + hdrs = ["include/softblas.h"], + # includes = ["include"], + srcs = ["src/blas/level1/sasum.c", + "src/blas/level1/dasum.c", + "src/blas/level1/hasum.c", + "src/blas/level1/qasum.c", + "src/blas/level1/saxpy.c", + "src/blas/level1/daxpy.c", + "src/blas/level1/haxpy.c", + "src/blas/level1/qaxpy.c", + "src/blas/level1/scopy.c", + "src/blas/level1/dcopy.c", + "src/blas/level1/hcopy.c", + "src/blas/level1/qcopy.c", + "src/blas/level1/sdot.c", + "src/blas/level1/ddot.c", + "src/blas/level1/hdot.c", + "src/blas/level1/qdot.c", + "src/blas/level1/snrm2.c", + "src/blas/level1/dnrm2.c", + "src/blas/level1/hnrm2.c", + "src/blas/level1/qnrm2.c", + "src/blas/level1/sscal.c", + "src/blas/level1/dscal.c", + "src/blas/level1/hscal.c", + "src/blas/level1/qscal.c", + "src/blas/level1/sswap.c", + "src/blas/level1/dswap.c", + "src/blas/level1/hswap.c", + "src/blas/level1/qswap.c", + "src/blas/level1/isamax.c", + "src/blas/level1/idamax.c", + "src/blas/level1/ihamax.c", + "src/blas/level1/iqamax.c", + "src/blas/level2/sgemv.c", + "src/blas/level2/dgemv.c", + "src/blas/level2/hgemv.c", + "src/blas/level2/qgemv.c", + "src/blas/level3/sgemm.c", + "src/blas/level3/dgemm.c", + "src/blas/level3/hgemm.c", + "src/blas/level3/qgemm.c" + ], + deps = ["@softfloat"], +) diff --git a/pkg/noun/BUILD.bazel b/pkg/noun/BUILD.bazel index a6b8de6d7a..1e16d21573 100644 --- a/pkg/noun/BUILD.bazel +++ b/pkg/noun/BUILD.bazel @@ -38,6 +38,7 @@ vere_library( "@openssl", "@pdjson", "@sigsegv", + "@softblas", "@softfloat", "@urcrypt", ] + select({ diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 33c63ef42b..8b99103e56 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -247,6 +247,8 @@ u3_noun u3qfp_nepo(u3_noun, u3_noun); u3_noun u3qfp_rake(u3_noun); + u3_noun u3qf_la_add_real(u3_noun, 
u3_noun, u3_noun); + # define u3qfu_van_fan 28 # define u3qfu_van_rib 58 # define u3qfu_van_vet 59 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index b0264327d1..0499a5202d 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2147,6 +2147,11 @@ static u3j_core _139_hex_json_d[] = {} }; +/* linear algebra jets +*/ + +static u3j_harm _139_hex_lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; + static u3j_core _139_hex_d[] = { { "lore", 63, _140_hex_lore_a, 0, no_hashes }, { "leer", 63, _140_hex_leer_a, 0, no_hashes }, @@ -2165,6 +2170,8 @@ static u3j_core _139_hex_d[] = { "secp", 6, 0, _140_hex_secp_d, no_hashes }, { "mimes", 31, 0, _140_hex_mimes_d, no_hashes }, { "json", 31, 0, _139_hex_json_d, no_hashes }, + + { "add", 7, _139_hex_lagoon_add_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index d838416c03..5cc40f1975 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -331,4 +331,6 @@ u3_noun u3wfu_repo(u3_noun); u3_noun u3wfu_rest(u3_noun); + u3_noun u3wf_la_add(u3_noun); + #endif /* ifndef U3_JETS_W_H */ From 3cf0ad60ffb2a15f965e7819deaacb580447076b Mon Sep 17 00:00:00 2001 From: Matthew LeVan Date: Wed, 28 Feb 2024 14:37:22 -0500 Subject: [PATCH 02/41] WIP bazel builds but `qgemm.c` needs `#include ` --- WORKSPACE.bazel | 1 + bazel/third_party/softblas/softblas.BUILD | 84 ++++++++++++----------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 5553f2357c..ddb83b8361 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -294,6 +294,7 @@ versioned_http_archive( versioned_http_archive( name = "softblas", build_file = "//bazel/third_party/softblas:softblas.BUILD", + strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", version = "bd637fdb23ac4ebd4048eb546633262ddf647b18", diff --git a/bazel/third_party/softblas/softblas.BUILD b/bazel/third_party/softblas/softblas.BUILD index 
8d669a7f77..2ca6b46b1a 100644 --- a/bazel/third_party/softblas/softblas.BUILD +++ b/bazel/third_party/softblas/softblas.BUILD @@ -6,47 +6,49 @@ cc_library( name = "softblas", visibility = ["//visibility:public"], hdrs = ["include/softblas.h"], - # includes = ["include"], - srcs = ["src/blas/level1/sasum.c", - "src/blas/level1/dasum.c", - "src/blas/level1/hasum.c", - "src/blas/level1/qasum.c", - "src/blas/level1/saxpy.c", - "src/blas/level1/daxpy.c", - "src/blas/level1/haxpy.c", - "src/blas/level1/qaxpy.c", - "src/blas/level1/scopy.c", - "src/blas/level1/dcopy.c", - "src/blas/level1/hcopy.c", - "src/blas/level1/qcopy.c", - "src/blas/level1/sdot.c", - "src/blas/level1/ddot.c", - "src/blas/level1/hdot.c", - "src/blas/level1/qdot.c", - "src/blas/level1/snrm2.c", - "src/blas/level1/dnrm2.c", - "src/blas/level1/hnrm2.c", - "src/blas/level1/qnrm2.c", - "src/blas/level1/sscal.c", - "src/blas/level1/dscal.c", - "src/blas/level1/hscal.c", - "src/blas/level1/qscal.c", - "src/blas/level1/sswap.c", - "src/blas/level1/dswap.c", - "src/blas/level1/hswap.c", - "src/blas/level1/qswap.c", - "src/blas/level1/isamax.c", - "src/blas/level1/idamax.c", - "src/blas/level1/ihamax.c", - "src/blas/level1/iqamax.c", - "src/blas/level2/sgemv.c", - "src/blas/level2/dgemv.c", - "src/blas/level2/hgemv.c", - "src/blas/level2/qgemv.c", - "src/blas/level3/sgemm.c", - "src/blas/level3/dgemm.c", - "src/blas/level3/hgemm.c", - "src/blas/level3/qgemm.c" + includes = ["include"], + srcs = [ + "include/softblas.h", + "src/blas/level1/sasum.c", + "src/blas/level1/dasum.c", + "src/blas/level1/hasum.c", + "src/blas/level1/qasum.c", + "src/blas/level1/saxpy.c", + "src/blas/level1/daxpy.c", + "src/blas/level1/haxpy.c", + "src/blas/level1/qaxpy.c", + "src/blas/level1/scopy.c", + "src/blas/level1/dcopy.c", + "src/blas/level1/hcopy.c", + "src/blas/level1/qcopy.c", + "src/blas/level1/sdot.c", + "src/blas/level1/ddot.c", + "src/blas/level1/hdot.c", + "src/blas/level1/qdot.c", + "src/blas/level1/snrm2.c", + 
"src/blas/level1/dnrm2.c", + "src/blas/level1/hnrm2.c", + "src/blas/level1/qnrm2.c", + "src/blas/level1/sscal.c", + "src/blas/level1/dscal.c", + "src/blas/level1/hscal.c", + "src/blas/level1/qscal.c", + "src/blas/level1/sswap.c", + "src/blas/level1/dswap.c", + "src/blas/level1/hswap.c", + "src/blas/level1/qswap.c", + "src/blas/level1/isamax.c", + "src/blas/level1/idamax.c", + "src/blas/level1/ihamax.c", + "src/blas/level1/iqamax.c", + "src/blas/level2/sgemv.c", + "src/blas/level2/dgemv.c", + "src/blas/level2/hgemv.c", + "src/blas/level2/qgemv.c", + "src/blas/level3/sgemm.c", + "src/blas/level3/dgemm.c", + "src/blas/level3/hgemm.c", + "src/blas/level3/qgemm.c" ], deps = ["@softfloat"], ) From 1beb9c35dd9a0d77daac95a5e1ad2b5c02ecb997 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 28 Feb 2024 13:49:18 -0600 Subject: [PATCH 03/41] Include new commit hash. --- WORKSPACE.bazel | 2 +- bazel/third_party/softblas/softblas.BUILD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 5553f2357c..3938734312 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -296,7 +296,7 @@ versioned_http_archive( build_file = "//bazel/third_party/softblas:softblas.BUILD", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "bd637fdb23ac4ebd4048eb546633262ddf647b18", + version = "cbf3dfff5882fd03f28a74c7c0c6ef4c27ec176d", ) versioned_http_archive( diff --git a/bazel/third_party/softblas/softblas.BUILD b/bazel/third_party/softblas/softblas.BUILD index 8d669a7f77..101487d86a 100644 --- a/bazel/third_party/softblas/softblas.BUILD +++ b/bazel/third_party/softblas/softblas.BUILD @@ -6,7 +6,7 @@ cc_library( name = "softblas", visibility = ["//visibility:public"], hdrs = ["include/softblas.h"], - # includes = ["include"], + includes = ["include"], srcs = ["src/blas/level1/sasum.c", "src/blas/level1/dasum.c", "src/blas/level1/hasum.c", From e91829d00b15de10c582696548bd2dc5968b24b6 Mon Sep 17 
00:00:00 2001 From: Sigilante Date: Thu, 29 Feb 2024 10:30:41 -0600 Subject: [PATCH 04/41] Post jet fundamentals. --- pkg/c3/motes.h | 6 ++ pkg/noun/jets/f/lagoon.c | 157 +++++++++++++++++++++++++++++++++++++++ pkg/noun/jets/q.h | 2 +- pkg/noun/jets/tree.c | 10 ++- 4 files changed, 171 insertions(+), 4 deletions(-) create mode 100644 pkg/noun/jets/f/lagoon.c diff --git a/pkg/c3/motes.h b/pkg/c3/motes.h index db17834057..3cbce8c48c 100644 --- a/pkg/c3/motes.h +++ b/pkg/c3/motes.h @@ -258,6 +258,7 @@ # define c3__corp c3_s4('c','o','r','p') # define c3__corp c3_s4('c','o','r','p') # define c3__cow c3_s3('c','o','w') +# define c3__cplx c3_s3('c','p','l','x') # define c3__cpu c3_s3('c','p','u') # define c3__crad c3_s4('c','r','a','d') # define c3__cram c3_s4('c','r','a','m') @@ -430,6 +431,7 @@ # define c3__fit c3_s3('f','i','t') # define c3__fits c3_s4('f','i','t','s') # define c3__fix c3_s3('f','i','x') +# define c3__fixp c3_s3('f','i','x','p') # define c3__fl c3_s2('f','l') # define c3__flac c3_s4('f','l','a','c') # define c3__flag c3_s4('f','l','a','g') @@ -602,6 +604,7 @@ # define c3__info c3_s4('i','n','f','o') # define c3__init c3_s4('i','n','i','t') # define c3__ins c3_s3('i','n','s') +# define c3__int2 c3_s4('i','n','t','2') # define c3__into c3_s4('i','n','t','o') # define c3__intr c3_s4('i','n','t','r') # define c3__inuk c3_s4('i','n','u','k') @@ -970,6 +973,7 @@ # define c3__rasp c3_s4('r','a','s','p') # define c3__raw c3_s3('r','a','w') # define c3__read c3_s4('r','e','a','d') +# define c3__real c3_s4('r','e','a','l') # define c3__reck c3_s4('r','e','c','k') # define c3__reef c3_s4('r','e','e','f') # define c3__resd c3_s4('r','e','s','d') @@ -1229,11 +1233,13 @@ # define c3__ubin c3_s4('u','b','i','n') # define c3__ubit c3_s4('u','b','i','t') # define c3__ud c3_s2('u','d') +# define c3__uint c3_s4('u','i','n','t') # define c3__ulib c3_s4('u','l','i','b') # define c3__un c3_s2('u','n') # define c3__uniq c3_s4('u','n','i','q') # define c3__unix 
c3_s4('u','n','i','x') # define c3__unt c3_s3('u','n','t') +# define c3__unum c3_s3('u','n','u','m') # define c3__up c3_s2('u','p') # define c3__url c3_s3('u','r','l') # define c3__urth c3_s4('u','r','t','h') diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c new file mode 100644 index 0000000000..e45b1ce124 --- /dev/null +++ b/pkg/noun/jets/f/lagoon.c @@ -0,0 +1,157 @@ +/// @file + +#include "jets/q.h" +#include "jets/w.h" + +#include "noun.h" +#include "softfloat.h" +#include "softblas.h" + +#include + + union half { + float16_t h; + c3_w c; + }; + + union sing { + float32_t s; + c3_w c; + }; + + union doub { + float64_t d; + c3_d c; + }; + + union quad { + float128_t q; + c3_d c[2]; + }; + + static inline void + _set_rounding(c3_w a) + { + switch ( a ) + { + default: + u3m_bail(c3__fail); + break; + case c3__n: + softfloat_roundingMode = softfloat_round_near_even; + break; + case c3__z: + softfloat_roundingMode = softfloat_round_minMag; + break; + case c3__u: + softfloat_roundingMode = softfloat_round_max; + break; + case c3__d: + softfloat_roundingMode = softfloat_round_min; + break; + } + } + +/* add +*/ + u3_noun + u3qf_la_add_real(u3_noun a_data, + u3_noun b_data, + u3_noun shape, + u3_noun bloq, + u3_noun rnd) + { + + fprintf(stderr, ">> u3qf_la_add_real\n"); + + // SoftBLAS needs to be used here. + return u3_none; + + // // Split a into component atoms. 
+ // // (roll shape mul) => 2 x 3 = 6 + // c3_w size = 1; + // u3_atom shp = shape; + // while (u3_nul != shp) { + // shp = u3t(shp); + // size *= shp; + // } + + + + + // return u3i_word(len_w); + + + // union sing c, d, e; + // _set_rounding(r); + // c.c = u3r_word(0, a); + // d.c = u3r_word(0, b); + // e.s = _nan_unify_s(f32_add(c.s, d.s)); + + // return u3i_words(1, &e.c); + } + + u3_noun + u3wf_la_add(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, a_data, + b_meta, b_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &a_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &b_data, + 0) || + c3n == u3ud(a_data) || + c3n == u3ud(b_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun a_shape, a_bloq, a_kind, + b_shape, b_bloq, b_kind, + rnd, fxp; + if ( c3n == u3r_mean(a_meta, + 2, &a_shape, + 6, &a_bloq, + 7, &a_kind, + 0) || + c3n == u3r_mean(b_meta, + 2, &b_shape, + 6, &b_bloq, + 7, &b_kind, + 0) || + c3n == u3r_sing(a_shape, b_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + c3n == u3r_mean(cor, 60, &rnd, 61, &fxp, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + return u3qf_la_add_real(a_data, b_data, a_shape, a_bloq, rnd); + + // case c3__int2: + // return u3qf_la_add_int2(a_data, b_data, a_shape, a_bloq); + + // case c3__uint: + // return u3qf_la_add_uint(a_data, b_data, a_shape, a_bloq); + + // case c3__cplx: + // return u3qf_la_add_cplx(a_data, b_data, a_shape, a_bloq, rnd); + + // case c3__unum: + // return u3qf_la_add_unum(a_data, b_data, a_shape, a_bloq); + + // case c3__fixp: + // return u3qf_la_add_fixp(a_data, b_data, a_shape, a_bloq); + + default: + return u3_none; + } + } + } + } diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 8b99103e56..c7cb5ccfd0 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -247,7 +247,7 @@ u3_noun u3qfp_nepo(u3_noun, u3_noun); u3_noun u3qfp_rake(u3_noun); - u3_noun 
u3qf_la_add_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 # define u3qfu_van_rib 58 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 0499a5202d..375f0e1610 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2148,9 +2148,13 @@ static u3j_core _139_hex_json_d[] = }; /* linear algebra jets + XX move to outer _hep_ core for /lib? */ - -static u3j_harm _139_hex_lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; +static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; +static u3j_core _139_hex__lagoon_d[] = + { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, + {} + }; static u3j_core _139_hex_d[] = { { "lore", 63, _140_hex_lore_a, 0, no_hashes }, @@ -2171,7 +2175,7 @@ static u3j_core _139_hex_d[] = { "mimes", 31, 0, _140_hex_mimes_d, no_hashes }, { "json", 31, 0, _139_hex_json_d, no_hashes }, - { "add", 7, _139_hex_lagoon_add_a, 0, no_hashes }, + { "lagoon", 31, 0, _139_hex__lagoon_d, no_hashes }, {} }; From 4be17fc8b90eb7c6b68f4510b3541f50f2806647 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Thu, 29 Feb 2024 13:11:05 -0600 Subject: [PATCH 05/41] Hints for Lagoon work. 
--- pkg/noun/jets/f/lagoon.c | 29 +++++++++++++++++------------ pkg/noun/jets/tree.c | 10 ++++++++-- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index e45b1ce124..5b27ccb31e 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -97,6 +97,7 @@ // Each argument is a ray, [=meta data=@ux] u3_noun a_meta, a_data, b_meta, b_data; + fprintf(stderr, "\n>> u3wf_la_add\n"); if ( c3n == u3r_mean(cor, u3x_sam_4, &a_meta, @@ -109,27 +110,31 @@ { return u3m_bail(c3__exit); } else { - u3_noun a_shape, a_bloq, a_kind, - b_shape, b_bloq, b_kind, - rnd, fxp; + u3_noun a_shape, a_bloq, a_kind, a_fxp, + b_shape, b_bloq, b_kind, b_fxp, + rnd; if ( c3n == u3r_mean(a_meta, - 2, &a_shape, - 6, &a_bloq, - 7, &a_kind, - 0) || + 2, &a_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || c3n == u3r_mean(b_meta, - 2, &b_shape, - 6, &b_bloq, - 7, &b_kind, - 0) || + 2, &b_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || c3n == u3r_sing(a_shape, b_shape) || c3n == u3r_sing(a_bloq, b_bloq) || c3n == u3r_sing(a_kind, b_kind) || - c3n == u3r_mean(cor, 60, &rnd, 61, &fxp, 0) + // fxp does not need to match so no check + c3n == u3r_mean(cor, 31, &rnd, 0) ) { return u3m_bail(c3__exit); } else { + fprintf(stderr, ">> u3wf_la_add: a_kind: %x\n", a_kind); switch (a_kind) { case c3__real: return u3qf_la_add_real(a_data, b_data, a_shape, a_bloq, rnd); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 375f0e1610..b6597ffa7f 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2148,14 +2148,20 @@ static u3j_core _139_hex_json_d[] = }; /* linear algebra jets - XX move to outer _hep_ core for /lib? + XX move to outer _sep_ core for /lib? 
eventually +static u3j_core _139_sep_d[] = */ static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; -static u3j_core _139_hex__lagoon_d[] = +static u3j_core _139_hex__la_core_d[] = { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, {} }; +static u3j_core _139_hex__lagoon_d[] = + { { "la-core", 7, 0, _139_hex__la_core_d, no_hashes }, + {} + }; + static u3j_core _139_hex_d[] = { { "lore", 63, _140_hex_lore_a, 0, no_hashes }, { "leer", 63, _140_hex_leer_a, 0, no_hashes }, From 81718e2d75e4e49471be12ae697ca10fe574878e Mon Sep 17 00:00:00 2001 From: Sigilante Date: Thu, 29 Feb 2024 14:04:23 -0600 Subject: [PATCH 06/41] ++add for Lagoon working. --- pkg/noun/jets/f/lagoon.c | 89 +++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 19 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 5b27ccb31e..3aa27e63d3 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -52,6 +52,18 @@ } } +/* shape +*/ + static inline uint64_t _get_shape(u3_noun shape) + { + uint64_t res = 1; + while (u3_nul != shape) { + res = res * u3h(shape); + shape = u3t(shape); + } + return res; + } + /* add */ u3_noun @@ -64,31 +76,72 @@ fprintf(stderr, ">> u3qf_la_add_real\n"); - // SoftBLAS needs to be used here. - return u3_none; + // Unpack the data as a byte array for SoftBLAS. + uint64_t len_a = _get_shape(shape); + uint8_t* a_bytes = (uint8_t*)malloc(len_a*sizeof(uint8_t)); + u3r_bytes(0, len_a, a_bytes, a_data); + uint8_t* b_bytes = (uint8_t*)malloc(len_a*sizeof(uint8_t)); + u3r_bytes(0, len_a, b_bytes, b_data); + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)a_bytes, 1, (float16_t*)b_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes(len_a, b_bytes); - // // Split a into component atoms. 
- // // (roll shape mul) => 2 x 3 = 6 - // c3_w size = 1; - // u3_atom shp = shape; - // while (u3_nul != shp) { - // shp = u3t(shp); - // size *= shp; - // } + // Clean up. + free(a_bytes); + free(b_bytes); + return u3nc(a_data, r_data); + break; + case 5: + saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)a_bytes, 1, (float32_t*)b_bytes, 1); + // Unpack the result back into a noun. + r_data = u3i_bytes(len_a, b_bytes); - // return u3i_word(len_w); + // Clean up. + free(a_bytes); + free(b_bytes); + return u3nc(a_data, r_data); + break; - // union sing c, d, e; - // _set_rounding(r); - // c.c = u3r_word(0, a); - // d.c = u3r_word(0, b); - // e.s = _nan_unify_s(f32_add(c.s, d.s)); + case 6: + daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)a_bytes, 1, (float64_t*)b_bytes, 1); - // return u3i_words(1, &e.c); + // Unpack the result back into a noun. + r_data = u3i_bytes(len_a, b_bytes); + + // Clean up. + free(a_bytes); + free(b_bytes); + + return u3nc(a_data, r_data); + break; + + case 7: + qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)a_bytes, 1, (float128_t*)b_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes(len_a, b_bytes); + + // Clean up. + free(a_bytes); + free(b_bytes); + + return u3nc(a_data, r_data); + break; + + default: + return u3_none; + } } u3_noun @@ -97,7 +150,6 @@ // Each argument is a ray, [=meta data=@ux] u3_noun a_meta, a_data, b_meta, b_data; - fprintf(stderr, "\n>> u3wf_la_add\n"); if ( c3n == u3r_mean(cor, u3x_sam_4, &a_meta, @@ -134,7 +186,6 @@ { return u3m_bail(c3__exit); } else { - fprintf(stderr, ">> u3wf_la_add: a_kind: %x\n", a_kind); switch (a_kind) { case c3__real: return u3qf_la_add_real(a_data, b_data, a_shape, a_bloq, rnd); From 1268c90a89c9eb64cb68c68b565e563bb4bda370 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 5 Mar 2024 15:52:40 -0600 Subject: [PATCH 07/41] Works with SoftBLAS. 
--- MODULE.bazel | 6 + WORKSPACE.bazel | 2 +- bazel/third_party/softblas/softblas.BUILD | 1 + pkg/noun/jets/f/lagoon.c | 256 ++++++++++++++++++---- pkg/noun/jets/q.h | 3 +- pkg/noun/jets/tree.c | 4 +- pkg/noun/jets/w.h | 1 + 7 files changed, 232 insertions(+), 41 deletions(-) create mode 100644 MODULE.bazel diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000000..00bb18361f --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,6 @@ +############################################################################### +# Bazel now uses Bzlmod by default to manage external dependencies. +# Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel. +# +# For more details, please check https://github.com/bazelbuild/bazel/issues/18958 +############################################################################### diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 85ba2a48b0..3b0296fd0f 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -297,7 +297,7 @@ versioned_http_archive( strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "cbf3dfff5882fd03f28a74c7c0c6ef4c27ec176d", + version = "bace30db3944c0f2bb2b6cac0db9965675ad842e", ) versioned_http_archive( diff --git a/bazel/third_party/softblas/softblas.BUILD b/bazel/third_party/softblas/softblas.BUILD index 2ca6b46b1a..3442c5da45 100644 --- a/bazel/third_party/softblas/softblas.BUILD +++ b/bazel/third_party/softblas/softblas.BUILD @@ -9,6 +9,7 @@ cc_library( includes = ["include"], srcs = [ "include/softblas.h", + "src/softblas_state.c", "src/blas/level1/sasum.c", "src/blas/level1/dasum.c", "src/blas/level1/hasum.c", diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 3aa27e63d3..3622165141 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -7,6 +7,7 @@ #include "softfloat.h" #include "softblas.h" +#include #include union half { @@ -29,59 +30,75 @@ c3_d c[2]; }; + // 
$?(%n %u %d %z %a) static inline void _set_rounding(c3_w a) { + // We could use SoftBLAS set_rounding() to set the SoftFloat + // mode as well, but it's more explicit to do it here since + // we may use SoftFloat in any given Lagoon jet and we want + // you, dear developer, to see this set here. + fprintf(stderr, "%x %c\n", a, a); switch ( a ) { default: u3m_bail(c3__fail); break; + // %n - near case c3__n: softfloat_roundingMode = softfloat_round_near_even; + softblas_roundingMode = 'n'; break; + // %z - zero case c3__z: softfloat_roundingMode = softfloat_round_minMag; + softblas_roundingMode = 'z'; break; + // %u - up case c3__u: softfloat_roundingMode = softfloat_round_max; + softblas_roundingMode = 'u'; break; + // %d - down case c3__d: softfloat_roundingMode = softfloat_round_min; + softblas_roundingMode = 'd'; + break; + // %a - away + case c3__a: + softfloat_roundingMode = softfloat_round_near_maxMag; + softblas_roundingMode = 'a'; break; } } /* shape */ - static inline uint64_t _get_shape(u3_noun shape) + static inline uint64_t _get_length(u3_noun shape) { - uint64_t res = 1; + uint64_t len = 1; while (u3_nul != shape) { - res = res * u3h(shape); + len = len * u3h(shape); shape = u3t(shape); } - return res; + return len; } -/* add +/* add - axpy = 1*x+y */ u3_noun u3qf_la_add_real(u3_noun a_data, u3_noun b_data, u3_noun shape, - u3_noun bloq, - u3_noun rnd) + u3_noun bloq) { - - fprintf(stderr, ">> u3qf_la_add_real\n"); - - // Unpack the data as a byte array for SoftBLAS. - uint64_t len_a = _get_shape(shape); - uint8_t* a_bytes = (uint8_t*)malloc(len_a*sizeof(uint8_t)); - u3r_bytes(0, len_a, a_bytes, a_data); - uint8_t* b_bytes = (uint8_t*)malloc(len_a*sizeof(uint8_t)); - u3r_bytes(0, len_a, b_bytes, b_data); + // Unpack the data as a byte array. We assume total length < 2**64. 
+ uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * bloq; + uint8_t* a_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, a_bytes, a_data); + uint8_t* b_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, b_bytes, b_data); u3_noun r_data; @@ -91,55 +108,145 @@ haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)a_bytes, 1, (float16_t*)b_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(len_a, b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); // Clean up. - free(a_bytes); - free(b_bytes); + u3a_free(a_bytes); + u3a_free(b_bytes); - return u3nc(a_data, r_data); - break; + return r_data; case 5: saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)a_bytes, 1, (float32_t*)b_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(len_a, b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); // Clean up. - free(a_bytes); - free(b_bytes); + u3a_free(a_bytes); + u3a_free(b_bytes); - return u3nc(a_data, r_data); - break; + return r_data; case 6: daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)a_bytes, 1, (float64_t*)b_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(len_a, b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); // Clean up. - free(a_bytes); - free(b_bytes); + u3a_free(a_bytes); + u3a_free(b_bytes); - return u3nc(a_data, r_data); - break; + return r_data; case 7: qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)a_bytes, 1, (float128_t*)b_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(len_a, b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); + + // Clean up. 
+ u3a_free(a_bytes); + u3a_free(b_bytes); + + return r_data; + + default: + u3a_free(a_bytes); + u3a_free(b_bytes); + + return u3_none; + } + } + +/* mmul +*/ + u3_noun + u3qf_la_mmul_real(u3_noun a_data, + u3_noun b_data, + u3_noun a_shape, + u3_noun b_shape, + u3_noun bloq) + { + // Unpack the data as a byte array. We assume total length < 2**64. + uint64_t M = u3h(a_shape); + uint64_t Na = u3h(u3t(a_shape)); + uint64_t Nb = u3h(b_shape); + uint64_t P = u3h(u3t(b_shape)); + + assert(u3_nul == u3t(u3t(a_shape))); + assert(Na == Nb); + uint64_t N = Na; + assert(u3_nul == u3t(u3t(b_shape))); + + uint8_t* a_bytes = (uint8_t*)u3a_malloc((M*N)*sizeof(uint8_t)); + u3r_bytes(0, M*N, a_bytes, a_data); + uint8_t* b_bytes = (uint8_t*)u3a_malloc((N*P)*sizeof(uint8_t)); + u3r_bytes(0, N*P, b_bytes, b_data); + uint8_t* c_bytes = (uint8_t*)u3a_malloc((M*P)*sizeof(uint8_t)); + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)a_bytes, N, (float16_t*)b_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. + u3a_free(a_bytes); + u3a_free(b_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + case 5: + sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)a_bytes, N, (float32_t*)b_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. + u3a_free(a_bytes); + u3a_free(b_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + case 6: + dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)a_bytes, N, (float64_t*)b_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)c_bytes, P); + + // Unpack the result back into a noun. 
+ r_data = u3i_bytes(M*P, c_bytes); // Clean up. - free(a_bytes); - free(b_bytes); + u3a_free(a_bytes); + u3a_free(b_bytes); + u3a_free(c_bytes); - return u3nc(a_data, r_data); - break; + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + case 7: + qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)a_bytes, N, (float128_t*)b_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. + u3a_free(a_bytes); + u3a_free(b_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); default: + u3a_free(a_bytes); + u3a_free(b_bytes); + u3a_free(c_bytes); + return u3_none; } } @@ -181,14 +288,87 @@ c3n == u3r_sing(a_bloq, b_bloq) || c3n == u3r_sing(a_kind, b_kind) || // fxp does not need to match so no check - c3n == u3r_mean(cor, 31, &rnd, 0) + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_add_real(a_data, b_data, a_shape, a_bloq); + return u3nc(u3nq(a_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + // case c3__int2: + // return u3qf_la_add_int2(a_data, b_data, a_shape, a_bloq); + + // case c3__uint: + // return u3qf_la_add_uint(a_data, b_data, a_shape, a_bloq); + + // case c3__cplx: + // _set_rounding(rnd); + // return u3qf_la_add_cplx(a_data, b_data, a_shape, a_bloq); + + // case c3__unum: + // return u3qf_la_add_unum(a_data, b_data, a_shape, a_bloq); + + // case c3__fixp: + // return u3qf_la_add_fixp(a_data, b_data, a_shape, a_bloq); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_mmul(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, a_data, + b_meta, b_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &a_data, + u3x_sam_6, &b_meta, + u3x_sam_7, 
&b_data, + 0) || + c3n == u3ud(a_data) || + c3n == u3ud(b_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun a_shape, a_bloq, a_kind, a_fxp, + b_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &a_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &b_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) ) { return u3m_bail(c3__exit); } else { switch (a_kind) { case c3__real: - return u3qf_la_add_real(a_data, b_data, a_shape, a_bloq, rnd); + _set_rounding(rnd); + return u3qf_la_mmul_real(a_data, b_data, a_shape, b_shape, a_bloq); + break; // case c3__int2: // return u3qf_la_add_int2(a_data, b_data, a_shape, a_bloq); diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index c7cb5ccfd0..ddeed9afcf 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -247,7 +247,8 @@ u3_noun u3qfp_nepo(u3_noun, u3_noun); u3_noun u3qfp_rake(u3_noun); - u3_noun u3qf_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 # define u3qfu_van_rib 58 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index b6597ffa7f..e70c4073f5 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2152,8 +2152,10 @@ static u3j_core _139_hex_json_d[] = static u3j_core _139_sep_d[] = */ static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; +static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; static u3j_core _139_hex__la_core_d[] = - { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, + { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, + { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h 
index 5cc40f1975..23aaab9938 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -332,5 +332,6 @@ u3_noun u3wfu_rest(u3_noun); u3_noun u3wf_la_add(u3_noun); + u3_noun u3wf_la_mmul(u3_noun); #endif /* ifndef U3_JETS_W_H */ From 171fa5c121c24f5b2b23dfb0818c9f4da534a721 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 5 Mar 2024 16:24:07 -0600 Subject: [PATCH 08/41] Post ++sub jet. --- pkg/noun/jets/f/lagoon.c | 320 ++++++++++++++++++++++++++++----------- pkg/noun/jets/q.h | 1 + pkg/noun/jets/tree.c | 2 + pkg/noun/jets/w.h | 1 + 4 files changed, 238 insertions(+), 86 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 3622165141..07e0e5dd13 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -87,74 +87,150 @@ /* add - axpy = 1*x+y */ u3_noun - u3qf_la_add_real(u3_noun a_data, - u3_noun b_data, + u3qf_la_add_real(u3_noun x_data, + u3_noun y_data, u3_noun shape, u3_noun bloq) { // Unpack the data as a byte array. We assume total length < 2**64. uint64_t len_a = _get_length(shape); uint64_t siz_a = len_a * bloq; - uint8_t* a_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, a_bytes, a_data); - uint8_t* b_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, b_bytes, b_data); + uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, y_bytes, y_data); u3_noun r_data; // Switch on the block size. switch (bloq) { case 4: - haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)a_bytes, 1, (float16_t*)b_bytes, 1); + haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); // Clean up. 
- u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); return r_data; case 5: - saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)a_bytes, 1, (float32_t*)b_bytes, 1); + saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); // Clean up. - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); return r_data; case 6: - daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)a_bytes, 1, (float64_t*)b_bytes, 1); + daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); // Clean up. - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); return r_data; case 7: - qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)a_bytes, 1, (float128_t*)b_bytes, 1); + qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), b_bytes); + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); // Clean up. - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); return r_data; default: - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); + + return u3_none; + } + } + +/* sub - axpy = -1*y+x +*/ + u3_noun + u3qf_la_sub_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Unpack the data as a byte array. We assume total length < 2**64. 
+ uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * bloq; + uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, y_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, y_bytes, x_data); + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + haxpy(len_a, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 5: + saxpy(len_a, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 6: + daxpy(len_a, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 7: + qaxpy(len_a, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + u3a_free(y_bytes); return u3_none; } @@ -163,27 +239,27 @@ /* mmul */ u3_noun - u3qf_la_mmul_real(u3_noun a_data, - u3_noun b_data, - u3_noun a_shape, - u3_noun b_shape, + u3qf_la_mmul_real(u3_noun x_data, + u3_noun y_data, + u3_noun x_shape, + u3_noun y_shape, u3_noun bloq) { // Unpack the data as a byte array. We assume total length < 2**64. 
- uint64_t M = u3h(a_shape); - uint64_t Na = u3h(u3t(a_shape)); - uint64_t Nb = u3h(b_shape); - uint64_t P = u3h(u3t(b_shape)); + uint64_t M = u3h(x_shape); + uint64_t Na = u3h(u3t(x_shape)); + uint64_t Nb = u3h(y_shape); + uint64_t P = u3h(u3t(y_shape)); - assert(u3_nul == u3t(u3t(a_shape))); + assert(u3_nul == u3t(u3t(x_shape))); assert(Na == Nb); uint64_t N = Na; - assert(u3_nul == u3t(u3t(b_shape))); + assert(u3_nul == u3t(u3t(y_shape))); - uint8_t* a_bytes = (uint8_t*)u3a_malloc((M*N)*sizeof(uint8_t)); - u3r_bytes(0, M*N, a_bytes, a_data); - uint8_t* b_bytes = (uint8_t*)u3a_malloc((N*P)*sizeof(uint8_t)); - u3r_bytes(0, N*P, b_bytes, b_data); + uint8_t* x_bytes = (uint8_t*)u3a_malloc((M*N)*sizeof(uint8_t)); + u3r_bytes(0, M*N, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((N*P)*sizeof(uint8_t)); + u3r_bytes(0, N*P, y_bytes, y_data); uint8_t* c_bytes = (uint8_t*)u3a_malloc((M*P)*sizeof(uint8_t)); u3_noun r_data; @@ -191,60 +267,60 @@ // Switch on the block size. switch (bloq) { case 4: - hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)a_bytes, N, (float16_t*)b_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)c_bytes, P); + hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)c_bytes, P); // Unpack the result back into a noun. r_data = u3i_bytes(M*P, c_bytes); // Clean up. - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); u3a_free(c_bytes); return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); case 5: - sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)a_bytes, N, (float32_t*)b_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)c_bytes, P); + sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)c_bytes, P); // Unpack the result back into a noun. 
r_data = u3i_bytes(M*P, c_bytes); // Clean up. - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); u3a_free(c_bytes); return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); case 6: - dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)a_bytes, N, (float64_t*)b_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)c_bytes, P); + dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)c_bytes, P); // Unpack the result back into a noun. r_data = u3i_bytes(M*P, c_bytes); // Clean up. - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); u3a_free(c_bytes); return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); case 7: - qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)a_bytes, N, (float128_t*)b_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)c_bytes, P); + qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)c_bytes, P); // Unpack the result back into a noun. r_data = u3i_bytes(M*P, c_bytes); // Clean up. 
- u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); u3a_free(c_bytes); return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); default: - u3a_free(a_bytes); - u3a_free(b_bytes); + u3a_free(x_bytes); + u3a_free(y_bytes); u3a_free(c_bytes); return u3_none; @@ -255,36 +331,108 @@ u3wf_la_add(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, a_data, - b_meta, b_data; + u3_noun a_meta, x_data, + b_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + // case c3__int2: + // return u3qf_la_add_int2(x_data, y_data, x_shape, a_bloq); + + // case c3__uint: + // return u3qf_la_add_uint(x_data, y_data, x_shape, a_bloq); + + // case c3__cplx: + // _set_rounding(rnd); + // return u3qf_la_add_cplx(x_data, y_data, x_shape, a_bloq); + + // case c3__unum: + // return u3qf_la_add_unum(x_data, y_data, x_shape, a_bloq); + + // case c3__fixp: + // return u3qf_la_add_fixp(x_data, y_data, x_shape, a_bloq); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_sub(u3_noun cor) + { + // Each argument is a ray, [=meta 
data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; if ( c3n == u3r_mean(cor, u3x_sam_4, &a_meta, - u3x_sam_5, &a_data, + u3x_sam_5, &x_data, u3x_sam_6, &b_meta, - u3x_sam_7, &b_data, + u3x_sam_7, &y_data, 0) || - c3n == u3ud(a_data) || - c3n == u3ud(b_data) ) + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun a_shape, a_bloq, a_kind, a_fxp, - b_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, rnd; if ( c3n == u3r_mean(a_meta, - 2, &a_shape, + 2, &x_shape, 6, &a_bloq, 14, &a_kind, 15, &a_fxp, 0) || c3n == u3r_mean(b_meta, - 2, &b_shape, + 2, &y_shape, 6, &b_bloq, 14, &b_kind, 15, &b_fxp, 0) || - c3n == u3r_sing(a_shape, b_shape) || + c3n == u3r_sing(x_shape, y_shape) || c3n == u3r_sing(a_bloq, b_bloq) || c3n == u3r_sing(a_kind, b_kind) || // fxp does not need to match so no check @@ -296,25 +444,25 @@ switch (a_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_add_real(a_data, b_data, a_shape, a_bloq); - return u3nc(u3nq(a_shape, a_bloq, a_kind, a_fxp), r_data); + u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); break; // case c3__int2: - // return u3qf_la_add_int2(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_sub_int2(x_data, y_data, x_shape, a_bloq); // case c3__uint: - // return u3qf_la_add_uint(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_sub_uint(x_data, y_data, x_shape, a_bloq); // case c3__cplx: // _set_rounding(rnd); - // return u3qf_la_add_cplx(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_sub_cplx(x_data, y_data, x_shape, a_bloq); // case c3__unum: - // return u3qf_la_add_unum(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_sub_unum(x_data, y_data, x_shape, a_bloq); // case c3__fixp: - // return u3qf_la_add_fixp(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_sub_fixp(x_data, y_data, x_shape, a_bloq); default: return 
u3_none; @@ -327,31 +475,31 @@ u3wf_la_mmul(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, a_data, - b_meta, b_data; + u3_noun a_meta, x_data, + b_meta, y_data; if ( c3n == u3r_mean(cor, u3x_sam_4, &a_meta, - u3x_sam_5, &a_data, + u3x_sam_5, &x_data, u3x_sam_6, &b_meta, - u3x_sam_7, &b_data, + u3x_sam_7, &y_data, 0) || - c3n == u3ud(a_data) || - c3n == u3ud(b_data) ) + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun a_shape, a_bloq, a_kind, a_fxp, - b_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, rnd; if ( c3n == u3r_mean(a_meta, - 2, &a_shape, + 2, &x_shape, 6, &a_bloq, 14, &a_kind, 15, &a_fxp, 0) || c3n == u3r_mean(b_meta, - 2, &b_shape, + 2, &y_shape, 6, &b_bloq, 14, &b_kind, 15, &b_fxp, @@ -367,23 +515,23 @@ switch (a_kind) { case c3__real: _set_rounding(rnd); - return u3qf_la_mmul_real(a_data, b_data, a_shape, b_shape, a_bloq); + return u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, a_bloq); break; // case c3__int2: - // return u3qf_la_add_int2(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_add_int2(x_data, y_data, x_shape, a_bloq); // case c3__uint: - // return u3qf_la_add_uint(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_add_uint(x_data, y_data, x_shape, a_bloq); // case c3__cplx: - // return u3qf_la_add_cplx(a_data, b_data, a_shape, a_bloq, rnd); + // return u3qf_la_add_cplx(x_data, y_data, x_shape, a_bloq, rnd); // case c3__unum: - // return u3qf_la_add_unum(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_add_unum(x_data, y_data, x_shape, a_bloq); // case c3__fixp: - // return u3qf_la_add_fixp(a_data, b_data, a_shape, a_bloq); + // return u3qf_la_add_fixp(x_data, y_data, x_shape, a_bloq); default: return u3_none; diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index ddeed9afcf..371cebb228 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -248,6 +248,7 @@ u3_noun u3qfp_rake(u3_noun); 
u3_noun u3qf_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index e70c4073f5..035dad2305 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2152,9 +2152,11 @@ static u3j_core _139_hex_json_d[] = static u3j_core _139_sep_d[] = */ static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; +static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; static u3j_core _139_hex__la_core_d[] = { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, + { "sub-rays", 7, _139_hex__lagoon_sub_a, 0, no_hashes }, { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 23aaab9938..f7d4ef6a53 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -332,6 +332,7 @@ u3_noun u3wfu_rest(u3_noun); u3_noun u3wf_la_add(u3_noun); + u3_noun u3wf_la_sub(u3_noun); u3_noun u3wf_la_mmul(u3_noun); #endif /* ifndef U3_JETS_W_H */ From 315b4f5249326d78e53a8d4a435ee46aaf3c569c Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 27 Mar 2024 10:18:41 -0500 Subject: [PATCH 09/41] Post fixed scalar jets using ?scal. --- pkg/noun/jets/f/lagoon.c | 1257 +++++++++++++++++++++++++++++++------- pkg/noun/jets/q.h | 6 + pkg/noun/jets/tree.c | 12 + pkg/noun/jets/w.h | 6 + 4 files changed, 1051 insertions(+), 230 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 07e0e5dd13..ba55c5b4db 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -8,6 +8,7 @@ #include "softblas.h" #include +#include // for pow() #include union half { @@ -94,11 +95,11 @@ { // Unpack the data as a byte array. We assume total length < 2**64. 
uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * bloq; + uint64_t siz_a = len_a * pow(2, bloq - 3); uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, y_bytes, y_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, y_bytes, y_data); u3_noun r_data; @@ -108,7 +109,7 @@ haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -120,7 +121,7 @@ saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -132,7 +133,7 @@ daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -144,7 +145,7 @@ qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -170,11 +171,11 @@ { // Unpack the data as a byte array. We assume total length < 2**64. 
uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * bloq; + uint64_t siz_a = len_a * pow(2, bloq - 3); uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, y_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, y_bytes, x_data); + u3r_bytes(0, siz_a, x_bytes, y_data); // XXX + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, y_bytes, x_data); // XXX u3_noun r_data; @@ -184,7 +185,7 @@ haxpy(len_a, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -196,7 +197,7 @@ saxpy(len_a, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -208,7 +209,7 @@ daxpy(len_a, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); @@ -220,7 +221,7 @@ qaxpy(len_a, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); // Unpack the result back into a noun. - r_data = u3i_bytes(siz_a*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. 
u3a_free(x_bytes); @@ -236,302 +237,1098 @@ } } -/* mmul +/* mul - x.*y + elementwise multiplication */ u3_noun - u3qf_la_mmul_real(u3_noun x_data, - u3_noun y_data, - u3_noun x_shape, - u3_noun y_shape, - u3_noun bloq) + u3qf_la_mul_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) { // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t M = u3h(x_shape); - uint64_t Na = u3h(u3t(x_shape)); - uint64_t Nb = u3h(y_shape); - uint64_t P = u3h(u3t(y_shape)); - - assert(u3_nul == u3t(u3t(x_shape))); - assert(Na == Nb); - uint64_t N = Na; - assert(u3_nul == u3t(u3t(y_shape))); - - uint8_t* x_bytes = (uint8_t*)u3a_malloc((M*N)*sizeof(uint8_t)); - u3r_bytes(0, M*N, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((N*P)*sizeof(uint8_t)); - u3r_bytes(0, N*P, y_bytes, y_data); - uint8_t* c_bytes = (uint8_t*)u3a_malloc((M*P)*sizeof(uint8_t)); + uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, y_bytes, y_data); u3_noun r_data; // Switch on the block size. switch (bloq) { case 4: - hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)c_bytes, P); + for (uint64_t i = 0; i < len_a; i++) { + ((float16_t*)y_bytes)[i] = f16_mul(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); + } // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. 
u3a_free(x_bytes); u3a_free(y_bytes); - u3a_free(c_bytes); - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + return r_data; case 5: - sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)c_bytes, P); + for (uint64_t i = 0; i < len_a; i++) { + ((float32_t*)y_bytes)[i] = f32_mul(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); + } // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); u3a_free(y_bytes); - u3a_free(c_bytes); - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + return r_data; case 6: - dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)c_bytes, P); + for (uint64_t i = 0; i < len_a; i++) { + ((float64_t*)y_bytes)[i] = f64_mul(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); + } // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. u3a_free(x_bytes); u3a_free(y_bytes); - u3a_free(c_bytes); - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + return r_data; case 7: - qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)c_bytes, P); + for (uint64_t i = 0; i < len_a; i++) { + f128M_mul(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); + } // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); // Clean up. 
u3a_free(x_bytes); u3a_free(y_bytes); - u3a_free(c_bytes); - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + return r_data; default: u3a_free(x_bytes); u3a_free(y_bytes); - u3a_free(c_bytes); - + return u3_none; } } +/* div - x/y + elementwise division +*/ u3_noun - u3wf_la_add(u3_noun cor) + u3qf_la_div_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; - - if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, - u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, - u3x_sam_7, &y_data, - 0) || - c3n == u3ud(x_data) || - c3n == u3ud(y_data) ) - { - return u3m_bail(c3__exit); - } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, - rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) - ) - { - return u3m_bail(c3__exit); - } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + // Unpack the data as a byte array. We assume total length < 2**64. 
+ uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, y_bytes, y_data); - // case c3__int2: - // return u3qf_la_add_int2(x_data, y_data, x_shape, a_bloq); + u3_noun r_data; - // case c3__uint: - // return u3qf_la_add_uint(x_data, y_data, x_shape, a_bloq); + // Switch on the block size. + switch (bloq) { + case 4: + for (uint64_t i = 0; i < len_a; i++) { + ((float16_t*)y_bytes)[i] = f16_div(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); + } - // case c3__cplx: - // _set_rounding(rnd); - // return u3qf_la_add_cplx(x_data, y_data, x_shape, a_bloq); + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - // case c3__unum: - // return u3qf_la_add_unum(x_data, y_data, x_shape, a_bloq); + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); - // case c3__fixp: - // return u3qf_la_add_fixp(x_data, y_data, x_shape, a_bloq); + return r_data; - default: - return u3_none; + case 5: + for (uint64_t i = 0; i < len_a; i++) { + ((float32_t*)y_bytes)[i] = f32_div(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); } - } - } - } - u3_noun - u3wf_la_sub(u3_noun cor) - { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + // Unpack the result back into a noun. 
+ r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, - u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, - u3x_sam_7, &y_data, - 0) || - c3n == u3ud(x_data) || - c3n == u3ud(y_data) ) - { - return u3m_bail(c3__exit); - } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, - rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) - ) - { - return u3m_bail(c3__exit); - } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); - // case c3__int2: - // return u3qf_la_sub_int2(x_data, y_data, x_shape, a_bloq); + return r_data; - // case c3__uint: - // return u3qf_la_sub_uint(x_data, y_data, x_shape, a_bloq); + case 6: + for (uint64_t i = 0; i < len_a; i++) { + ((float64_t*)y_bytes)[i] = f64_div(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); + } - // case c3__cplx: - // _set_rounding(rnd); - // return u3qf_la_sub_cplx(x_data, y_data, x_shape, a_bloq); + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - // case c3__unum: - // return u3qf_la_sub_unum(x_data, y_data, x_shape, a_bloq); + // Clean up. 
+ u3a_free(x_bytes); + u3a_free(y_bytes); - // case c3__fixp: - // return u3qf_la_sub_fixp(x_data, y_data, x_shape, a_bloq); + return r_data; - default: - return u3_none; + case 7: + for (uint64_t i = 0; i < len_a; i++) { + f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); } - } + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + u3a_free(y_bytes); + + return u3_none; } } +/* adds - axpy = 1*x+n +*/ u3_noun - u3wf_la_mmul(u3_noun cor) + u3qf_la_adds_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq, + u3_noun n) { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + // Unpack the data as a byte array. We assume total length < 2**64. + uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, - u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, - u3x_sam_7, &y_data, - 0) || - c3n == u3ud(x_data) || - c3n == u3ud(y_data) ) - { - return u3m_bail(c3__exit); - } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, - rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) - ) - { - return u3m_bail(c3__exit); - } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - return u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, a_bloq); - break; + float16_t 
n16; + float32_t n32; + float64_t n64; + float128_t n128; - // case c3__int2: - // return u3qf_la_add_int2(x_data, y_data, x_shape, a_bloq); + u3_noun r_data; - // case c3__uint: - // return u3qf_la_add_uint(x_data, y_data, x_shape, a_bloq); + // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. + switch (bloq) { + case 4: + u3r_bytes(0, 2, (uint8_t*)&n16, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float16_t*)y_bytes)[i] = n16; + } + y_bytes[siz_a] = 1; // pin head + haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); - // case c3__cplx: - // return u3qf_la_add_cplx(x_data, y_data, x_shape, a_bloq, rnd); + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 5: + u3r_bytes(0, 4, (uint8_t*)&n32, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float32_t*)y_bytes)[i] = n32; + } + y_bytes[siz_a] = 1; // pin head + saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 6: + u3r_bytes(0, 8, (uint8_t*)&n64, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float64_t*)y_bytes)[i] = n64; + } + y_bytes[siz_a] = 1; // pin head + daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. 
+ u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 7: + u3r_bytes(0, 16, (uint8_t*)&n128, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]}; + } + y_bytes[siz_a] = 1; // pin head + qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + u3a_free(y_bytes); + + return u3_none; + } + } + +/* subs - axpy = -1*n+x +*/ + u3_noun + u3qf_la_subs_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq, + u3_noun n) + { + // Unpack the data as a byte array. We assume total length < 2**64. + uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + + float16_t n16; + float32_t n32; + float64_t n64; + float128_t n128; + + u3_noun r_data; + + // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. + switch (bloq) { + case 4: + u3r_bytes(0, 2, (uint8_t*)&n16, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float16_t*)y_bytes)[i] = n16; + } + haxpy(len_a, (float16_t){SB_REAL16_NEGONE}, (float16_t*)y_bytes, 1, (float16_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. 
+ u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 5: + u3r_bytes(0, 4, (uint8_t*)&n32, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float32_t*)y_bytes)[i] = n32; + } + saxpy(len_a, (float32_t){SB_REAL32_NEGONE}, (float32_t*)y_bytes, 1, (float32_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 6: + u3r_bytes(0, 8, (uint8_t*)&n64, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float64_t*)y_bytes)[i] = n64; + } + daxpy(len_a, (float64_t){SB_REAL64_NEGONE}, (float64_t*)y_bytes, 1, (float64_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 7: + u3r_bytes(0, 16, (uint8_t*)&n128, n); + // set y to [n] + for (uint64_t i = 0; i < len_a; i++) { + ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]}; + } + qaxpy(len_a, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)y_bytes, 1, (float128_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + u3a_free(y_bytes); + + return u3_none; + } + } + +/* muls - x.*[n] + elementwise multiplication +*/ + u3_noun + u3qf_la_muls_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq, + u3_noun n) + { + // Unpack the data as a byte array. We assume total length < 2**64. 
+ uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + x_bytes[siz_a] = 1; // pin head + + float16_t n16; + float32_t n32; + float64_t n64; + float128_t n128; + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + u3r_bytes(0, 2, (uint8_t*)&n16, n); + hscal(len_a, n16, (float16_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + case 5: + u3r_bytes(0, 4, (uint8_t*)&n32, n); + sscal(len_a, n32, (float32_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + case 6: + u3r_bytes(0, 8, (uint8_t*)&n64, n); + dscal(len_a, n64, (float64_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + case 7: + u3r_bytes(0, 16, (uint8_t*)&(n128.v[0]), n); + qscal(len_a, n128, (float128_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + + return u3_none; + } + } + +/* divs - x/[n] + elementwise multiplication +*/ + u3_noun + u3qf_la_divs_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq, + u3_noun n) + { + // Unpack the data as a byte array. We assume total length < 2**64. 
+ uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + x_bytes[siz_a] = 1; // pin head + + float16_t n16; + float32_t n32; + float64_t n64; + float128_t n128; + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + u3r_bytes(0, 2, (uint8_t*)&n16, n); + n16 = f16_div((float16_t){SB_REAL16_ONE}, n16); + hscal(len_a, n16, (float16_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + case 5: + u3r_bytes(0, 4, (uint8_t*)&n32, n); + n32 = f32_div((float32_t){SB_REAL32_ONE}, n32); + sscal(len_a, n32, (float32_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + case 6: + u3r_bytes(0, 8, (uint8_t*)&n64, n); + n64 = f64_div((float64_t){SB_REAL64_ONE}, n64); + dscal(len_a, n64, (float64_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. + u3a_free(x_bytes); + + return r_data; + + case 7: + // u3r_bytes(0, 16, (uint8_t*)&(n128.v[0]), n); + u3l_log("divs: n", n); + u3r_bytes(0, 16, (uint8_t*)&n128, n); + fprintf(stderr, "n128: %lx %lx\r\n", n128.v[0], n128.v[1]); + f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &n128, &n128); + fprintf(stderr, "one: %lx %lx\r\n", SB_REAL128L_ONE, SB_REAL128U_ONE); + fprintf(stderr, "n128: %lx %lx\r\n", n128.v[0], n128.v[1]); + qscal(len_a, n128, (float128_t*)x_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); + + // Clean up. 
+ u3a_free(x_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + + return u3_none; + } + } + +/* mmul +*/ + u3_noun + u3qf_la_mmul_real(u3_noun x_data, + u3_noun y_data, + u3_noun x_shape, + u3_noun y_shape, + u3_noun bloq) + { + // Unpack the data as a byte array. We assume total length < 2**64. + uint64_t M = u3h(x_shape); + uint64_t Na = u3h(u3t(x_shape)); + uint64_t Nb = u3h(y_shape); + uint64_t P = u3h(u3t(y_shape)); + + assert(u3_nul == u3t(u3t(x_shape))); + assert(Na == Nb); + uint64_t N = Na; + assert(u3_nul == u3t(u3t(y_shape))); + + uint8_t* x_bytes = (uint8_t*)u3a_malloc((M*N)*sizeof(uint8_t)); + u3r_bytes(0, M*N, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((N*P)*sizeof(uint8_t)); + u3r_bytes(0, N*P, y_bytes, y_data); + uint8_t* c_bytes = (uint8_t*)u3a_malloc((M*P)*sizeof(uint8_t)); + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + case 5: + sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + case 6: + dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. 
+ u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + case 7: + qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)c_bytes, P); + + // Unpack the result back into a noun. + r_data = u3i_bytes(M*P, c_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(c_bytes); + + return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + + default: + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(c_bytes); + + return u3_none; + } + } + + u3_noun + u3wf_la_add(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_sub(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, 
&x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_mul(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_mul_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return 
u3_none; + } + } + } + } + + u3_noun + u3wf_la_div(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_div_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_adds(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + // shape does not matter so no check + // bloq does not matter so no check + // kind does not matter so no check + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_adds_real(x_data, x_shape, a_bloq, n); + return u3nc(u3nq(x_shape, a_bloq, a_kind, 
a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_subs(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + // shape does not matter so no check + // bloq does not matter so no check + // kind does not matter so no check + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_subs_real(x_data, x_shape, a_bloq, n); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_muls(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + // shape does not matter so no check + // bloq does not matter so no check + // kind does not matter so no check + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_muls_real(x_data, x_shape, a_bloq, n); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_divs(u3_noun cor) + { + // 
Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + // shape does not matter so no check + // bloq does not matter so no check + // kind does not matter so no check + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_divs_real(x_data, x_shape, a_bloq, n); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } - // case c3__unum: - // return u3qf_la_add_unum(x_data, y_data, x_shape, a_bloq); + u3_noun + u3wf_la_mmul(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; - // case c3__fixp: - // return u3qf_la_add_fixp(x_data, y_data, x_shape, a_bloq); + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + // fxp does not need to match so no check + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + return u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, 
a_bloq); + break; default: return u3_none; diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 371cebb228..807f7bdcff 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -249,6 +249,12 @@ u3_noun u3qf_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_mul_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_div_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_adds_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_divs_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 035dad2305..cf82124dac 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2153,10 +2153,22 @@ static u3j_core _139_sep_d[] = */ static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; +static u3j_harm _139_hex__lagoon_mul_a[] = {{".2", u3wf_la_mul}, {}}; +static u3j_harm _139_hex__lagoon_div_a[] = {{".2", u3wf_la_div}, {}}; +static u3j_harm _139_hex__lagoon_adds_a[] = {{".2", u3wf_la_adds}, {}}; +static u3j_harm _139_hex__lagoon_subs_a[] = {{".2", u3wf_la_subs}, {}}; +static u3j_harm _139_hex__lagoon_muls_a[] = {{".2", u3wf_la_muls}, {}}; +static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; static u3j_core _139_hex__la_core_d[] = { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, { "sub-rays", 7, _139_hex__lagoon_sub_a, 0, no_hashes }, + { "mul-rays", 7, _139_hex__lagoon_mul_a, 0, no_hashes }, + { "div-rays", 7, _139_hex__lagoon_div_a, 0, no_hashes }, + { "add-scal", 7, _139_hex__lagoon_adds_a, 0, no_hashes }, + { 
"sub-scal", 7, _139_hex__lagoon_subs_a, 0, no_hashes }, + { "mul-scal", 7, _139_hex__lagoon_muls_a, 0, no_hashes }, + { "div-scal", 7, _139_hex__lagoon_divs_a, 0, no_hashes }, { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index f7d4ef6a53..b833d2ca49 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -333,6 +333,12 @@ u3_noun u3wf_la_add(u3_noun); u3_noun u3wf_la_sub(u3_noun); + u3_noun u3wf_la_mul(u3_noun); + u3_noun u3wf_la_div(u3_noun); + u3_noun u3wf_la_adds(u3_noun); + u3_noun u3wf_la_subs(u3_noun); + u3_noun u3wf_la_muls(u3_noun); + u3_noun u3wf_la_divs(u3_noun); u3_noun u3wf_la_mmul(u3_noun); #endif /* ifndef U3_JETS_W_H */ From 0a42728a19bbb866f373cf4b93f8ad96dd2415bf Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 27 Mar 2024 12:10:48 -0500 Subject: [PATCH 10/41] Add trace/diag/dot --- pkg/noun/jets/f/lagoon.c | 299 +++++++++++++++++++++++++++++++++++++-- pkg/noun/jets/q.h | 3 + pkg/noun/jets/tree.c | 14 +- pkg/noun/jets/w.h | 3 + 4 files changed, 303 insertions(+), 16 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index ba55c5b4db..b6db643273 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -85,6 +85,23 @@ return len; } + static inline uint64_t* _get_dims(u3_noun shape) + { + uint64_t len = u3kb_lent(shape); + uint64_t* dims = (uint64_t*)u3a_malloc(len*sizeof(uint64_t)); + for (uint64_t i = 0; i < len; i++) { + dims[i] = u3h(shape); + shape = u3t(shape); + } + return dims; + } + + static inline size_t _get_array_length(uint64_t* array) + { + size_t n = sizeof(array)/sizeof(array[0]); + return n; + } + /* add - axpy = 1*x+y */ u3_noun @@ -611,7 +628,7 @@ } } -/* muls - x.*[n] +/* muls - ?scal n * x elementwise multiplication */ u3_noun @@ -691,8 +708,8 @@ } } -/* divs - x/[n] - elementwise multiplication +/* divs - ?scal 1/n * x + elementwise division */ u3_noun u3qf_la_divs_real(u3_noun x_data, @@ -717,7 +734,7 @@ // Switch on 
the block size. switch (bloq) { case 4: - u3r_bytes(0, 2, (uint8_t*)&n16, n); + u3r_bytes(0, 2, (uint8_t*)&(n16.v), n); n16 = f16_div((float16_t){SB_REAL16_ONE}, n16); hscal(len_a, n16, (float16_t*)x_bytes, 1); @@ -730,7 +747,7 @@ return r_data; case 5: - u3r_bytes(0, 4, (uint8_t*)&n32, n); + u3r_bytes(0, 4, (uint8_t*)&(n32.v), n); n32 = f32_div((float32_t){SB_REAL32_ONE}, n32); sscal(len_a, n32, (float32_t*)x_bytes, 1); @@ -743,7 +760,7 @@ return r_data; case 6: - u3r_bytes(0, 8, (uint8_t*)&n64, n); + u3r_bytes(0, 8, (uint8_t*)&(n64.v), n); n64 = f64_div((float64_t){SB_REAL64_ONE}, n64); dscal(len_a, n64, (float64_t*)x_bytes, 1); @@ -756,13 +773,8 @@ return r_data; case 7: - // u3r_bytes(0, 16, (uint8_t*)&(n128.v[0]), n); - u3l_log("divs: n", n); - u3r_bytes(0, 16, (uint8_t*)&n128, n); - fprintf(stderr, "n128: %lx %lx\r\n", n128.v[0], n128.v[1]); + u3r_bytes(0, 16, (uint8_t*)&(n128.v[0]), n); f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &n128, &n128); - fprintf(stderr, "one: %lx %lx\r\n", SB_REAL128L_ONE, SB_REAL128U_ONE); - fprintf(stderr, "n128: %lx %lx\r\n", n128.v[0], n128.v[1]); qscal(len_a, n128, (float128_t*)x_bytes, 1); // Unpack the result back into a noun. @@ -780,6 +792,130 @@ } } +/* dot - ?dot = x · y +*/ + u3_noun + u3qf_la_dot_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Unpack the data as a byte array. We assume total length < 2**64. + uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, y_bytes, y_data); + + u3_noun r_data; + + // Switch on the block size. + switch (bloq) { + case 4: + hdot(len_a, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. 
+ u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 5: + sdot(len_a, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 6: + ddot(len_a, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + case 7: + qdot(len_a, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + + // Unpack the result back into a noun. + r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + + // Clean up. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + + default: + u3a_free(x_bytes); + u3a_free(y_bytes); + + return u3_none; + } + } + +/* diag - diag(x) +*/ + u3_noun + u3qf_la_diag(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Unpack shape into an array of dimensions. + uint64_t* dims = _get_dims(shape); + // Assert length of dims is 2. + assert(dims[0] == dims[1]); + assert(_get_array_length(dims) == 2); + + // Unpack the data as a byte array. We assume total length < 2**64. + uint64_t len_a = _get_length(shape); + uint64_t siz_a = len_a * pow(2, bloq - 3); + uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a, x_bytes, x_data); + uint8_t* y_bytes = (uint8_t*)u3a_malloc((dims[0]*dims[1]+1)*sizeof(uint8_t)); + + u3_noun r_data; + + for (uint64_t i = 0; i < dims[0]; i++) { + y_bytes[i] = x_bytes[i*dims[0] + i]; + } + y_bytes[dims[0]*dims[1]] = 1; // pin head + + // Unpack the result back into a noun. 
+ r_data = u3i_bytes((dims[0]*dims[1]+1)*sizeof(uint8_t), y_bytes); + + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(dims); + + return r_data; + } + +/* trace - tr(x) +*/ + u3_noun + u3qf_la_trace_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + u3_noun diag_data = u3qf_la_diag(x_data, shape, bloq); + return u3qf_la_dot_real(diag_data, diag_data, shape, bloq); + } + /* mmul */ u3_noun @@ -1283,6 +1419,145 @@ } } + u3_noun + u3wf_la_dot(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data, + b_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &b_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + y_shape, b_bloq, b_kind, b_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) || + c3n == u3r_mean(b_meta, + 2, &y_shape, + 6, &b_bloq, + 14, &b_kind, + 15, &b_fxp, + 0) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(a_bloq, b_bloq) || + c3n == u3r_sing(a_kind, b_kind) || + c3n == u3r_sing(a_fxp, b_fxp) || + c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_dot_real(x_data, y_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_diag(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) + // c3n == u3r_sing(x_shape, y_shape) || + // c3n == 
u3r_sing(a_bloq, b_bloq) || + // c3n == u3r_sing(a_kind, b_kind) || + // c3n == u3r_sing(a_fxp, b_fxp) || + // c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + u3_noun r_data = u3qf_la_diag(x_data, x_shape, a_bloq); + return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + } + } + } + + u3_noun + u3wf_la_trace(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun a_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &a_meta, + u3x_sam_5, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, a_bloq, a_kind, a_fxp, + rnd; + if ( c3n == u3r_mean(a_meta, + 2, &x_shape, + 6, &a_bloq, + 14, &a_kind, + 15, &a_fxp, + 0) + // c3n == u3r_sing(x_shape, y_shape) || + // c3n == u3r_sing(a_bloq, b_bloq) || + // c3n == u3r_sing(a_kind, b_kind) || + // c3n == u3r_sing(a_fxp, b_fxp) || + // c3n == u3r_mean(cor, 30, &rnd, 0) + ) + { + return u3m_bail(c3__exit); + } else { + switch (a_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_trace_real(x_data, x_shape, a_bloq); + uint64_t len_x0 = _get_dims(x_shape)[0]; + return u3nc(u3nq(len_x0, a_bloq, a_kind, a_fxp), r_data); + break; + + default: + return u3_none; + } + } + } + } + u3_noun u3wf_la_mmul(u3_noun cor) { diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 807f7bdcff..db84af39d1 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -255,6 +255,9 @@ u3_noun u3qf_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_divs_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_trace_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 
cf82124dac..084ba01e98 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2151,14 +2151,17 @@ static u3j_core _139_hex_json_d[] = XX move to outer _sep_ core for /lib? eventually static u3j_core _139_sep_d[] = */ -static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; -static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; -static u3j_harm _139_hex__lagoon_mul_a[] = {{".2", u3wf_la_mul}, {}}; -static u3j_harm _139_hex__lagoon_div_a[] = {{".2", u3wf_la_div}, {}}; +static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; +static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; +static u3j_harm _139_hex__lagoon_mul_a[] = {{".2", u3wf_la_mul}, {}}; +static u3j_harm _139_hex__lagoon_div_a[] = {{".2", u3wf_la_div}, {}}; static u3j_harm _139_hex__lagoon_adds_a[] = {{".2", u3wf_la_adds}, {}}; static u3j_harm _139_hex__lagoon_subs_a[] = {{".2", u3wf_la_subs}, {}}; static u3j_harm _139_hex__lagoon_muls_a[] = {{".2", u3wf_la_muls}, {}}; static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; +static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; +static u3j_harm _139_hex__lagoon_diag_a[] = {{".2", u3wf_la_diag}, {}}; +static u3j_harm _139_hex__lagoon_trace_a[]= {{".2", u3wf_la_trace}, {}}; static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; static u3j_core _139_hex__la_core_d[] = { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, @@ -2169,6 +2172,9 @@ static u3j_core _139_hex__la_core_d[] = { "sub-scal", 7, _139_hex__lagoon_subs_a, 0, no_hashes }, { "mul-scal", 7, _139_hex__lagoon_muls_a, 0, no_hashes }, { "div-scal", 7, _139_hex__lagoon_divs_a, 0, no_hashes }, + { "dot", 7, _139_hex__lagoon_dot_a, 0, no_hashes }, + { "diag", 7, _139_hex__lagoon_diag_a, 0, no_hashes }, + { "trace", 7, _139_hex__lagoon_trace_a,0, no_hashes }, { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 
b833d2ca49..2bd87e6551 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -339,6 +339,9 @@ u3_noun u3wf_la_subs(u3_noun); u3_noun u3wf_la_muls(u3_noun); u3_noun u3wf_la_divs(u3_noun); + u3_noun u3wf_la_dot(u3_noun); + u3_noun u3wf_la_diag(u3_noun); + u3_noun u3wf_la_trace(u3_noun); u3_noun u3wf_la_mmul(u3_noun); #endif /* ifndef U3_JETS_W_H */ From ebe6dd381397b909cf29f28954dda1d11ac7a737 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 27 Mar 2024 20:36:30 -0500 Subject: [PATCH 11/41] WIP shape+diag error --- pkg/noun/jets/f/lagoon.c | 72 +++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index b6db643273..baaac63331 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -73,32 +73,42 @@ } } -/* shape +/* length of shape = x * y * z * w * ... */ - static inline uint64_t _get_length(u3_noun shape) + static inline c3_d _get_length(u3_noun shape) { - uint64_t len = 1; + c3_d len = 1; while (u3_nul != shape) { - len = len * u3h(shape); + len = len * u3x_atom(u3h(shape)); shape = u3t(shape); } return len; } - static inline uint64_t* _get_dims(u3_noun shape) +/* get dims from shape as array [x y z w ...] 
+*/ + static inline c3_d* _get_dims(u3_noun shape) { - uint64_t len = u3kb_lent(shape); - uint64_t* dims = (uint64_t*)u3a_malloc(len*sizeof(uint64_t)); - for (uint64_t i = 0; i < len; i++) { - dims[i] = u3h(shape); + u3_atom len = u3qb_lent(shape); + c3_d len_d = u3r_chub(0, len); + c3_d* dims = (c3_d*)u3a_malloc(len_d*sizeof(c3_d)); + for (uint64_t i = 0; i < len_d; i++) { + dims[i] = u3r_chub(0, u3x_atom(u3h(shape))); shape = u3t(shape); } + u3z(len); return dims; } +/* +*/ static inline size_t _get_array_length(uint64_t* array) { size_t n = sizeof(array)/sizeof(array[0]); + for (size_t i = 0; i < n; i++) { + fprintf(stderr, "%x ", array[i]); + } + fprintf(stderr, " => %x \n", n); return n; } @@ -434,7 +444,9 @@ { // Unpack the data as a byte array. We assume total length < 2**64. uint64_t len_a = _get_length(shape); + fprintf(stderr, "len_a: %d 0x%x units\r\n", len_a, len_a); uint64_t siz_a = len_a * pow(2, bloq - 3); + fprintf(stderr, "siz_a: %d 0x%x bytes\r\n", siz_a, siz_a); uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); u3r_bytes(0, siz_a, x_bytes, x_data); uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); @@ -875,28 +887,33 @@ u3_noun shape, u3_noun bloq) { - // Unpack shape into an array of dimensions. - uint64_t* dims = _get_dims(shape); // Assert length of dims is 2. + assert(u3qb_lent(shape) == 2); + // Unpack shape into an array of dimensions. + uint64_t *dims = _get_dims(shape); assert(dims[0] == dims[1]); - assert(_get_array_length(dims) == 2); // Unpack the data as a byte array. We assume total length < 2**64. 
uint64_t len_a = _get_length(shape); uint64_t siz_a = len_a * pow(2, bloq - 3); + uint64_t stride = dims[0] * pow(2, bloq - 3); uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((dims[0]*dims[1]+1)*sizeof(uint8_t)); + u3r_bytes(0, siz_a+1, x_bytes, x_data); + uint64_t siz_b = stride * dims[1]; + uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_b+1)*sizeof(uint8_t)); u3_noun r_data; - for (uint64_t i = 0; i < dims[0]; i++) { - y_bytes[i] = x_bytes[i*dims[0] + i]; + for (uint64_t i = 0; i < dims[1]; i++) { + for (uint64_t j = 0; j < stride; j++) { + fprintf(stderr, "i*s+j = %d*%d+%d = %d // x_bytes[i]: %lx\r\n", i, stride, j, i*stride+j, x_bytes[i*stride+j + i]); + y_bytes[i*stride+j] = x_bytes[i*stride+j + i]; + } } - y_bytes[dims[0]*dims[1]] = 1; // pin head + y_bytes[siz_b] = 1; // pin head // Unpack the result back into a noun. - r_data = u3i_bytes((dims[0]*dims[1]+1)*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_b+1)*sizeof(uint8_t), y_bytes); u3a_free(x_bytes); u3a_free(y_bytes); @@ -913,7 +930,8 @@ u3_noun bloq) { u3_noun diag_data = u3qf_la_diag(x_data, shape, bloq); - return u3qf_la_dot_real(diag_data, diag_data, shape, bloq); + uint64_t len_x0 = _get_dims(shape)[0]; + return u3qf_la_dot_real(diag_data, diag_data, u3nt(len_x0, 0x1, u3_nul), bloq); } /* mmul @@ -1053,7 +1071,7 @@ case c3__real: _set_rounding(rnd); u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); + return u3nc(u3nq(y_shape, a_bloq, a_kind, a_fxp), r_data); break; default: @@ -1482,8 +1500,8 @@ u3_noun a_meta, x_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, - u3x_sam_5, &x_data, + u3x_sam_2, &a_meta, + u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { @@ -1507,7 +1525,8 @@ return u3m_bail(c3__exit); } else { u3_noun r_data = u3qf_la_diag(x_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, 
a_fxp), r_data); + uint64_t len_x0 = _get_dims(x_shape)[0]; + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), a_bloq, a_kind, a_fxp), r_data); } } } @@ -1519,8 +1538,8 @@ u3_noun a_meta, x_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, - u3x_sam_5, &x_data, + u3x_sam_2, &a_meta, + u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { @@ -1547,8 +1566,7 @@ case c3__real: _set_rounding(rnd); u3_noun r_data = u3qf_la_trace_real(x_data, x_shape, a_bloq); - uint64_t len_x0 = _get_dims(x_shape)[0]; - return u3nc(u3nq(len_x0, a_bloq, a_kind, a_fxp), r_data); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), a_bloq, a_kind, a_fxp), r_data); break; default: From b59f33f9870373dce53c38628943faa219e6aa90 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Fri, 12 Apr 2024 17:10:08 -0500 Subject: [PATCH 12/41] Bump SoftBLAS to fix memory error and add progress on jets. --- WORKSPACE.bazel | 2 +- pkg/noun/jets/f/lagoon.c | 1557 ++++++++++++++++++-------------------- pkg/noun/jets/q.h | 1 + pkg/noun/jets/tree.c | 2 + pkg/noun/jets/w.h | 1 + 5 files changed, 761 insertions(+), 802 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 3b0296fd0f..de3564b253 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -297,7 +297,7 @@ versioned_http_archive( strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "bace30db3944c0f2bb2b6cac0db9965675ad842e", + version = "3af44d8cbf0d61e31946af9127099257160d0451", ) versioned_http_archive( diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index baaac63331..84c58bd191 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -38,8 +38,7 @@ // We could use SoftBLAS set_rounding() to set the SoftFloat // mode as well, but it's more explicit to do it here since // we may use SoftFloat in any given Lagoon jet and we want - // you, dear developer, to see this set here. 
- fprintf(stderr, "%x %c\n", a, a); + // you, dear developer, to see it set here. switch ( a ) { default: @@ -92,7 +91,7 @@ u3_atom len = u3qb_lent(shape); c3_d len_d = u3r_chub(0, len); c3_d* dims = (c3_d*)u3a_malloc(len_d*sizeof(c3_d)); - for (uint64_t i = 0; i < len_d; i++) { + for (c3_d i = 0; i < len_d; i++) { dims[i] = u3r_chub(0, u3x_atom(u3h(shape))); shape = u3t(shape); } @@ -102,7 +101,7 @@ /* */ - static inline size_t _get_array_length(uint64_t* array) + static inline size_t _get_array_length(c3_d* array) { size_t n = sizeof(array)/sizeof(array[0]); for (size_t i = 0; i < n; i++) { @@ -118,74 +117,56 @@ u3qf_la_add_real(u3_noun x_data, u3_noun y_data, u3_noun shape, - u3_noun bloq) + u3_noun bloq + ) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, y_bytes, y_data); + // len_x is length in base units + c3_d len_x = _get_length(shape); - u3_noun r_data; + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, y_bytes, y_data); + // Switch on the block size. switch (bloq) { case 4: - haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + break; case 5: - saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + break; case 6: - daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + break; case 7: - qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); + qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return u3_none; - } + return r_data; } /* sub - axpy = -1*y+x @@ -194,76 +175,59 @@ u3qf_la_sub_real(u3_noun x_data, u3_noun y_data, u3_noun shape, - u3_noun bloq) + u3_noun bloq + ) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. 
- uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, y_data); // XXX - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, y_bytes, x_data); // XXX + // len_x is length in base units + c3_d len_x = _get_length(shape); - u3_noun r_data; + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, y_bytes, y_data); + // Switch on the block size. switch (bloq) { case 4: - haxpy(len_a, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + break; case 5: - saxpy(len_a, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + break; case 6: - daxpy(len_a, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + break; case 7: - qaxpy(len_a, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); + qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return u3_none; - } + return r_data; } + /* mul - x.*y elementwise multiplication */ @@ -273,80 +237,61 @@ u3_noun shape, u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, y_bytes, y_data); + // len_x is length in base units + c3_d len_x = _get_length(shape); - u3_noun r_data; + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, y_bytes, y_data); // Switch on the block size. 
switch (bloq) { case 4: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = f16_mul(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); } - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + break; case 5: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = f32_mul(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); } - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + break; case 6: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = f64_mul(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); } - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + break; case 7: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { f128M_mul(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); } + break; + } - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return u3_none; - } + return r_data; } /* div - x/y @@ -358,286 +303,347 @@ u3_noun shape, u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. 
- uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, y_bytes, y_data); + // len_x is length in base units + c3_d len_x = _get_length(shape); - u3_noun r_data; + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, y_bytes, y_data); // Switch on the block size. switch (bloq) { case 4: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = f16_div(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); } - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + break; case 5: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = f32_div(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); } - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + break; case 6: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = f64_div(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); } - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + break; case 7: - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); } + break; + } - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return r_data; + return r_data; + } - default: - u3a_free(x_bytes); - u3a_free(y_bytes); +/* mod - x % y = x - r*floor(x/r) + remainder after division +*/ + u3_noun + u3qf_la_mod_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq, + u3_noun rnd) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } - return u3_none; + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, y_bytes, y_data); + + // Switch on the block size. 
+ switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + float16_t y_val16 = ((float16_t*)y_bytes)[i]; + // Perform division x/n + float16_t div_result16 = f16_div(x_val16, y_val16); + // Compute floor of the division result + int64_t floor_result16 = f16_to_i64(div_result16, rnd, false); + float16_t floor_float16 = i64_to_f16(floor_result16); + // Multiply n by floor(x/n) + float16_t mult_result16 = f16_mul(y_val16, floor_float16); + // Compute remainder: x - n * floor(x/n) + ((float16_t*)y_bytes)[i] = f16_sub(x_val16, mult_result16); + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + float32_t y_val32 = ((float32_t*)y_bytes)[i]; + // Perform division x/n + float32_t div_result32 = f32_div(x_val32, y_val32); + // Compute floor of the division result + int64_t floor_result32 = f32_to_i64(div_result32, rnd, false); + float32_t floor_float32 = i64_to_f32(floor_result32); + // Multiply n by floor(x/n) + float32_t mult_result32 = f32_mul(y_val32, floor_float32); + // Compute remainder: x - n * floor(x/n) + ((float32_t*)y_bytes)[i] = f32_sub(x_val32, mult_result32); + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + float64_t y_val64 = ((float64_t*)y_bytes)[i]; + // Perform division x/n + float64_t div_result64 = f64_div(x_val64, y_val64); + // Compute floor of the division result + int64_t floor_result64 = f64_to_i64(div_result64, rnd, false); + float64_t floor_float64 = i64_to_f64(floor_result64); + // Multiply n by floor(x/n) + float64_t mult_result64 = f64_mul(y_val64, floor_float64); + // Compute remainder: x - n * floor(x/n) + ((float64_t*)y_bytes)[i] = f64_sub(x_val64, mult_result64); + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + float128_t y_val128 = ((float128_t*)y_bytes)[i]; + fprintf(stderr, "x_val128: %llx 
%llx\r\n", x_val128.v[0], x_val128.v[1]); + fprintf(stderr, "y_val128: %llx %llx\r\n", y_val128.v[0], y_val128.v[1]); + // Perform division x/n + float128_t div_result128; + // float128_t div_result128 = f128_div(x_val128, y_val128); + f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128); + fprintf(stderr, "div_result128: %llx %llx\r\n", div_result128.v[0], div_result128.v[1]); + // Compute floor of the division result + int64_t floor_result128 = f128_to_i64(div_result128, softfloat_round_minMag, false); + fprintf(stderr, "floor_result128: %llx\r\n", floor_result128); + float128_t floor_float128 = i64_to_f128(floor_result128); + fprintf(stderr, "floor_float128: %llx %llx\r\n", floor_float128.v[0], floor_float128.v[1]); + // Multiply n by floor(x/n) + float128_t mult_result128; + // float128_t mult_result128 = f128_mul(y_val128, floor_float128); + f128M_mul(((float128_t*)&y_val128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); + fprintf(stderr, "mult_result128: %llx %llx\r\n", mult_result128.v[0], mult_result128.v[1]); + // Compute remainder: x - n * floor(x/n) + // ((float128_t*)y_bytes)[i] = f128_sub(x_val128, mult_result128); + f128M_div(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)y_bytes)[i])); + fprintf(stderr, "y_bytes: %llx %llx\r\n", ((float128_t*)y_bytes)[i].v[0], ((float128_t*)y_bytes)[i].v[1]); + } + // for (c3_d i = 0; i < len_x; i++) { + // f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); + // } + break; } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; } -/* adds - axpy = 1*x+n +/* adds - axpy = 1*x+[n] */ u3_noun u3qf_la_adds_real(u3_noun x_data, + u3_noun n, u3_noun shape, - u3_noun bloq, - u3_noun n) + u3_noun bloq) { + // Fence on valid bloq size. 
+ if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t len_a = _get_length(shape); - fprintf(stderr, "len_a: %d 0x%x units\r\n", len_a, len_a); - uint64_t siz_a = len_a * pow(2, bloq - 3); - fprintf(stderr, "siz_a: %d 0x%x bytes\r\n", siz_a, siz_a); - uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); float16_t n16; float32_t n32; float64_t n64; float128_t n128; - u3_noun r_data; - // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. switch (bloq) { case 4: - u3r_bytes(0, 2, (uint8_t*)&n16, n); + u3r_bytes(0, 2, (c3_y*)&n16, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = n16; } - y_bytes[siz_a] = 1; // pin head - haxpy(len_a, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + break; case 5: - u3r_bytes(0, 4, (uint8_t*)&n32, n); + u3r_bytes(0, 4, (c3_y*)&n32, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = n32; } - y_bytes[siz_a] = 1; // pin head - saxpy(len_a, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + break; case 6: - u3r_bytes(0, 8, (uint8_t*)&n64, n); + u3r_bytes(0, 8, (c3_y*)&n64, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = n64; } - y_bytes[siz_a] = 1; // pin head - daxpy(len_a, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + break; case 7: - u3r_bytes(0, 16, (uint8_t*)&n128, n); + u3r_bytes(0, 16, (c3_y*)&n128, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]}; } - y_bytes[siz_a] = 1; // pin head - qaxpy(len_a, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); + qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + y_bytes[siz_x] = 1; // pin head + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return u3_none; - } + return r_data; } -/* subs - axpy = -1*n+x +/* subs - axpy = -1*[n]+x */ u3_noun u3qf_la_subs_real(u3_noun x_data, + u3_noun n, u3_noun shape, - u3_noun bloq, - u3_noun n) + u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/o leading 0x1) + c3_y* y_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); float16_t n16; float32_t n32; float64_t n64; float128_t n128; - u3_noun r_data; - // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. 
switch (bloq) { case 4: - u3r_bytes(0, 2, (uint8_t*)&n16, n); + u3r_bytes(0, 2, (c3_y*)&n16, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = n16; } - haxpy(len_a, (float16_t){SB_REAL16_NEGONE}, (float16_t*)y_bytes, 1, (float16_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)y_bytes, 1, (float16_t*)x_bytes, 1); + break; case 5: - u3r_bytes(0, 4, (uint8_t*)&n32, n); + u3r_bytes(0, 4, (c3_y*)&n32, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = n32; } - saxpy(len_a, (float32_t){SB_REAL32_NEGONE}, (float32_t*)y_bytes, 1, (float32_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)y_bytes, 1, (float32_t*)x_bytes, 1); + break; case 6: - u3r_bytes(0, 8, (uint8_t*)&n64, n); + u3r_bytes(0, 8, (c3_y*)&n64, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = n64; } - daxpy(len_a, (float64_t){SB_REAL64_NEGONE}, (float64_t*)y_bytes, 1, (float64_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)y_bytes, 1, (float64_t*)x_bytes, 1); + break; case 7: - u3r_bytes(0, 16, (uint8_t*)&n128, n); + u3r_bytes(0, 16, (c3_y*)&n128, n); // set y to [n] - for (uint64_t i = 0; i < len_a; i++) { + for (c3_d i = 0; i < len_x; i++) { ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]}; } - qaxpy(len_a, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)y_bytes, 1, (float128_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); + qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)y_bytes, 1, (float128_t*)x_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + x_bytes[siz_x] = 1; // pin head + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return u3_none; - } + return r_data; } /* muls - ?scal n * x @@ -645,79 +651,62 @@ */ u3_noun u3qf_la_muls_real(u3_noun x_data, + u3_noun n, u3_noun shape, - u3_noun bloq, - u3_noun n) + u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. 
- uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - x_bytes[siz_a] = 1; // pin head + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + x_bytes[siz_x] = 1; // pin head float16_t n16; float32_t n32; float64_t n64; float128_t n128; - u3_noun r_data; - // Switch on the block size. switch (bloq) { case 4: - u3r_bytes(0, 2, (uint8_t*)&n16, n); - hscal(len_a, n16, (float16_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - - return r_data; + u3r_bytes(0, 2, (c3_y*)&n16, n); + hscal(len_x, n16, (float16_t*)x_bytes, 1); + break; case 5: - u3r_bytes(0, 4, (uint8_t*)&n32, n); - sscal(len_a, n32, (float32_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - - return r_data; + u3r_bytes(0, 4, (c3_y*)&n32, n); + sscal(len_x, n32, (float32_t*)x_bytes, 1); + break; case 6: - u3r_bytes(0, 8, (uint8_t*)&n64, n); - dscal(len_a, n64, (float64_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - - return r_data; + u3r_bytes(0, 8, (c3_y*)&n64, n); + dscal(len_x, n64, (float64_t*)x_bytes, 1); + break; case 7: - u3r_bytes(0, 16, (uint8_t*)&(n128.v[0]), n); - qscal(len_a, n128, (float128_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. 
- u3a_free(x_bytes); + u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); + qscal(len_x, n128, (float128_t*)x_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); - default: - u3a_free(x_bytes); + // Clean up and return. + u3a_free(x_bytes); - return u3_none; - } + return r_data; } /* divs - ?scal 1/n * x @@ -725,83 +714,66 @@ */ u3_noun u3qf_la_divs_real(u3_noun x_data, + u3_noun n, u3_noun shape, - u3_noun bloq, - u3_noun n) + u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - x_bytes[siz_a] = 1; // pin head + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + x_bytes[siz_x] = 1; // pin head float16_t n16; float32_t n32; float64_t n64; float128_t n128; - u3_noun r_data; - // Switch on the block size. switch (bloq) { case 4: - u3r_bytes(0, 2, (uint8_t*)&(n16.v), n); + u3r_bytes(0, 2, (c3_y*)&(n16.v), n); n16 = f16_div((float16_t){SB_REAL16_ONE}, n16); - hscal(len_a, n16, (float16_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. 
- u3a_free(x_bytes); - - return r_data; + hscal(len_x, n16, (float16_t*)x_bytes, 1); + break; case 5: - u3r_bytes(0, 4, (uint8_t*)&(n32.v), n); + u3r_bytes(0, 4, (c3_y*)&(n32.v), n); n32 = f32_div((float32_t){SB_REAL32_ONE}, n32); - sscal(len_a, n32, (float32_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - - return r_data; + sscal(len_x, n32, (float32_t*)x_bytes, 1); + break; case 6: - u3r_bytes(0, 8, (uint8_t*)&(n64.v), n); + u3r_bytes(0, 8, (c3_y*)&(n64.v), n); n64 = f64_div((float64_t){SB_REAL64_ONE}, n64); - dscal(len_a, n64, (float64_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); - - return r_data; + dscal(len_x, n64, (float64_t*)x_bytes, 1); + break; case 7: - u3r_bytes(0, 16, (uint8_t*)&(n128.v[0]), n); + u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &n128, &n128); - qscal(len_a, n128, (float128_t*)x_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), x_bytes); - - // Clean up. - u3a_free(x_bytes); + qscal(len_x, n128, (float128_t*)x_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); - default: - u3a_free(x_bytes); + // Clean up and return. + u3a_free(x_bytes); - return u3_none; - } + return r_data; } /* dot - ?dot = x · y @@ -812,72 +784,53 @@ u3_noun shape, u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + // Unpack the data as a byte array. We assume total length < 2**64. 
- uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc(siz_a*sizeof(uint8_t)); - u3r_bytes(0, siz_a, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, y_bytes, y_data); + // len_x is length in base units + c3_d len_x = _get_length(shape); - u3_noun r_data; + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + u3r_bytes(0, siz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, y_bytes, y_data); // Switch on the block size. switch (bloq) { case 4: - hdot(len_a, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + break; case 5: - sdot(len_a, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + break; case 6: - ddot(len_a, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - - return r_data; + ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + break; case 7: - qdot(len_a, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); - - // Unpack the result back into a noun. - r_data = u3i_bytes((siz_a+1)*sizeof(uint8_t), y_bytes); - - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); + qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + break; + } - return r_data; + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); - return u3_none; - } + return r_data; } /* diag - diag(x) @@ -890,30 +843,30 @@ // Assert length of dims is 2. assert(u3qb_lent(shape) == 2); // Unpack shape into an array of dimensions. - uint64_t *dims = _get_dims(shape); + c3_d *dims = _get_dims(shape); assert(dims[0] == dims[1]); // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t len_a = _get_length(shape); - uint64_t siz_a = len_a * pow(2, bloq - 3); - uint64_t stride = dims[0] * pow(2, bloq - 3); - uint8_t* x_bytes = (uint8_t*)u3a_malloc((siz_a+1)*sizeof(uint8_t)); - u3r_bytes(0, siz_a+1, x_bytes, x_data); - uint64_t siz_b = stride * dims[1]; - uint8_t* y_bytes = (uint8_t*)u3a_malloc((siz_b+1)*sizeof(uint8_t)); + c3_d len_x = _get_length(shape); + c3_d siz_x = len_x * pow(2, bloq - 3); + c3_d stride = dims[0] * pow(2, bloq - 3); + c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, x_bytes, x_data); + c3_d siz_y = stride * dims[1]; + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_y+1)*sizeof(c3_y)); u3_noun r_data; - for (uint64_t i = 0; i < dims[1]; i++) { - for (uint64_t j = 0; j < stride; j++) { + for (c3_d i = 0; i < dims[1]; i++) { + for (c3_d j = 0; j < stride; j++) { fprintf(stderr, "i*s+j = %d*%d+%d = %d // x_bytes[i]: %lx\r\n", i, stride, j, i*stride+j, x_bytes[i*stride+j + i]); y_bytes[i*stride+j] = x_bytes[i*stride+j + i]; } } - y_bytes[siz_b] = 1; // pin head + y_bytes[siz_y] = 1; // pin head // Unpack the result back into a noun. 
- r_data = u3i_bytes((siz_b+1)*sizeof(uint8_t), y_bytes); + r_data = u3i_bytes((siz_y+1)*sizeof(c3_y), y_bytes); u3a_free(x_bytes); u3a_free(y_bytes); @@ -930,7 +883,7 @@ u3_noun bloq) { u3_noun diag_data = u3qf_la_diag(x_data, shape, bloq); - uint64_t len_x0 = _get_dims(shape)[0]; + c3_d len_x0 = _get_dims(shape)[0]; return u3qf_la_dot_real(diag_data, diag_data, u3nt(len_x0, 0x1, u3_nul), bloq); } @@ -944,21 +897,21 @@ u3_noun bloq) { // Unpack the data as a byte array. We assume total length < 2**64. - uint64_t M = u3h(x_shape); - uint64_t Na = u3h(u3t(x_shape)); - uint64_t Nb = u3h(y_shape); - uint64_t P = u3h(u3t(y_shape)); + c3_d M = u3h(x_shape); + c3_d Na = u3h(u3t(x_shape)); + c3_d Nb = u3h(y_shape); + c3_d P = u3h(u3t(y_shape)); assert(u3_nul == u3t(u3t(x_shape))); assert(Na == Nb); - uint64_t N = Na; + c3_d N = Na; assert(u3_nul == u3t(u3t(y_shape))); - uint8_t* x_bytes = (uint8_t*)u3a_malloc((M*N)*sizeof(uint8_t)); + c3_y* x_bytes = (c3_y*)u3a_malloc((M*N)*sizeof(c3_y)); u3r_bytes(0, M*N, x_bytes, x_data); - uint8_t* y_bytes = (uint8_t*)u3a_malloc((N*P)*sizeof(uint8_t)); + c3_y* y_bytes = (c3_y*)u3a_malloc((N*P)*sizeof(c3_y)); u3r_bytes(0, N*P, y_bytes, y_data); - uint8_t* c_bytes = (uint8_t*)u3a_malloc((M*P)*sizeof(uint8_t)); + c3_y* c_bytes = (c3_y*)u3a_malloc((M*P)*sizeof(c3_y)); u3_noun r_data; @@ -1029,13 +982,13 @@ u3wf_la_add(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + u3_noun x_meta, x_data, + y_meta, y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, + u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || @@ -1043,36 +996,35 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, 
&a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(y_shape, a_bloq, a_kind, a_fxp), r_data); - break; + u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1084,14 +1036,14 @@ u3_noun u3wf_la_sub(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, + u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || @@ -1099,36 +1051,35 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 
15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1140,14 +1091,14 @@ u3_noun u3wf_la_mul(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, + u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || @@ -1155,36 +1106,35 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, 
- 15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_mul_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + u3_noun r_data = u3qf_la_mul_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1196,14 +1146,14 @@ u3_noun u3wf_la_div(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, + u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || @@ -1211,36 +1161,35 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, 
&a_kind, - 15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, - 0) || + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_div_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + u3_noun r_data = u3qf_la_div_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1250,44 +1199,52 @@ } u3_noun - u3wf_la_adds(u3_noun cor) + u3wf_la_mod(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, n; + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_3, &n, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || - c3n == u3ud(n) ) + c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 
14, &a_kind, - 15, &a_fxp, - 0) || - // shape does not matter so no check - // bloq does not matter so no check - // kind does not matter so no check - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_adds_real(x_data, x_shape, a_bloq, n); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + switch (x_kind) { + case c3__real: ; // XX satisfy label + // Global rounding mode is ignored by SoftFloat conversions so we pass it in. 
+ u3_noun r_data = u3qf_la_mod_real(x_data, y_data, x_shape, x_bloq, rnd); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1297,13 +1254,13 @@ } u3_noun - u3wf_la_subs(u3_noun cor) + u3wf_la_adds(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, n; + u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || @@ -1312,33 +1269,56 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - // shape does not matter so no check - // bloq does not matter so no check - // kind does not matter so no check - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) - ) - { - return u3m_bail(c3__exit); - } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_subs_real(x_data, x_shape, a_bloq, n); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_adds_real(x_data, n, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } - default: - return u3_none; - } + u3_noun + u3wf_la_subs(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + rnd; + x_shape 
= u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_subs_real(x_data, n, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; } } } @@ -1347,10 +1327,10 @@ u3wf_la_muls(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, n; + u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || @@ -1359,33 +1339,21 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - // shape does not matter so no check - // bloq does not matter so no check - // kind does not matter so no check - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) - ) - { - return u3m_bail(c3__exit); - } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_muls_real(x_data, x_shape, a_bloq, n); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; - - default: - return u3_none; - } + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_muls_real(x_data, n, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; } } } @@ -1394,10 +1362,10 @@ u3wf_la_divs(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, n; + u3_noun x_meta, x_data, n; if ( c3n == 
u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || @@ -1406,33 +1374,21 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - // shape does not matter so no check - // bloq does not matter so no check - // kind does not matter so no check - // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) - ) - { - return u3m_bail(c3__exit); - } else { - switch (a_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_divs_real(x_data, x_shape, a_bloq, n); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; - - default: - return u3_none; - } + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_divs_real(x_data, n, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; } } } @@ -1441,13 +1397,13 @@ u3wf_la_dot(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + u3_noun x_meta, x_data, + y_meta, y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, + u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || @@ -1455,36 +1411,35 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, - 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, - 0) || - c3n == u3r_mean(b_meta, - 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 
15, &b_fxp, - 0) || + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || - c3n == u3r_sing(a_fxp, b_fxp) || - c3n == u3r_mean(cor, 30, &rnd, 0) + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) || + c3n == u3r_sing(x_fxp, y_fxp) ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_dot_real(x_data, y_data, x_shape, a_bloq); - return u3nc(u3nq(x_shape, a_bloq, a_kind, a_fxp), r_data); - break; + u3_noun r_data = u3qf_la_dot_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1497,36 +1452,36 @@ u3wf_la_diag(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data; + u3_noun x_meta, x_data; if ( c3n == u3r_mean(cor, - u3x_sam_2, &a_meta, + u3x_sam_2, &x_meta, u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; - if ( c3n == u3r_mean(a_meta, + if ( c3n == u3r_mean(x_meta, 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, + 6, &x_bloq, + 14, &x_kind, + 15, &x_fxp, 0) // c3n == u3r_sing(x_shape, y_shape) || - // c3n == u3r_sing(a_bloq, b_bloq) || - // c3n == u3r_sing(a_kind, b_kind) || - // c3n == u3r_sing(a_fxp, b_fxp) || - // c3n == u3r_mean(cor, 30, &rnd, 0) + // c3n == u3r_sing(x_bloq, y_bloq) || + // c3n == u3r_sing(x_kind, y_kind) 
|| + // c3n == u3r_sing(x_fxp, y_fxp) || + // c3n == u3r_mean(cor, u3x_con_sam, &rnd, 0) ) { return u3m_bail(c3__exit); } else { - u3_noun r_data = u3qf_la_diag(x_data, x_shape, a_bloq); - uint64_t len_x0 = _get_dims(x_shape)[0]; - return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), a_bloq, a_kind, a_fxp), r_data); + u3_noun r_data = u3qf_la_diag(x_data, x_shape, x_bloq); + c3_d len_x0 = _get_dims(x_shape)[0]; + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), x_bloq, x_kind, x_fxp), r_data); } } } @@ -1535,38 +1490,38 @@ u3wf_la_trace(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data; + u3_noun x_meta, x_data; if ( c3n == u3r_mean(cor, - u3x_sam_2, &a_meta, + u3x_sam_2, &x_meta, u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; - if ( c3n == u3r_mean(a_meta, + if ( c3n == u3r_mean(x_meta, 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, + 6, &x_bloq, + 14, &x_kind, + 15, &x_fxp, 0) // c3n == u3r_sing(x_shape, y_shape) || - // c3n == u3r_sing(a_bloq, b_bloq) || - // c3n == u3r_sing(a_kind, b_kind) || - // c3n == u3r_sing(a_fxp, b_fxp) || - // c3n == u3r_mean(cor, 30, &rnd, 0) + // c3n == u3r_sing(x_bloq, y_bloq) || + // c3n == u3r_sing(x_kind, y_kind) || + // c3n == u3r_sing(x_fxp, y_fxp) || + // c3n == u3r_mean(cor, u3x_con_sam, &rnd, 0) ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_trace_real(x_data, x_shape, a_bloq); - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), a_bloq, a_kind, a_fxp), r_data); + u3_noun r_data = u3qf_la_trace_real(x_data, x_shape, x_bloq); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), x_bloq, x_kind, x_fxp), r_data); break; default: @@ -1580,13 +1535,13 @@ u3wf_la_mmul(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] - u3_noun a_meta, x_data, - b_meta, y_data; + u3_noun x_meta, x_data, + y_meta, 
y_data; if ( c3n == u3r_mean(cor, - u3x_sam_4, &a_meta, + u3x_sam_4, &x_meta, u3x_sam_5, &x_data, - u3x_sam_6, &b_meta, + u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || @@ -1594,33 +1549,33 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, a_bloq, a_kind, a_fxp, - y_shape, b_bloq, b_kind, b_fxp, + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(a_meta, + if ( c3n == u3r_mean(x_meta, 2, &x_shape, - 6, &a_bloq, - 14, &a_kind, - 15, &a_fxp, + 6, &x_bloq, + 14, &x_kind, + 15, &x_fxp, 0) || - c3n == u3r_mean(b_meta, + c3n == u3r_mean(y_meta, 2, &y_shape, - 6, &b_bloq, - 14, &b_kind, - 15, &b_fxp, + 6, &y_bloq, + 14, &y_kind, + 15, &y_fxp, 0) || - c3n == u3r_sing(a_bloq, b_bloq) || - c3n == u3r_sing(a_kind, b_kind) || + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) || // fxp does not need to match so no check - c3n == u3r_mean(cor, 30, &rnd, 0) + c3n == u3r_mean(cor, u3x_con_sam, &rnd, 0) ) { return u3m_bail(c3__exit); } else { - switch (a_kind) { + switch (x_kind) { case c3__real: _set_rounding(rnd); - return u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, a_bloq); + return u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); break; default: diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index db84af39d1..63d0bb724e 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -251,6 +251,7 @@ u3_noun u3qf_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mul_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_div_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_mod_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_adds_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 084ba01e98..d8375dc4fc 100644 --- a/pkg/noun/jets/tree.c 
+++ b/pkg/noun/jets/tree.c @@ -2155,6 +2155,7 @@ static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; static u3j_harm _139_hex__lagoon_mul_a[] = {{".2", u3wf_la_mul}, {}}; static u3j_harm _139_hex__lagoon_div_a[] = {{".2", u3wf_la_div}, {}}; +static u3j_harm _139_hex__lagoon_mod_a[] = {{".2", u3wf_la_mod}, {}}; static u3j_harm _139_hex__lagoon_adds_a[] = {{".2", u3wf_la_adds}, {}}; static u3j_harm _139_hex__lagoon_subs_a[] = {{".2", u3wf_la_subs}, {}}; static u3j_harm _139_hex__lagoon_muls_a[] = {{".2", u3wf_la_muls}, {}}; @@ -2168,6 +2169,7 @@ static u3j_core _139_hex__la_core_d[] = { "sub-rays", 7, _139_hex__lagoon_sub_a, 0, no_hashes }, { "mul-rays", 7, _139_hex__lagoon_mul_a, 0, no_hashes }, { "div-rays", 7, _139_hex__lagoon_div_a, 0, no_hashes }, + { "mod-rays", 7, _139_hex__lagoon_mod_a, 0, no_hashes }, { "add-scal", 7, _139_hex__lagoon_adds_a, 0, no_hashes }, { "sub-scal", 7, _139_hex__lagoon_subs_a, 0, no_hashes }, { "mul-scal", 7, _139_hex__lagoon_muls_a, 0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 2bd87e6551..0d8dadbf2f 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -335,6 +335,7 @@ u3_noun u3wf_la_sub(u3_noun); u3_noun u3wf_la_mul(u3_noun); u3_noun u3wf_la_div(u3_noun); + u3_noun u3wf_la_mod(u3_noun); u3_noun u3wf_la_adds(u3_noun); u3_noun u3wf_la_subs(u3_noun); u3_noun u3wf_la_muls(u3_noun); From ee9d1e10f19852a242a964b3974a0df51fd9f394 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Fri, 12 Apr 2024 19:27:09 -0500 Subject: [PATCH 13/41] Posting some jets. 
--- pkg/noun/jets/f/lagoon.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 84c58bd191..9edb40095a 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -99,18 +99,6 @@ return dims; } -/* -*/ - static inline size_t _get_array_length(c3_d* array) - { - size_t n = sizeof(array)/sizeof(array[0]); - for (size_t i = 0; i < n; i++) { - fprintf(stderr, "%x ", array[i]); - } - fprintf(stderr, " => %x \n", n); - return n; - } - /* add - axpy = 1*x+y */ u3_noun @@ -444,31 +432,18 @@ for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; float128_t y_val128 = ((float128_t*)y_bytes)[i]; - fprintf(stderr, "x_val128: %llx %llx\r\n", x_val128.v[0], x_val128.v[1]); - fprintf(stderr, "y_val128: %llx %llx\r\n", y_val128.v[0], y_val128.v[1]); // Perform division x/n float128_t div_result128; - // float128_t div_result128 = f128_div(x_val128, y_val128); f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128); - fprintf(stderr, "div_result128: %llx %llx\r\n", div_result128.v[0], div_result128.v[1]); // Compute floor of the division result - int64_t floor_result128 = f128_to_i64(div_result128, softfloat_round_minMag, false); - fprintf(stderr, "floor_result128: %llx\r\n", floor_result128); + int64_t floor_result128 = f128_to_i64(div_result128, rnd, false); float128_t floor_float128 = i64_to_f128(floor_result128); - fprintf(stderr, "floor_float128: %llx %llx\r\n", floor_float128.v[0], floor_float128.v[1]); // Multiply n by floor(x/n) float128_t mult_result128; - // float128_t mult_result128 = f128_mul(y_val128, floor_float128); f128M_mul(((float128_t*)&y_val128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); - fprintf(stderr, "mult_result128: %llx %llx\r\n", mult_result128.v[0], mult_result128.v[1]); // Compute remainder: x - n * floor(x/n) - // ((float128_t*)y_bytes)[i] = 
f128_sub(x_val128, mult_result128); - f128M_div(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)y_bytes)[i])); - fprintf(stderr, "y_bytes: %llx %llx\r\n", ((float128_t*)y_bytes)[i].v[0], ((float128_t*)y_bytes)[i].v[1]); + f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)y_bytes)[i])); } - // for (c3_d i = 0; i < len_x; i++) { - // f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); - // } break; } From a5c79a519c476edf0ec040567e9542618c063da8 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Sat, 13 Apr 2024 12:37:46 -0500 Subject: [PATCH 14/41] Post mod/mods jets. --- pkg/noun/jets/f/lagoon.c | 217 ++++++++++++++++++++++++++++++++++----- pkg/noun/jets/q.h | 3 +- pkg/noun/jets/tree.c | 2 + pkg/noun/jets/w.h | 1 + 4 files changed, 198 insertions(+), 25 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 9edb40095a..8ae1e77e03 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -355,8 +355,7 @@ u3qf_la_mod_real(u3_noun x_data, u3_noun y_data, u3_noun shape, - u3_noun bloq, - u3_noun rnd) + u3_noun bloq) { // Fence on valid bloq size. 
if (bloq < 4 || bloq > 7) { @@ -387,7 +386,7 @@ // Perform division x/n float16_t div_result16 = f16_div(x_val16, y_val16); // Compute floor of the division result - int64_t floor_result16 = f16_to_i64(div_result16, rnd, false); + int64_t floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); float16_t floor_float16 = i64_to_f16(floor_result16); // Multiply n by floor(x/n) float16_t mult_result16 = f16_mul(y_val16, floor_float16); @@ -403,7 +402,7 @@ // Perform division x/n float32_t div_result32 = f32_div(x_val32, y_val32); // Compute floor of the division result - int64_t floor_result32 = f32_to_i64(div_result32, rnd, false); + int64_t floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); float32_t floor_float32 = i64_to_f32(floor_result32); // Multiply n by floor(x/n) float32_t mult_result32 = f32_mul(y_val32, floor_float32); @@ -419,7 +418,7 @@ // Perform division x/n float64_t div_result64 = f64_div(x_val64, y_val64); // Compute floor of the division result - int64_t floor_result64 = f64_to_i64(div_result64, rnd, false); + int64_t floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); float64_t floor_float64 = i64_to_f64(floor_result64); // Multiply n by floor(x/n) float64_t mult_result64 = f64_mul(y_val64, floor_float64); @@ -436,7 +435,7 @@ float128_t div_result128; f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128); // Compute floor of the division result - int64_t floor_result128 = f128_to_i64(div_result128, rnd, false); + int64_t floor_result128 = f128_to_i64(div_result128, softfloat_round_minMag, false); float128_t floor_float128 = i64_to_f128(floor_result128); // Multiply n by floor(x/n) float128_t mult_result128; @@ -710,35 +709,170 @@ u3r_bytes(0, siz_x, x_bytes, x_data); x_bytes[siz_x] = 1; // pin head - float16_t n16; - float32_t n32; - float64_t n64; - float128_t n128; + float16_t in16; + float32_t in32; + float64_t in64; + float128_t in128; // Switch on 
the block size. switch (bloq) { case 4: - u3r_bytes(0, 2, (c3_y*)&(n16.v), n); - n16 = f16_div((float16_t){SB_REAL16_ONE}, n16); - hscal(len_x, n16, (float16_t*)x_bytes, 1); + // XX note that in16 is doing double duty here + u3r_bytes(0, 2, (c3_y*)&(in16.v), n); + in16 = f16_div((float16_t){SB_REAL16_ONE}, in16); + hscal(len_x, in16, (float16_t*)x_bytes, 1); break; case 5: + // XX note that in32 is doing double duty here + u3r_bytes(0, 4, (c3_y*)&(in32.v), n); + in32 = f32_div((float32_t){SB_REAL32_ONE}, in32); + sscal(len_x, in32, (float32_t*)x_bytes, 1); + break; + + case 6: + // XX note that in64 is doing double duty here + u3r_bytes(0, 8, (c3_y*)&(in64.v), n); + in64 = f64_div((float64_t){SB_REAL64_ONE}, in64); + dscal(len_x, in64, (float64_t*)x_bytes, 1); + break; + + case 7: + // XX note that in128 is doing double duty here + u3r_bytes(0, 16, (c3_y*)&(in128.v[0]), n); + f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &in128, &in128); + qscal(len_x, in128, (float128_t*)x_bytes, 1); + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); + + // Clean up and return. + u3a_free(x_bytes); + + return r_data; + } + +/* mods - x % [n] = x - r*floor(x/r) + remainder after scalar division +*/ + u3_noun + u3qf_la_mods_real(u3_noun x_data, + u3_noun n, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. 
+ // len_x is length in base units + c3_d len_x = _get_length(shape); + fprintf(stderr, "len_x: %ld\r\n", len_x); + + // siz_x is length in bytes + c3_d siz_x = len_x * pow(2, bloq-3); + fprintf(stderr, "siz_x: %ld\r\n", siz_x); + + // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + // we reuse it for results for parsimony + c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, x_bytes, x_data); + for (c3_d i = 0; i < siz_x+1; i++) { + fprintf(stderr, "x_bytes[%ld]: %x\r\n", i, x_bytes[i]); + } + + float16_t n16, in16; + float32_t n32, in32; + float64_t n64, in64; + float128_t n128, in128; + + // Switch on the block size. + switch (bloq) { + case 4: + u3r_bytes(0, 2, (c3_y*)&n16, n); + in16 = f16_div((float16_t){SB_REAL16_ONE}, n16); + + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + // Perform division x/n + float16_t div_result16 = f16_mul(in16, x_val16); + // Compute floor of the division result + int64_t floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); + float16_t floor_float16 = i64_to_f16(floor_result16); + // Multiply n by floor(x/n) + float16_t mult_result16 = f16_mul(n16, floor_float16); + // Compute remainder: x - n * floor(x/n) + ((float16_t*)x_bytes)[i] = f16_sub(x_val16, mult_result16); + } + break; + + case 5: + u3l_log("n: %x", n); u3r_bytes(0, 4, (c3_y*)&(n32.v), n); - n32 = f32_div((float32_t){SB_REAL32_ONE}, n32); - sscal(len_x, n32, (float32_t*)x_bytes, 1); + fprintf(stderr, "n32: %f\r\n", n32.v); + in32 = f32_div((float32_t){SB_REAL32_ONE}, n32); + fprintf(stderr, "in32: %f\r\n", in32); + + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + fprintf(stderr, "x_val32: %f\r\n", (float32_t)x_val32); + // Perform division x/n + float32_t div_result32 = f32_mul((float32_t)in32, (float32_t)x_val32); + fprintf(stderr, "div_result32: %f\r\n", div_result32); + // Compute floor of the division result + int64_t 
floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); + fprintf(stderr, "floor_result32: %ld\r\n", floor_result32); + float32_t floor_float32 = i64_to_f32(floor_result32); + fprintf(stderr, "floor_float32: %f\r\n", floor_float32); + // Multiply n by floor(x/n) + float32_t mult_result32 = f32_mul(n32, floor_float32); + fprintf(stderr, "mult_result32: %f\r\n", mult_result32); + // Compute remainder: x - n * floor(x/n) + ((float32_t*)x_bytes)[i] = f32_sub(x_val32, mult_result32); + fprintf(stderr, "x_bytes[i]: %f\r\n\r\n", ((float32_t*)x_bytes)[i]); + } break; case 6: - u3r_bytes(0, 8, (c3_y*)&(n64.v), n); - n64 = f64_div((float64_t){SB_REAL64_ONE}, n64); - dscal(len_x, n64, (float64_t*)x_bytes, 1); + u3r_bytes(0, 8, (c3_y*)&n64, n); + in64 = f64_div((float64_t){SB_REAL64_ONE}, n64); + + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + // Perform division x/n + float64_t div_result64 = f64_mul(in64, x_val64); + // Compute floor of the division result + int64_t floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); + float64_t floor_float64 = i64_to_f64(floor_result64); + // Multiply n by floor(x/n) + float64_t mult_result64 = f64_mul(n64, floor_float64); + // Compute remainder: x - n * floor(x/n) + ((float64_t*)x_bytes)[i] = f64_sub(x_val64, mult_result64); + } break; case 7: - u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); - f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &n128, &n128); - qscal(len_x, n128, (float128_t*)x_bytes, 1); + u3r_bytes(0, 16, (c3_y*)&n128, n); + f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ZERO}), &n128, &in128); + + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + // Perform division x/n + float128_t div_result128; + f128M_mul((float128_t*)&in128, (float128_t*)&x_val128, (float128_t*)&div_result128); + // Compute floor of the division result + int64_t floor_result128 = f128_to_i64(div_result128, 
softfloat_round_minMag, false); + float128_t floor_float128 = i64_to_f128(floor_result128); + // Multiply n by floor(x/n) + float128_t mult_result128; + f128M_mul(((float128_t*)&n128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); + // Compute remainder: x - n * floor(x/n) + f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)x_bytes)[i])); + } break; } @@ -1216,9 +1350,9 @@ return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; // XX satisfy label - // Global rounding mode is ignored by SoftFloat conversions so we pass it in. - u3_noun r_data = u3qf_la_mod_real(x_data, y_data, x_shape, x_bloq, rnd); + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_mod_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -1368,6 +1502,41 @@ } } + u3_noun + u3wf_la_mods(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_3, &n, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(n) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_mods_real(x_data, n, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + u3_noun u3wf_la_dot(u3_noun cor) { diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 63d0bb724e..0b504b185e 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -251,11 +251,12 @@ u3_noun u3qf_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mul_real(u3_noun, u3_noun, u3_noun, 
u3_noun); u3_noun u3qf_la_div_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_mod_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_mod_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_adds_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_divs_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_mods_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_trace_real(u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index d8375dc4fc..c6c148d83f 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2160,6 +2160,7 @@ static u3j_harm _139_hex__lagoon_adds_a[] = {{".2", u3wf_la_adds}, {}}; static u3j_harm _139_hex__lagoon_subs_a[] = {{".2", u3wf_la_subs}, {}}; static u3j_harm _139_hex__lagoon_muls_a[] = {{".2", u3wf_la_muls}, {}}; static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; +static u3j_harm _139_hex__lagoon_mods_a[] = {{".2", u3wf_la_mods}, {}}; static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; static u3j_harm _139_hex__lagoon_diag_a[] = {{".2", u3wf_la_diag}, {}}; static u3j_harm _139_hex__lagoon_trace_a[]= {{".2", u3wf_la_trace}, {}}; @@ -2174,6 +2175,7 @@ static u3j_core _139_hex__la_core_d[] = { "sub-scal", 7, _139_hex__lagoon_subs_a, 0, no_hashes }, { "mul-scal", 7, _139_hex__lagoon_muls_a, 0, no_hashes }, { "div-scal", 7, _139_hex__lagoon_divs_a, 0, no_hashes }, + { "mod-scal", 7, _139_hex__lagoon_mods_a, 0, no_hashes }, { "dot", 7, _139_hex__lagoon_dot_a, 0, no_hashes }, { "diag", 7, _139_hex__lagoon_diag_a, 0, no_hashes }, { "trace", 7, _139_hex__lagoon_trace_a,0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 0d8dadbf2f..f36e490e2c 100644 --- 
a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -340,6 +340,7 @@ u3_noun u3wf_la_subs(u3_noun); u3_noun u3wf_la_muls(u3_noun); u3_noun u3wf_la_divs(u3_noun); + u3_noun u3wf_la_mods(u3_noun); u3_noun u3wf_la_dot(u3_noun); u3_noun u3wf_la_diag(u3_noun); u3_noun u3wf_la_trace(u3_noun); From 110580373116fb645e4bf47ff8436171ff675b8d Mon Sep 17 00:00:00 2001 From: Sigilante Date: Mon, 15 Apr 2024 15:00:36 -0500 Subject: [PATCH 15/41] Add transpose and fixed other jets. --- pkg/noun/jets/f/lagoon.c | 208 +++++++++++++++++++++++++-------------- pkg/noun/jets/q.h | 1 + pkg/noun/jets/tree.c | 2 + pkg/noun/jets/w.h | 1 + 4 files changed, 140 insertions(+), 72 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 8ae1e77e03..1318b532a2 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -491,7 +491,7 @@ // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. switch (bloq) { case 4: - u3r_bytes(0, 2, (c3_y*)&n16, n); + u3r_bytes(0, 2, (c3_y*)&(n16.v), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = n16; @@ -500,7 +500,7 @@ break; case 5: - u3r_bytes(0, 4, (c3_y*)&n32, n); + u3r_bytes(0, 4, (c3_y*)&(n32.v), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = n32; @@ -509,7 +509,7 @@ break; case 6: - u3r_bytes(0, 8, (c3_y*)&n64, n); + u3r_bytes(0, 8, (c3_y*)&(n64.v), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = n64; @@ -518,7 +518,7 @@ break; case 7: - u3r_bytes(0, 16, (c3_y*)&n128, n); + u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]}; @@ -573,7 +573,7 @@ // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. 
switch (bloq) { case 4: - u3r_bytes(0, 2, (c3_y*)&n16, n); + u3r_bytes(0, 2, (c3_y*)&(n16.v), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = n16; @@ -582,7 +582,7 @@ break; case 5: - u3r_bytes(0, 4, (c3_y*)&n32, n); + u3r_bytes(0, 4, (c3_y*)&(n32.v), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = n32; @@ -591,7 +591,7 @@ break; case 6: - u3r_bytes(0, 8, (c3_y*)&n64, n); + u3r_bytes(0, 8, (c3_y*)&(n64.v), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = n64; @@ -600,7 +600,7 @@ break; case 7: - u3r_bytes(0, 16, (c3_y*)&n128, n); + u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); // set y to [n] for (c3_d i = 0; i < len_x; i++) { ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]}; @@ -654,17 +654,17 @@ // Switch on the block size. switch (bloq) { case 4: - u3r_bytes(0, 2, (c3_y*)&n16, n); + u3r_bytes(0, 2, (c3_y*)&(n16.v), n); hscal(len_x, n16, (float16_t*)x_bytes, 1); break; case 5: - u3r_bytes(0, 4, (c3_y*)&n32, n); + u3r_bytes(0, 4, (c3_y*)&(n32.v), n); sscal(len_x, n32, (float32_t*)x_bytes, 1); break; case 6: - u3r_bytes(0, 8, (c3_y*)&n64, n); + u3r_bytes(0, 8, (c3_y*)&(n64.v), n); dscal(len_x, n64, (float64_t*)x_bytes, 1); break; @@ -771,19 +771,14 @@ // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); - fprintf(stderr, "len_x: %ld\r\n", len_x); // siz_x is length in bytes c3_d siz_x = len_x * pow(2, bloq-3); - fprintf(stderr, "siz_x: %ld\r\n", siz_x); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) // we reuse it for results for parsimony c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); u3r_bytes(0, siz_x+1, x_bytes, x_data); - for (c3_d i = 0; i < siz_x+1; i++) { - fprintf(stderr, "x_bytes[%ld]: %x\r\n", i, x_bytes[i]); - } float16_t n16, in16; float32_t n32, in32; @@ -793,7 +788,7 @@ // Switch on the block size. 
switch (bloq) { case 4: - u3r_bytes(0, 2, (c3_y*)&n16, n); + u3r_bytes(0, 2, (c3_y*)&(n16.v), n); in16 = f16_div((float16_t){SB_REAL16_ONE}, n16); for (c3_d i = 0; i < len_x; i++) { @@ -811,34 +806,25 @@ break; case 5: - u3l_log("n: %x", n); u3r_bytes(0, 4, (c3_y*)&(n32.v), n); - fprintf(stderr, "n32: %f\r\n", n32.v); in32 = f32_div((float32_t){SB_REAL32_ONE}, n32); - fprintf(stderr, "in32: %f\r\n", in32); for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; - fprintf(stderr, "x_val32: %f\r\n", (float32_t)x_val32); // Perform division x/n float32_t div_result32 = f32_mul((float32_t)in32, (float32_t)x_val32); - fprintf(stderr, "div_result32: %f\r\n", div_result32); // Compute floor of the division result int64_t floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); - fprintf(stderr, "floor_result32: %ld\r\n", floor_result32); float32_t floor_float32 = i64_to_f32(floor_result32); - fprintf(stderr, "floor_float32: %f\r\n", floor_float32); // Multiply n by floor(x/n) float32_t mult_result32 = f32_mul(n32, floor_float32); - fprintf(stderr, "mult_result32: %f\r\n", mult_result32); // Compute remainder: x - n * floor(x/n) ((float32_t*)x_bytes)[i] = f32_sub(x_val32, mult_result32); - fprintf(stderr, "x_bytes[i]: %f\r\n\r\n", ((float32_t*)x_bytes)[i]); } break; case 6: - u3r_bytes(0, 8, (c3_y*)&n64, n); + u3r_bytes(0, 8, (c3_y*)&(n64.v), n); in64 = f64_div((float64_t){SB_REAL64_ONE}, n64); for (c3_d i = 0; i < len_x; i++) { @@ -856,7 +842,7 @@ break; case 7: - u3r_bytes(0, 16, (c3_y*)&n128, n); + u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ZERO}), &n128, &in128); for (c3_d i = 0; i < len_x; i++) { @@ -913,28 +899,39 @@ c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); u3r_bytes(0, siz_x+1, y_bytes, y_data); + u3_noun r_data; + // Switch on the block size. 
switch (bloq) { - case 4: - hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + case 4: ; + float16_t r16[2]; + r16[0] = hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); + r16[1].v = 0x1; + r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); break; - case 5: - sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + case 5: ; + float32_t r32[2]; + r32[0] = sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); + r32[1].v = 0x1; + r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); break; - case 6: - ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + case 6: ; + float64_t r64[2]; + r64[0] = ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); + r64[1].v = 0x1; + r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); break; - case 7: - qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + case 7: ; + float128_t r128[2]; + r128[0] = qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); + r128[1] = (float128_t){0x1, 0x0}; + r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); break; } - // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); - // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); @@ -949,6 +946,10 @@ u3_noun shape, u3_noun bloq) { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } // Assert length of dims is 2. assert(u3qb_lent(shape) == 2); // Unpack shape into an array of dimensions. @@ -958,18 +959,19 @@ // Unpack the data as a byte array. We assume total length < 2**64. 
c3_d len_x = _get_length(shape); c3_d siz_x = len_x * pow(2, bloq - 3); - c3_d stride = dims[0] * pow(2, bloq - 3); + c3_d wyd = pow(2, bloq - 3); c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); u3r_bytes(0, siz_x+1, x_bytes, x_data); - c3_d siz_y = stride * dims[1]; + c3_d siz_y = wyd * dims[1]; c3_y* y_bytes = (c3_y*)u3a_malloc((siz_y+1)*sizeof(c3_y)); u3_noun r_data; + // Grab the index at i*n_x+j in bytes; put it at j. for (c3_d i = 0; i < dims[1]; i++) { - for (c3_d j = 0; j < stride; j++) { - fprintf(stderr, "i*s+j = %d*%d+%d = %d // x_bytes[i]: %lx\r\n", i, stride, j, i*stride+j, x_bytes[i*stride+j + i]); - y_bytes[i*stride+j] = x_bytes[i*stride+j + i]; + // Scan across whole field width. + for (c3_y k = 0; k < wyd; k++) { + y_bytes[i*wyd+k] = x_bytes[(i*dims[0]+i)*wyd+k]; } } y_bytes[siz_y] = 1; // pin head @@ -984,6 +986,49 @@ return r_data; } +/* transpose - x' +*/ + u3_noun + u3qf_la_transpose(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Assert length of dims is 2. + assert(u3qb_lent(shape) == 2); + // Unpack shape into an array of dimensions. + c3_d *dims = _get_dims(shape); + + // Unpack the data as a byte array. We assume total length < 2**64. + c3_d len_x = _get_length(shape); + c3_d siz_x = len_x * pow(2, bloq - 3); + c3_d wyd = pow(2, bloq - 3); + c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + u3r_bytes(0, siz_x+1, x_bytes, x_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + + u3_noun r_data; + + // Grab the index at i*n_x+j in bytes; put it at j. + for (c3_d i = 0; i < dims[1]; i++) { + for (c3_d j = 0; j < dims[0]; j++) { + // Scan across whole field width. + for (c3_y k = 0; k < wyd; k++) { + y_bytes[(j*dims[1]+i)*wyd+k] = x_bytes[(i*dims[0]+j)*wyd+k]; + } + } + } + y_bytes[siz_x] = 1; // pin head + + // Unpack the result back into a noun. 
+ r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(dims); + + return r_data; + } + /* trace - tr(x) */ u3_noun @@ -991,9 +1036,10 @@ u3_noun shape, u3_noun bloq) { - u3_noun diag_data = u3qf_la_diag(x_data, shape, bloq); + u3_noun d_data = u3qf_la_diag(x_data, shape, bloq); c3_d len_x0 = _get_dims(shape)[0]; - return u3qf_la_dot_real(diag_data, diag_data, u3nt(len_x0, 0x1, u3_nul), bloq); + u3_noun r_data = u3qf_la_dot_real(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), bloq); + return r_data; } /* mmul @@ -1583,7 +1629,8 @@ case c3__real: _set_rounding(rnd); u3_noun r_data = u3qf_la_dot_real(x_data, y_data, x_shape, x_bloq); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + c3_d len_x0 = _get_dims(x_shape)[0]; + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -1606,26 +1653,51 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - rnd; - if ( c3n == u3r_mean(x_meta, - 2, &x_shape, - 6, &x_bloq, - 14, &x_kind, - 15, &x_fxp, - 0) - // c3n == u3r_sing(x_shape, y_shape) || - // c3n == u3r_sing(x_bloq, y_bloq) || - // c3n == u3r_sing(x_kind, y_kind) || - // c3n == u3r_sing(x_fxp, y_fxp) || - // c3n == u3r_mean(cor, u3x_con_sam, &rnd, 0) + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { u3_noun r_data = u3qf_la_diag(x_data, x_shape, x_bloq); c3_d len_x0 = _get_dims(x_shape)[0]; - return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), x_bloq, x_kind, x_fxp), r_data); + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + } + } + } + + u3_noun + u3wf_la_transpose(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + 
u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + u3_noun r_data = u3qf_la_transpose(x_data, x_shape, x_bloq); + return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } } } @@ -1644,29 +1716,21 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - rnd; + u3_noun x_shape, x_bloq, x_kind, x_fxp; if ( c3n == u3r_mean(x_meta, 2, &x_shape, 6, &x_bloq, 14, &x_kind, 15, &x_fxp, 0) - // c3n == u3r_sing(x_shape, y_shape) || - // c3n == u3r_sing(x_bloq, y_bloq) || - // c3n == u3r_sing(x_kind, y_kind) || - // c3n == u3r_sing(x_fxp, y_fxp) || - // c3n == u3r_mean(cor, u3x_con_sam, &rnd, 0) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: - _set_rounding(rnd); + case c3__real: ; u3_noun r_data = u3qf_la_trace_real(x_data, x_shape, x_bloq); - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), x_bloq, x_kind, x_fxp), r_data); - break; + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 0b504b185e..37f4ccc127 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -259,6 +259,7 @@ u3_noun u3qf_la_mods_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_trace_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); diff --git 
a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index c6c148d83f..db84df3780 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2162,6 +2162,7 @@ static u3j_harm _139_hex__lagoon_muls_a[] = {{".2", u3wf_la_muls}, {}}; static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; static u3j_harm _139_hex__lagoon_mods_a[] = {{".2", u3wf_la_mods}, {}}; static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; +static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; static u3j_harm _139_hex__lagoon_diag_a[] = {{".2", u3wf_la_diag}, {}}; static u3j_harm _139_hex__lagoon_trace_a[]= {{".2", u3wf_la_trace}, {}}; static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; @@ -2177,6 +2178,7 @@ static u3j_core _139_hex__la_core_d[] = { "div-scal", 7, _139_hex__lagoon_divs_a, 0, no_hashes }, { "mod-scal", 7, _139_hex__lagoon_mods_a, 0, no_hashes }, { "dot", 7, _139_hex__lagoon_dot_a, 0, no_hashes }, + { "transpose",7, _139_hex__lagoon_trans_a, 0, no_hashes }, { "diag", 7, _139_hex__lagoon_diag_a, 0, no_hashes }, { "trace", 7, _139_hex__lagoon_trace_a,0, no_hashes }, { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index f36e490e2c..36d66c21ba 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -343,6 +343,7 @@ u3_noun u3wf_la_mods(u3_noun); u3_noun u3wf_la_dot(u3_noun); u3_noun u3wf_la_diag(u3_noun); + u3_noun u3wf_la_transpose(u3_noun); u3_noun u3wf_la_trace(u3_noun); u3_noun u3wf_la_mmul(u3_noun); From 7eab0d7830494e5171200b0c31558da49a54f00e Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 16 Apr 2024 15:52:41 -0500 Subject: [PATCH 16/41] Add +abs etc. 
--- pkg/noun/jets/f/lagoon.c | 972 ++++++++++++++++++++++++++++++++++----- pkg/noun/jets/q.h | 9 + pkg/noun/jets/tree.c | 30 ++ pkg/noun/jets/w.h | 9 + 4 files changed, 899 insertions(+), 121 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 1318b532a2..7ffc7fd7ad 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -117,16 +117,16 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, y_bytes, y_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (bloq) { @@ -148,7 +148,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. 
u3a_free(x_bytes); @@ -175,16 +175,16 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, y_bytes, y_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (bloq) { @@ -206,7 +206,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); @@ -234,16 +234,16 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, y_bytes, y_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. 
switch (bloq) { @@ -273,7 +273,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); @@ -300,16 +300,16 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, y_bytes, y_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (bloq) { @@ -339,7 +339,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. 
u3a_free(x_bytes); @@ -366,16 +366,16 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, y_bytes, y_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (bloq) { @@ -447,7 +447,358 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* abs - |x| +*/ + u3_noun + u3qf_la_abs_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/ leading 0x1, skipped by for range) + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + + // Switch on the block size. 
+ switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + ((float16_t*)x_bytes)[i] = f16_abs(((float16_t*)x_bytes)[i]); + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + ((float32_t*)x_bytes)[i] = f32_abs(((float32_t*)x_bytes)[i]); + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + ((float64_t*)x_bytes)[i] = f64_abs(((float64_t*)x_bytes)[i]); + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + ((float128_t*)x_bytes)[i] = f128_abs(((float128_t*)x_bytes)[i]); + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); + + // Clean up and return. + u3a_free(x_bytes); + + return r_data; + } + +/* gth - x > y +*/ + u3_noun + u3qf_la_gth_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. + switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + float16_t y_val16 = ((float16_t*)y_bytes)[i]; + ((float16_t*)y_bytes)[i] = f16_gt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + float32_t y_val32 = ((float32_t*)y_bytes)[i]; + ((float32_t*)y_bytes)[i] = f32_gt(x_val32, y_val32) ? 
(float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + float64_t y_val64 = ((float64_t*)y_bytes)[i]; + ((float64_t*)y_bytes)[i] = f64_gt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + float128_t y_val128 = ((float128_t*)y_bytes)[i]; + ((float128_t*)y_bytes)[i] = f128M_gt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* gte - x >= y +*/ + u3_noun + u3qf_la_gte_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. + switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + float16_t y_val16 = ((float16_t*)y_bytes)[i]; + ((float16_t*)y_bytes)[i] = f16_ge(x_val16, y_val16) ?
(float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + float32_t y_val32 = ((float32_t*)y_bytes)[i]; + ((float32_t*)y_bytes)[i] = f32_ge(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + float64_t y_val64 = ((float64_t*)y_bytes)[i]; + ((float64_t*)y_bytes)[i] = f64_ge(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + float128_t y_val128 = ((float128_t*)y_bytes)[i]; + ((float128_t*)y_bytes)[i] = f128M_ge(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* lth - x < y +*/ + u3_noun + u3qf_la_lth_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size.
+ switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + float16_t y_val16 = ((float16_t*)y_bytes)[i]; + ((float16_t*)y_bytes)[i] = f16_lt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + float32_t y_val32 = ((float32_t*)y_bytes)[i]; + ((float32_t*)y_bytes)[i] = f32_lt(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + float64_t y_val64 = ((float64_t*)y_bytes)[i]; + ((float64_t*)y_bytes)[i] = f64_lt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + float128_t y_val128 = ((float128_t*)y_bytes)[i]; + ((float128_t*)y_bytes)[i] = f128M_lt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); + + // Clean up and return. + u3a_free(x_bytes); + u3a_free(y_bytes); + + return r_data; + } + +/* lte - x <= y +*/ + u3_noun + u3qf_la_lte_real(u3_noun x_data, + u3_noun y_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); + + // Switch on the block size. + switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + float16_t y_val16 = ((float16_t*)y_bytes)[i]; + ((float16_t*)y_bytes)[i] = f16_le(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + float32_t y_val32 = ((float32_t*)y_bytes)[i]; + ((float32_t*)y_bytes)[i] = f32_le(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + float64_t y_val64 = ((float64_t*)y_bytes)[i]; + ((float64_t*)y_bytes)[i] = f64_le(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + float128_t y_val128 = ((float128_t*)y_bytes)[i]; + ((float128_t*)y_bytes)[i] = f128M_le(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; + } + break; + } + + // r_data is the result noun of [data] + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. 
u3a_free(x_bytes); @@ -473,15 +824,15 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); float16_t n16; float32_t n32; @@ -528,8 +879,8 @@ } // r_data is the result noun of [data] - y_bytes[siz_x] = 1; // pin head - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + y_bytes[syz_x] = 1; // pin head + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); @@ -555,15 +906,15 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/o leading 0x1) - c3_y* y_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); + c3_y* y_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); float16_t n16; float32_t n32; @@ -610,8 +961,8 @@ } // r_data is the result noun of [data] - x_bytes[siz_x] = 1; // pin head - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); + x_bytes[syz_x] = 1; // pin head + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. 
u3a_free(x_bytes); @@ -638,13 +989,13 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); - x_bytes[siz_x] = 1; // pin head + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + x_bytes[syz_x] = 1; // pin head float16_t n16; float32_t n32; @@ -675,7 +1026,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. u3a_free(x_bytes); @@ -701,13 +1052,13 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); - x_bytes[siz_x] = 1; // pin head + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + x_bytes[syz_x] = 1; // pin head float16_t in16; float32_t in32; @@ -746,7 +1097,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. 
u3a_free(x_bytes); @@ -772,13 +1123,13 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) // we reuse it for results for parsimony - c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); float16_t n16, in16; float32_t n32, in32; @@ -863,7 +1214,7 @@ } // r_data is the result noun of [data] - u3_noun r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), x_bytes); + u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. u3a_free(x_bytes); @@ -888,16 +1239,16 @@ // len_x is length in base units c3_d len_x = _get_length(shape); - // siz_x is length in bytes - c3_d siz_x = len_x * pow(2, bloq-3); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(siz_x*sizeof(c3_y)); - u3r_bytes(0, siz_x, x_bytes, x_data); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, y_bytes, y_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, y_bytes, y_data); u3_noun r_data; @@ -958,12 +1309,12 @@ // Unpack the data as a byte array. We assume total length < 2**64. 
c3_d len_x = _get_length(shape); - c3_d siz_x = len_x * pow(2, bloq - 3); + c3_d syz_x = len_x * pow(2, bloq - 3); c3_d wyd = pow(2, bloq - 3); - c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, x_bytes, x_data); - c3_d siz_y = wyd * dims[1]; - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_y+1)*sizeof(c3_y)); + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + c3_d syz_y = wyd * dims[1]; + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_y+1)*sizeof(c3_y)); u3_noun r_data; @@ -974,10 +1325,10 @@ y_bytes[i*wyd+k] = x_bytes[(i*dims[0]+i)*wyd+k]; } } - y_bytes[siz_y] = 1; // pin head + y_bytes[syz_y] = 1; // pin head // Unpack the result back into a noun. - r_data = u3i_bytes((siz_y+1)*sizeof(c3_y), y_bytes); + r_data = u3i_bytes((syz_y+1)*sizeof(c3_y), y_bytes); u3a_free(x_bytes); u3a_free(y_bytes); @@ -1000,11 +1351,11 @@ // Unpack the data as a byte array. We assume total length < 2**64. c3_d len_x = _get_length(shape); - c3_d siz_x = len_x * pow(2, bloq - 3); + c3_d syz_x = len_x * pow(2, bloq - 3); c3_d wyd = pow(2, bloq - 3); - c3_y* x_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); - u3r_bytes(0, siz_x+1, x_bytes, x_data); - c3_y* y_bytes = (c3_y*)u3a_malloc((siz_x+1)*sizeof(c3_y)); + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3_noun r_data; @@ -1017,10 +1368,10 @@ } } } - y_bytes[siz_x] = 1; // pin head + y_bytes[syz_x] = 1; // pin head // Unpack the result back into a noun. - r_data = u3i_bytes((siz_x+1)*sizeof(c3_y), y_bytes); + r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); u3a_free(x_bytes); u3a_free(y_bytes); @@ -1029,6 +1380,97 @@ return r_data; } +/* linspace - [a a+(b-a)/n ... b] +*/ + u3_noun + u3qf_la_linspace_real(u3_noun a, + u3_noun b, + u3_noun n, + u3_noun bloq) + { + // Fence on valid bloq size. 
+ if (bloq < 4 || bloq > 7) { + return u3_none; + } + + u3_noun r_data; + + switch (bloq) { + case 4: ; + float16_t a16, b16; + u3r_bytes(0, 2, (c3_y*)&(a16.v), a); + u3r_bytes(0, 2, (c3_y*)&(b16.v), b); + float16_t span16 = f16_sub(b16, a16); + float16_t interval16 = f16_div(span16, i32_to_f16(n)); + c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n+1)*2+1)*sizeof(c3_y)); + for (c3_d i = 1; i <= n; i++) { + ((float16_t*)x_bytes16)[n-i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); + } + ((float16_t*)x_bytes16)[n] = b16; + x_bytes16[(n+1)*2] = 1; // pin head + r_data = u3i_bytes(((n+1)*2+1)*sizeof(c3_y), x_bytes16); + u3a_free(x_bytes16); + break; + + case 5: ; + float32_t a32, b32; + u3r_bytes(0, 4, (c3_y*)&(a32.v), a); + u3r_bytes(0, 4, (c3_y*)&(b32.v), b); + float32_t span32 = f32_sub(b32, a32); + float32_t interval32 = f32_div(span32, i32_to_f32(n)); + c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n+1)*4+1)*sizeof(c3_y)); + for (c3_d i = 1; i <= n; i++) { + ((float32_t*)x_bytes32)[n-i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); + } + ((float32_t*)x_bytes32)[n] = b32; + x_bytes32[(n+1)*4] = 1; // pin head + r_data = u3i_bytes(((n+1)*4+1)*sizeof(c3_y), x_bytes32); + u3a_free(x_bytes32); + break; + + case 6: ; + float64_t a64, b64; + u3r_bytes(0, 8, (c3_y*)&(a64.v), a); + u3r_bytes(0, 8, (c3_y*)&(b64.v), b); + float64_t span64 = f64_sub(b64, a64); + float64_t interval64 = f64_div(span64, i32_to_f64(n)); + c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n+1)*8+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n; i++) { + ((float64_t*)x_bytes64)[n-i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); + } + ((float64_t*)x_bytes64)[n] = b64; + x_bytes64[(n+1)*8] = 1; // pin head + r_data = u3i_bytes(((n+1)*8+1)*sizeof(c3_y), x_bytes64); + u3a_free(x_bytes64); + break; + + case 7: ; + float128_t a128, b128; + u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); + u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); + float128_t span128; + f128M_sub(&b128, &a128, &span128); + float128_t interval128; + float128_t 
n128; + i32_to_f128M(n, &n128); + f128M_div(&span128, &n128, &interval128); + c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n+1)*16+1)*sizeof(c3_y)); + float128_t i128; + for (c3_d i = 1; i < n; i++) { + i32_to_f128M(i, &i128); + f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[n-i]); + f128M_add(&a128, &((float128_t*)x_bytes128)[n-i], &((float128_t*)x_bytes128)[n-i]); + } + ((float128_t*)x_bytes128)[n] = b128; + x_bytes128[(n+1)*16] = 1; // pin head + r_data = u3i_bytes(((n+1)*16+1)*sizeof(c3_y), x_bytes128); + u3a_free(x_bytes128); + break; + } + + return r_data; + } + /* trace - tr(x) */ u3_noun @@ -1124,19 +1566,129 @@ return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); - u3a_free(c_bytes); - - return u3_none; + default: + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(c_bytes); + + return u3_none; + } + } + + u3_noun + u3wf_la_add(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case 
c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_sub(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) + // fxp does not need to match here so no check + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } } } u3_noun - u3wf_la_add(u3_noun cor) + u3wf_la_mul(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] + // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; @@ -1178,7 +1730,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qf_la_mul_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), 
u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -1189,7 +1741,7 @@ } u3_noun - u3wf_la_sub(u3_noun cor) + u3wf_la_div(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -1233,7 +1785,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qf_la_div_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -1244,7 +1796,7 @@ } u3_noun - u3wf_la_mul(u3_noun cor) + u3wf_la_mod(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -1288,7 +1840,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_mul_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qf_la_mod_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -1299,9 +1851,47 @@ } u3_noun - u3wf_la_div(u3_noun cor) + u3wf_la_abs(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_abs_real(x_data, x_shape, x_bloq); + fprintf(stderr, "abs: %lx\n", x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_gth(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, 
y_meta, y_data; @@ -1327,23 +1917,21 @@ y_bloq = u3h(u3t(y_meta)); // 6 y_kind = u3h(u3t(u3t(y_meta))); // 14 y_fxp = u3t(u3t(u3t(y_meta))); // 15 - rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here so no check + c3n == u3r_sing(x_kind, y_kind) || + c3n == u3r_sing(x_fxp, y_fxp) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_div_real(x_data, y_data, x_shape, x_bloq); + case c3__real: ; + u3_noun r_data = u3qf_la_gth_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -1354,9 +1942,9 @@ } u3_noun - u3wf_la_mod(u3_noun cor) + u3wf_la_gte(u3_noun cor) { - // Each argument is a ray, [=meta data=@ux] + // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; @@ -1382,23 +1970,127 @@ y_bloq = u3h(u3t(y_meta)); // 6 y_kind = u3h(u3t(u3t(y_meta))); // 14 y_fxp = u3t(u3t(u3t(y_meta))); // 15 - rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || c3n == u3ud(y_kind) || c3n == u3r_sing(x_shape, y_shape) || c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here so no check + c3n == u3r_sing(x_kind, y_kind) || + c3n == u3r_sing(x_fxp, y_fxp) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: - _set_rounding(rnd); - u3_noun r_data = u3qf_la_mod_real(x_data, y_data, x_shape, x_bloq); + case c3__real: ; + u3_noun r_data = u3qf_la_gte_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_lth(u3_noun cor) + { + // Each 
argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == u3ud(x_kind) || + c3n == u3ud(y_kind) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) || + c3n == u3r_sing(x_fxp, y_fxp) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_lth_real(x_data, y_data, x_shape, x_bloq); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_lte(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data, + y_meta, y_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_4, &x_meta, + u3x_sam_5, &x_data, + u3x_sam_6, &y_meta, + u3x_sam_7, &y_data, + 0) || + c3n == u3ud(x_data) || + c3n == u3ud(y_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + y_shape, y_bloq, y_kind, y_fxp, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(y_bloq) || + c3n == 
u3ud(x_kind) || + c3n == u3ud(y_kind) || + c3n == u3r_sing(x_shape, y_shape) || + c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) || + c3n == u3r_sing(x_fxp, y_fxp) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_lte_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -1640,7 +2332,7 @@ } u3_noun - u3wf_la_diag(u3_noun cor) + u3wf_la_transpose(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -1664,15 +2356,52 @@ { return u3m_bail(c3__exit); } else { - u3_noun r_data = u3qf_la_diag(x_data, x_shape, x_bloq); - c3_d len_x0 = _get_dims(x_shape)[0]; - return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + u3_noun r_data = u3qf_la_transpose(x_data, x_shape, x_bloq); + return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } } } u3_noun - u3wf_la_transpose(u3_noun cor) + u3wf_la_linspace(u3_noun cor) + { + u3_noun x_meta, a, b, n; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_12, &a, + u3x_sam_13, &b, + u3x_sam_7, &n, + 0)) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_linspace_real(a, b, n, x_bloq); + x_shape = u3nc(u3x_atom(n)+1, u3_nul); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_diag(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -1696,8 +2425,9 @@ { return 
u3m_bail(c3__exit); } else { - u3_noun r_data = u3qf_la_transpose(x_data, x_shape, x_bloq); - return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + u3_noun r_data = u3qf_la_diag(x_data, x_shape, x_bloq); + c3_d len_x0 = _get_dims(x_shape)[0]; + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } } } diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 37f4ccc127..de853bfe94 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -260,6 +260,15 @@ u3_noun u3qf_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); + + u3_noun u3qf_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); + + u3_noun u3qf_la_abs_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_gth_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_gte_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_lth_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_lte_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_trace_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index db84df3780..6252c2fa63 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2163,6 +2163,21 @@ static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; static u3j_harm _139_hex__lagoon_mods_a[] = {{".2", u3wf_la_mods}, {}}; static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; +// static u3j_harm _139_hex__lagoon_stack_a[] ={{".2", u3wf_la_stack}, {}}; +// static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; +// static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; +// static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", 
u3wf_la_argmax}, {}}; +// static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; +// static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; +// static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; +static u3j_harm _139_hex__lagoon_linspace_a[]={{".2", u3wf_la_linspace}, {}}; +// static u3j_harm _139_hex__lagoon_range_a[]={{".2", u3wf_la_range}, {}}; +// static u3j_harm _139_hex__lagoon_submatrix_a[]={{".2", u3wf_la_submatrix}, {}}; +static u3j_harm _139_hex__lagoon_abs_a[]={{".2", u3wf_la_abs}, {}}; +static u3j_harm _139_hex__lagoon_gth_a[]={{".2", u3wf_la_gth}, {}}; +static u3j_harm _139_hex__lagoon_gte_a[]={{".2", u3wf_la_gte}, {}}; +static u3j_harm _139_hex__lagoon_lth_a[]={{".2", u3wf_la_lth}, {}}; +static u3j_harm _139_hex__lagoon_lte_a[]={{".2", u3wf_la_lte}, {}}; static u3j_harm _139_hex__lagoon_diag_a[] = {{".2", u3wf_la_diag}, {}}; static u3j_harm _139_hex__lagoon_trace_a[]= {{".2", u3wf_la_trace}, {}}; static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; @@ -2179,6 +2194,21 @@ static u3j_core _139_hex__la_core_d[] = { "mod-scal", 7, _139_hex__lagoon_mods_a, 0, no_hashes }, { "dot", 7, _139_hex__lagoon_dot_a, 0, no_hashes }, { "transpose",7, _139_hex__lagoon_trans_a, 0, no_hashes }, + // { "stack", 7, _139_hex__lagoon_stack_a, 0, no_hashes }, + // { "cumsum", 7, _139_hex__lagoon_cumsum_a, 0, no_hashes }, + // { "argmin", 7, _139_hex__lagoon_argmin_a, 0, no_hashes }, + // { "argmax", 7, _139_hex__lagoon_argmax_a, 0, no_hashes }, + // { "ravel", 7, _139_hex__lagoon_ravel_a, 0, no_hashes }, + // { "min", 7, _139_hex__lagoon_min_a, 0, no_hashes }, + // { "max", 7, _139_hex__lagoon_max_a, 0, no_hashes }, + { "linspace", 7, _139_hex__lagoon_linspace_a, 0, no_hashes }, + // { "range", 7, _139_hex__lagoon_range_a, 0, no_hashes }, + // { "submatrix",7, _139_hex__lagoon_submatrix_a, 0, no_hashes }, + { "abs", 7, _139_hex__lagoon_abs_a, 0, no_hashes }, + { "gth", 7, _139_hex__lagoon_gth_a, 0, no_hashes 
}, + { "gte", 7, _139_hex__lagoon_gte_a, 0, no_hashes }, + { "lth", 7, _139_hex__lagoon_lth_a, 0, no_hashes }, + { "lte", 7, _139_hex__lagoon_lte_a, 0, no_hashes }, { "diag", 7, _139_hex__lagoon_diag_a, 0, no_hashes }, { "trace", 7, _139_hex__lagoon_trace_a,0, no_hashes }, { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 36d66c21ba..6ab0518f67 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -344,6 +344,15 @@ u3_noun u3wf_la_dot(u3_noun); u3_noun u3wf_la_diag(u3_noun); u3_noun u3wf_la_transpose(u3_noun); + + u3_noun u3wf_la_linspace(u3_noun); + + u3_noun u3wf_la_abs(u3_noun); + u3_noun u3wf_la_gth(u3_noun); + u3_noun u3wf_la_gte(u3_noun); + u3_noun u3wf_la_lth(u3_noun); + u3_noun u3wf_la_lte(u3_noun); + u3_noun u3wf_la_trace(u3_noun); u3_noun u3wf_la_mmul(u3_noun); From 118b28103bd04b60e90a1460fe019b329fe8f589 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 16 Apr 2024 20:35:17 -0500 Subject: [PATCH 17/41] Add min/max jets. 
--- WORKSPACE.bazel | 2 +- pkg/noun/jets/f/lagoon.c | 230 +++++++++++++++++++++++++++++++++++++++ pkg/noun/jets/q.h | 2 + pkg/noun/jets/tree.c | 8 +- pkg/noun/jets/w.h | 2 + 5 files changed, 239 insertions(+), 5 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index de3564b253..11cb3c7f58 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -297,7 +297,7 @@ versioned_http_archive( strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "3af44d8cbf0d61e31946af9127099257160d0451", + version = "afeccbabaf43b7d0fde6f3d2809b9c811b91641e", ) versioned_http_archive( diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 7ffc7fd7ad..aec62641ba 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -456,6 +456,162 @@ return r_data; } +/* min - min(x,y) +*/ + u3_noun + u3qf_la_min_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/ leading 0x1, skipped by for range) + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + + u3_noun r_data; + + // Switch on the block size. 
+ switch (bloq) { + case 4: ; + float16_t min_val16 = ((float16_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + min_val16 = f16_min(min_val16, ((float16_t*)x_bytes)[i]); + } + float16_t r16[2]; + r16[0] = min_val16; + r16[1].v = 0x1; + r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); + break; + + case 5: ; + float32_t min_val32 = ((float32_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + min_val32 = f32_min(min_val32, ((float32_t*)x_bytes)[i]); + } + float32_t r32[2]; + r32[0] = min_val32; + r32[1].v = 0x1; + r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); + break; + + case 6: ; + float64_t min_val64 = ((float64_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + min_val64 = f64_min(min_val64, ((float64_t*)x_bytes)[i]); + } + float64_t r64[2]; + r64[0] = min_val64; + r64[1].v = 0x1; + r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); + break; + + case 7: ; + float128_t min_val128 = ((float128_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); + } + float128_t r128[2]; + r128[0] = min_val128; + r128[1] = (float128_t){0x1, 0x0}; + r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); + break; + } + + // Clean up and return. + u3a_free(x_bytes); + + return r_data; + } + +/* max - max(x,y) +*/ + u3_noun + u3qf_la_max_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/ leading 0x1, skipped by for range) + c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); + u3r_bytes(0, syz_x+1, x_bytes, x_data); + + u3_noun r_data; + + // Switch on the block size. 
+ switch (bloq) { + case 4: ; + float16_t max_val16 = ((float16_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + max_val16 = f16_max(max_val16, ((float16_t*)x_bytes)[i]); + } + float16_t r16[2]; + r16[0] = max_val16; + r16[1].v = 0x1; + r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); + break; + + case 5: ; + float32_t max_val32 = ((float32_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + max_val32 = f32_max(max_val32, ((float32_t*)x_bytes)[i]); + } + float32_t r32[2]; + r32[0] = max_val32; + r32[1].v = 0x1; + r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); + break; + + case 6: ; + float64_t max_val64 = ((float64_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + max_val64 = f64_max(max_val64, ((float64_t*)x_bytes)[i]); + } + float64_t r64[2]; + r64[0] = max_val64; + r64[1].v = 0x1; + r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); + break; + + case 7: ; + float128_t max_val128 = ((float128_t*)x_bytes)[0]; + for (c3_d i = 0; i < len_x; i++) { + max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); + } + float128_t r128[2]; + r128[0] = max_val128; + r128[1] = (float128_t){0x1, 0x0}; + r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); + break; + } + + // Clean up and return. 
+ u3a_free(x_bytes); + + return r_data; + } + /* abs - |x| */ u3_noun @@ -1850,6 +2006,80 @@ } } + u3_noun + u3wf_la_min(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_min_real(x_data, x_shape, x_bloq); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_max(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_max_real(x_data, x_shape, x_bloq); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + u3_noun u3wf_la_abs(u3_noun cor) { diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index de853bfe94..17fdb2a646 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -261,6 +261,8 @@ u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); + u3_noun 
u3qf_la_min_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_max_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_abs_real(u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 6252c2fa63..bc482ced0a 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2168,8 +2168,8 @@ static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; // static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; // static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; // static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; -// static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; -// static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; +static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; +static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; static u3j_harm _139_hex__lagoon_linspace_a[]={{".2", u3wf_la_linspace}, {}}; // static u3j_harm _139_hex__lagoon_range_a[]={{".2", u3wf_la_range}, {}}; // static u3j_harm _139_hex__lagoon_submatrix_a[]={{".2", u3wf_la_submatrix}, {}}; @@ -2199,8 +2199,8 @@ static u3j_core _139_hex__la_core_d[] = // { "argmin", 7, _139_hex__lagoon_argmin_a, 0, no_hashes }, // { "argmax", 7, _139_hex__lagoon_argmax_a, 0, no_hashes }, // { "ravel", 7, _139_hex__lagoon_ravel_a, 0, no_hashes }, - // { "min", 7, _139_hex__lagoon_min_a, 0, no_hashes }, - // { "max", 7, _139_hex__lagoon_max_a, 0, no_hashes }, + { "min", 7, _139_hex__lagoon_min_a, 0, no_hashes }, + { "max", 7, _139_hex__lagoon_max_a, 0, no_hashes }, { "linspace", 7, _139_hex__lagoon_linspace_a, 0, no_hashes }, // { "range", 7, _139_hex__lagoon_range_a, 0, no_hashes }, // { "submatrix",7, _139_hex__lagoon_submatrix_a, 0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 6ab0518f67..1e250bb79c 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h 
@@ -345,6 +345,8 @@ u3_noun u3wf_la_diag(u3_noun); u3_noun u3wf_la_transpose(u3_noun); + u3_noun u3wf_la_min(u3_noun); + u3_noun u3wf_la_max(u3_noun); u3_noun u3wf_la_linspace(u3_noun); u3_noun u3wf_la_abs(u3_noun); From c617a8297be21c9a0cf2f70041527cf3268a5039 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 17 Apr 2024 09:58:33 -0500 Subject: [PATCH 18/41] Add min/max arg jets. --- WORKSPACE.bazel | 2 +- pkg/noun/jets/f/lagoon.c | 222 +++++++++++++++++++++++++++++++++++++++ pkg/noun/jets/q.h | 3 + pkg/noun/jets/tree.c | 8 +- pkg/noun/jets/w.h | 3 + 5 files changed, 233 insertions(+), 5 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 11cb3c7f58..c904a63eff 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -297,7 +297,7 @@ versioned_http_archive( strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "afeccbabaf43b7d0fde6f3d2809b9c811b91641e", + version = "7d05697aea5363dcf5f877a9c8b464e9c352d3d4", ) versioned_http_archive( diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index aec62641ba..4f724d4514 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -456,6 +456,152 @@ return r_data; } +/* argmin - argmin(x) +*/ + u3_noun + u3qf_la_argmin_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(shape); + + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); + + // x_bytes is the data array (w/o leading 0x1, which doesn't matter here) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); + + c3_w min_idx = 0; + + // Switch on the block size. 
+    switch (bloq) {
+      case 4: ;
+        float16_t min_val16 = ((float16_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f16_lt(((float16_t*)x_bytes)[i], min_val16)) {
+            min_val16 = ((float16_t*)x_bytes)[i];
+            min_idx = (len_x - i);
+          }
+        }
+        break;
+
+      case 5: ;
+        float32_t min_val32 = ((float32_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f32_lt(((float32_t*)x_bytes)[i], min_val32)) {
+            min_val32 = ((float32_t*)x_bytes)[i];
+            min_idx = (len_x - i);
+          }
+        }
+        break;
+
+      case 6: ;
+        float64_t min_val64 = ((float64_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f64_lt(((float64_t*)x_bytes)[i], min_val64)) {
+            min_val64 = ((float64_t*)x_bytes)[i];
+            min_idx = (len_x - i);
+          }
+        }
+        break;
+
+      case 7: ;
+        float128_t min_val128 = ((float128_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f128_lt(((float128_t*)x_bytes)[i], min_val128)) {
+            min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]);
+            min_idx = (len_x - i);
+          }
+        }
+        break;
+    }
+    u3a_free(x_bytes);  // scratch copy from u3a_malloc above; only min_idx is needed from here on
+    u3_noun r_data = u3i_chub(min_idx);
+
+    return r_data;
+  }
+
+/* argmax - argmax(x)
+*/
+  u3_noun
+  u3qf_la_argmax_real(u3_noun x_data,
+                      u3_noun shape,
+                      u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array. We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes
+    c3_d syz_x = len_x * pow(2, bloq-3);
+
+    // x_bytes is the data array (w/o leading 0x1, which doesn't matter here)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    c3_w max_idx = 0;
+
+    // Switch on the block size.
+    switch (bloq) {
+      case 4: ;
+        float16_t max_val16 = ((float16_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f16_gt(((float16_t*)x_bytes)[i], max_val16)) {
+            max_val16 = ((float16_t*)x_bytes)[i];
+            max_idx = (len_x - i);
+          }
+        }
+        break;
+
+      case 5: ;
+        float32_t max_val32 = ((float32_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f32_gt(((float32_t*)x_bytes)[i], max_val32)) {
+            max_val32 = ((float32_t*)x_bytes)[i];
+            max_idx = (len_x - i);
+          }
+        }
+        break;
+
+      case 6: ;
+        float64_t max_val64 = ((float64_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f64_gt(((float64_t*)x_bytes)[i], max_val64)) {
+            max_val64 = ((float64_t*)x_bytes)[i];
+            max_idx = (len_x - i);
+          }
+        }
+        break;
+
+      case 7: ;
+        float128_t max_val128 = ((float128_t*)x_bytes)[0];
+        for (c3_d i = 0; i < len_x; i++) {
+          if(f128_gt(((float128_t*)x_bytes)[i], max_val128)) {
+            max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]);
+            max_idx = (len_x - i);
+          }
+        }
+        break;
+    }
+    u3a_free(x_bytes);  // scratch copy from u3a_malloc above; only max_idx is needed from here on
+    u3_noun r_data = u3i_chub(max_idx);
+
+    return r_data;
+  }
+
 /* min - min(x,y)
 */
   u3_noun
@@ -2006,6 +2152,82 @@
     }
   }
 
+  u3_noun
+  u3wf_la_argmin(u3_noun cor)
+  {
+    // Each argument is a ray, [=meta data=@ux]
+    u3_noun x_meta, x_data;
+
+    if ( c3n == u3r_mean(cor,
+                         u3x_sam_2, &x_meta,
+                         u3x_sam_3, &x_data,
+                         0) ||
+         c3n == u3ud(x_data) )
+    {
+      return u3m_bail(c3__exit);
+    } else {
+      u3_noun x_shape, x_bloq, x_kind, x_fxp;
+      x_shape = u3h(x_meta); // 2
+      x_bloq = u3h(u3t(x_meta)); // 6
+      x_kind = u3h(u3t(u3t(x_meta))); // 14
+      x_fxp = u3t(u3t(u3t(x_meta))); // 15
+      if ( c3n == u3ud(x_bloq) ||
+           c3n == u3ud(x_kind)
+         )
+      {
+        return u3m_bail(c3__exit);
+      } else {
+        switch (x_kind) {
+          case c3__real: ;
+            u3_noun r_data = u3qf_la_argmin_real(x_data, x_shape, x_bloq);
+            // bare atom (@ index)
+            return r_data;
+
+          default:
+            return u3_none;
+        }
+      }
+    }
+  }
+
+  u3_noun
+  u3wf_la_argmax(u3_noun cor)
+  {
+    // Each argument is a ray, [=meta data=@ux]
+    u3_noun x_meta, x_data;
+
+    if ( c3n ==
u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_argmax_real(x_data, x_shape, x_bloq); + // bare atom (@ index) + return r_data; + + default: + return u3_none; + } + } + } + } + u3_noun u3wf_la_min(u3_noun cor) { diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 17fdb2a646..cf25093573 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -261,6 +261,9 @@ u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_argmin_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_argmax_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_min_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_max_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index bc482ced0a..1b876597fc 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2165,8 +2165,8 @@ static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; // static u3j_harm _139_hex__lagoon_stack_a[] ={{".2", u3wf_la_stack}, {}}; // static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; -// static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; -// static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; +static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; +static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; // static u3j_harm 
_139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; @@ -2196,8 +2196,8 @@ static u3j_core _139_hex__la_core_d[] = { "transpose",7, _139_hex__lagoon_trans_a, 0, no_hashes }, // { "stack", 7, _139_hex__lagoon_stack_a, 0, no_hashes }, // { "cumsum", 7, _139_hex__lagoon_cumsum_a, 0, no_hashes }, - // { "argmin", 7, _139_hex__lagoon_argmin_a, 0, no_hashes }, - // { "argmax", 7, _139_hex__lagoon_argmax_a, 0, no_hashes }, + { "argmin", 7, _139_hex__lagoon_argmin_a, 0, no_hashes }, + { "argmax", 7, _139_hex__lagoon_argmax_a, 0, no_hashes }, // { "ravel", 7, _139_hex__lagoon_ravel_a, 0, no_hashes }, { "min", 7, _139_hex__lagoon_min_a, 0, no_hashes }, { "max", 7, _139_hex__lagoon_max_a, 0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 1e250bb79c..d8b887c896 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -345,6 +345,9 @@ u3_noun u3wf_la_diag(u3_noun); u3_noun u3wf_la_transpose(u3_noun); + u3_noun u3wf_la_argmin(u3_noun); + u3_noun u3wf_la_argmax(u3_noun); + u3_noun u3wf_la_min(u3_noun); u3_noun u3wf_la_max(u3_noun); u3_noun u3wf_la_linspace(u3_noun); From 44f1013fc460c3fbe426297a1a98f92e5c892ef8 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 17 Apr 2024 10:17:38 -0500 Subject: [PATCH 19/41] Add cumsum jet. --- pkg/noun/jets/f/lagoon.c | 126 ++++++++++++++++++++++++++++++++++++--- pkg/noun/jets/q.h | 1 + pkg/noun/jets/tree.c | 2 +- pkg/noun/jets/w.h | 1 + 4 files changed, 121 insertions(+), 9 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 4f724d4514..4235daec55 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -456,6 +456,76 @@ return r_data; } +/* cumsum - x[0] + x[1] + ... x[n] +*/ + u3_noun + u3qf_la_cumsum_real(u3_noun x_data, + u3_noun shape, + u3_noun bloq) + { + // Fence on valid bloq size. 
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array. We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes
+    c3_d syz_x = len_x * pow(2, bloq-3);
+
+    // x_bytes is the data array (w/ leading 0x1, skipped by for range)
+    c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+    u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+    u3_noun r_data;
+
+    // Switch on the block size.
+    switch (bloq) {
+      case 4: ;
+        float16_t sum16[2] = {0};  // accumulator must start at +0.0; it is read by the first f16_add
+        for (c3_d i = 0; i < len_x; i++) {
+          sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i]);
+        }
+        sum16[1].v = 0x1;
+        r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)sum16);
+        break;
+
+      case 5: ;
+        float32_t sum32[2] = {0};  // accumulator must start at +0.0; it is read by the first f32_add
+        for (c3_d i = 0; i < len_x; i++) {
+          sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i]);
+        }
+        sum32[1].v = 0x1;
+        r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)sum32);
+        break;
+
+      case 6: ;
+        float64_t sum64[2] = {0};  // accumulator must start at +0.0; it is read by the first f64_add
+        for (c3_d i = 0; i < len_x; i++) {
+          sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i]);
+        }
+        sum64[1].v = 0x1;
+        r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)sum64);
+        break;
+
+      case 7: ;
+        float128_t sum128[2] = {0};  // accumulator must start at +0.0; it is read by the first f128M_add
+        for (c3_d i = 0; i < len_x; i++) {
+          f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i]), &(sum128[0]));
+        }
+        sum128[1] = (float128_t){0x1, 0x0};
+        r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128);
+        break;
+    }
+
+    // Clean up and return.
+ u3a_free(x_bytes); + + return r_data; + } + /* argmin - argmin(x) */ u3_noun @@ -488,7 +558,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f16_lt(((float16_t*)x_bytes)[i], min_val16)) { min_val16 = ((float16_t*)x_bytes)[i]; - min_idx = (len_x - i); + min_idx = (len_x - i - 1); } } break; @@ -498,7 +568,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f32_lt(((float32_t*)x_bytes)[i], min_val32)) { min_val32 = ((float32_t*)x_bytes)[i]; - min_idx = (len_x - i); + min_idx = (len_x - i - 1); } } break; @@ -508,7 +578,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f64_lt(((float64_t*)x_bytes)[i], min_val64)) { min_val64 = ((float64_t*)x_bytes)[i]; - min_idx = (len_x - i); + min_idx = (len_x - i - 1); } } break; @@ -518,7 +588,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f128_lt(((float128_t*)x_bytes)[i], min_val128)) { min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); - min_idx = (len_x - i); + min_idx = (len_x - i - 1); } } break; @@ -561,7 +631,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f16_gt(((float16_t*)x_bytes)[i], max_val16)) { max_val16 = ((float16_t*)x_bytes)[i]; - max_idx = (len_x - i); + max_idx = (len_x - i - 1); } } break; @@ -571,7 +641,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f32_gt(((float32_t*)x_bytes)[i], max_val32)) { max_val32 = ((float32_t*)x_bytes)[i]; - max_idx = (len_x - i); + max_idx = (len_x - i - 1); } } break; @@ -581,7 +651,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f64_gt(((float64_t*)x_bytes)[i], max_val64)) { max_val64 = ((float64_t*)x_bytes)[i]; - max_idx = (len_x - i); + max_idx = (len_x - i - 1); } } break; @@ -591,7 +661,7 @@ for (c3_d i = 0; i < len_x; i++) { if(f128_gt(((float128_t*)x_bytes)[i], max_val128)) { max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); - max_idx = (len_x - i); + max_idx = (len_x - i - 1); } } break; @@ -2152,6 +2222,46 @@ } } + u3_noun + u3wf_la_cumsum(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, 
+ u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp, + rnd; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_cumsum_real(x_data, x_shape, x_bloq); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + u3_noun u3wf_la_argmin(u3_noun cor) { diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index cf25093573..34a6b7dca7 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -261,6 +261,7 @@ u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); + u3_noun u3qf_la_cumsum_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_argmin_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_argmax_real(u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 1b876597fc..1738f75339 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2164,7 +2164,7 @@ static u3j_harm _139_hex__lagoon_mods_a[] = {{".2", u3wf_la_mods}, {}}; static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; // static u3j_harm _139_hex__lagoon_stack_a[] ={{".2", u3wf_la_stack}, {}}; -// static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; +static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; // static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; 
diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h
index d8b887c896..2b4626f324 100644
--- a/pkg/noun/jets/w.h
+++ b/pkg/noun/jets/w.h
@@ -345,6 +345,7 @@
     u3_noun u3wf_la_diag(u3_noun);
     u3_noun u3wf_la_transpose(u3_noun);
 
+    u3_noun u3wf_la_cumsum(u3_noun);
     u3_noun u3wf_la_argmin(u3_noun);
     u3_noun u3wf_la_argmax(u3_noun);
 

From c1f651d85ff5e3e24480cf210e87d1c2e6d34235 Mon Sep 17 00:00:00 2001
From: Sigilante
Date: Wed, 17 Apr 2024 10:41:24 -0500
Subject: [PATCH 20/41] Add ravel jet.

---
 pkg/noun/jets/f/lagoon.c | 102 +++++++++++++++++++++++++++++++++++++++
 pkg/noun/jets/q.h        |   2 +-
 pkg/noun/jets/tree.c     |   2 +-
 pkg/noun/jets/w.h        |   2 +-
 4 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c
index 4235daec55..09962d0a8f 100644
--- a/pkg/noun/jets/f/lagoon.c
+++ b/pkg/noun/jets/f/lagoon.c
@@ -672,6 +672,71 @@
     return r_data;
   }
 
+/* ravel - x -> ~[x[0], x[1], ... x[n]]
+   entire nd-array busted out as a linear list
+*/
+  u3_noun
+  u3qf_la_ravel_real(u3_noun x_data,
+                     u3_noun shape,
+                     u3_noun bloq)
+  {
+    // Fence on valid bloq size.
+    if (bloq < 4 || bloq > 7) {
+      return u3_none;
+    }
+
+    // Unpack the data as a byte array. We assume total length < 2**64.
+    // len_x is length in base units
+    c3_d len_x = _get_length(shape);
+
+    // syz_x is length in bytes
+    c3_d syz_x = len_x * pow(2, bloq-3);
+
+    // x_bytes is the data array (w/o leading 0x1)
+    c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+    u3r_bytes(0, syz_x, x_bytes, x_data);
+
+    // r_data is the result noun of [data]
+    u3_noun r_data = u3_nul;  // list terminator: every case below conses elements onto r_data
+
+    // Switch on the block size.
+ switch (bloq) { + case 4: + for (c3_d i = 0; i < len_x; i++) { + float16_t x_val16 = ((float16_t*)x_bytes)[i]; + r_data = u3nc(u3i_word(x_val16.v), r_data); + } + break; + + case 5: + for (c3_d i = 0; i < len_x; i++) { + float32_t x_val32 = ((float32_t*)x_bytes)[i]; + r_data = u3nc(u3i_word(x_val32.v), r_data); + } + break; + + case 6: + for (c3_d i = 0; i < len_x; i++) { + float64_t x_val64 = ((float64_t*)x_bytes)[i]; + r_data = u3nc(u3i_chub(x_val64.v), r_data); + } + break; + + case 7: + for (c3_d i = 0; i < len_x; i++) { + float128_t x_val128 = ((float128_t*)x_bytes)[i]; + r_data = u3nc(u3i_chubs(2, (c3_d*)&(x_val128.v)), r_data); + } + break; + } + + // Clean up and return. + u3a_free(x_bytes); + + // return u3qb_flop(r_data); + return r_data; + } + /* min - min(x,y) */ u3_noun @@ -2300,6 +2365,43 @@ } } + u3_noun + u3wf_la_ravel(u3_noun cor) + { + // Each argument is a ray, [=meta data=@ux] + u3_noun x_meta, x_data; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_3, &x_data, + 0) || + c3n == u3ud(x_data) ) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: ; + u3_noun r_data = u3qf_la_ravel_real(x_data, x_shape, x_bloq); + // (list @) + return r_data; + + default: + return u3_none; + } + } + } + } + u3_noun u3wf_la_argmax(u3_noun cor) { diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 34a6b7dca7..0f38bb3a96 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -264,7 +264,7 @@ u3_noun u3qf_la_cumsum_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_argmin_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_argmax_real(u3_noun, u3_noun, u3_noun); - + u3_noun u3qf_la_ravel_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_min_real(u3_noun, u3_noun, u3_noun); u3_noun 
u3qf_la_max_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 1738f75339..9dea54bcfc 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2167,7 +2167,7 @@ static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; -// static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; +static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; static u3j_harm _139_hex__lagoon_linspace_a[]={{".2", u3wf_la_linspace}, {}}; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 2b4626f324..666b611bc5 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -348,7 +348,7 @@ u3_noun u3wf_la_cumsum(u3_noun); u3_noun u3wf_la_argmin(u3_noun); u3_noun u3wf_la_argmax(u3_noun); - + u3_noun u3wf_la_ravel(u3_noun); u3_noun u3wf_la_min(u3_noun); u3_noun u3wf_la_max(u3_noun); u3_noun u3wf_la_linspace(u3_noun); From 692b58d19b2ae8ba4e74489d2a8df8e562c26717 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 17 Apr 2024 13:41:40 -0500 Subject: [PATCH 21/41] Post "final" jet roster. 
--- pkg/noun/jets/f/lagoon.c | 210 +++++++++++++++++++++++++++++++++++---- pkg/noun/jets/q.h | 4 +- pkg/noun/jets/tree.c | 12 +-- pkg/noun/jets/w.h | 3 +- 4 files changed, 197 insertions(+), 32 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 09962d0a8f..8dc6144d2e 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -7,7 +7,6 @@ #include "softfloat.h" #include "softblas.h" -#include #include // for pow() #include @@ -733,7 +732,6 @@ // Clean up and return. u3a_free(x_bytes); - // return u3qb_flop(r_data); return r_data; } @@ -1739,10 +1737,14 @@ return u3_none; } // Assert length of dims is 2. - assert(u3qb_lent(shape) == 2); + if (u3qb_lent(shape) != 2) { + return u3m_bail(c3__exit); + } // Unpack shape into an array of dimensions. c3_d *dims = _get_dims(shape); - assert(dims[0] == dims[1]); + if (dims[0] != dims[1]) { + return u3m_bail(c3__exit); + } // Unpack the data as a byte array. We assume total length < 2**64. c3_d len_x = _get_length(shape); @@ -1782,7 +1784,9 @@ u3_noun bloq) { // Assert length of dims is 2. - assert(u3qb_lent(shape) == 2); + if (u3qb_lent(shape) != 2) { + return u3m_bail(c3__exit); + } // Unpack shape into an array of dimensions. 
c3_d *dims = _get_dims(shape); @@ -1838,12 +1842,13 @@ u3r_bytes(0, 2, (c3_y*)&(a16.v), a); u3r_bytes(0, 2, (c3_y*)&(b16.v), b); float16_t span16 = f16_sub(b16, a16); - float16_t interval16 = f16_div(span16, i32_to_f16(n)); + float16_t interval16 = f16_div(span16, i32_to_f16(n-1)); c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n+1)*2+1)*sizeof(c3_y)); for (c3_d i = 1; i <= n; i++) { ((float16_t*)x_bytes16)[n-i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); } - ((float16_t*)x_bytes16)[n] = b16; + ((float16_t*)x_bytes16)[n] = a16; + ((float16_t*)x_bytes16)[0] = b16; x_bytes16[(n+1)*2] = 1; // pin head r_data = u3i_bytes(((n+1)*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); @@ -1854,12 +1859,13 @@ u3r_bytes(0, 4, (c3_y*)&(a32.v), a); u3r_bytes(0, 4, (c3_y*)&(b32.v), b); float32_t span32 = f32_sub(b32, a32); - float32_t interval32 = f32_div(span32, i32_to_f32(n)); + float32_t interval32 = f32_div(span32, i32_to_f32(n-1)); c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n+1)*4+1)*sizeof(c3_y)); for (c3_d i = 1; i <= n; i++) { ((float32_t*)x_bytes32)[n-i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); } - ((float32_t*)x_bytes32)[n] = b32; + ((float32_t*)x_bytes32)[n] = a32; + ((float32_t*)x_bytes32)[0] = b32; x_bytes32[(n+1)*4] = 1; // pin head r_data = u3i_bytes(((n+1)*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); @@ -1870,12 +1876,13 @@ u3r_bytes(0, 8, (c3_y*)&(a64.v), a); u3r_bytes(0, 8, (c3_y*)&(b64.v), b); float64_t span64 = f64_sub(b64, a64); - float64_t interval64 = f64_div(span64, i32_to_f64(n)); + float64_t interval64 = f64_div(span64, i32_to_f64(n-1)); c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n+1)*8+1)*sizeof(c3_y)); for (c3_d i = 1; i < n; i++) { ((float64_t*)x_bytes64)[n-i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); } - ((float64_t*)x_bytes64)[n] = b64; + ((float64_t*)x_bytes64)[n] = a64; + ((float64_t*)x_bytes64)[0] = b64; x_bytes64[(n+1)*8] = 1; // pin head r_data = u3i_bytes(((n+1)*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); @@ -1889,7 
+1896,7 @@ f128M_sub(&b128, &a128, &span128); float128_t interval128; float128_t n128; - i32_to_f128M(n, &n128); + i32_to_f128M(n-1, &n128); f128M_div(&span128, &n128, &interval128); c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n+1)*16+1)*sizeof(c3_y)); float128_t i128; @@ -1898,7 +1905,8 @@ f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[n-i]); f128M_add(&a128, &((float128_t*)x_bytes128)[n-i], &((float128_t*)x_bytes128)[n-i]); } - ((float128_t*)x_bytes128)[n] = b128; + ((float128_t*)x_bytes128)[n] = a128; + ((float128_t*)x_bytes128)[0] = b128; x_bytes128[(n+1)*16] = 1; // pin head r_data = u3i_bytes(((n+1)*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); @@ -1908,6 +1916,97 @@ return r_data; } +/* range - [a a+d ... b] +*/ + u3_noun + u3qf_la_range_real(u3_noun a, + u3_noun b, + u3_noun d, + u3_noun bloq) + { + // Fence on valid bloq size. + if (bloq < 4 || bloq > 7) { + return u3_none; + } + + u3_noun r_data; + + switch (bloq) { + case 4: ; + float16_t a16, b16, interval16; + u3r_bytes(0, 2, (c3_y*)&(a16.v), a); + u3r_bytes(0, 2, (c3_y*)&(b16.v), b); + u3r_bytes(0, 2, (c3_y*)&(interval16.v), d); + c3_d n16 = f16_to_i64(f16_div(f16_sub(b16, a16), interval16), softfloat_round_minMag, false); + c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n16+1)*2+1)*sizeof(c3_y)); + for (c3_d i = 1; i <= n16; i++) { + ((float16_t*)x_bytes16)[n16-i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); + } + ((float16_t*)x_bytes16)[n16] = a16; + // ((float16_t*)x_bytes16)[0] = b16; + x_bytes16[(n16+1)*2] = 1; // pin head + r_data = u3i_bytes(((n16+1)*2+1)*sizeof(c3_y), x_bytes16); + u3a_free(x_bytes16); + break; + + case 5: ; + float32_t a32, b32, interval32; + u3r_bytes(0, 4, (c3_y*)&(a32.v), a); + u3r_bytes(0, 4, (c3_y*)&(b32.v), b); + u3r_bytes(0, 4, (c3_y*)&(interval32.v), d); + c3_d n32 = f32_to_i64(f32_div(f32_sub(b32, a32), interval32), softfloat_round_minMag, false); + c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n32+1)*4+1)*sizeof(c3_y)); + for (c3_d i = 1; i <= n32; i++) { + 
((float32_t*)x_bytes32)[n32-i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); + } + ((float32_t*)x_bytes32)[n32] = a32; + // ((float32_t*)x_bytes32)[0] = b32; + x_bytes32[(n32+1)*4] = 1; // pin head + r_data = u3i_bytes(((n32+1)*4+1)*sizeof(c3_y), x_bytes32); + u3a_free(x_bytes32); + break; + + case 6: ; + float64_t a64, b64, interval64; + u3r_bytes(0, 8, (c3_y*)&(a64.v), a); + u3r_bytes(0, 8, (c3_y*)&(b64.v), b); + u3r_bytes(0, 8, (c3_y*)&(interval64.v), d); + c3_d n64 = f64_to_i64(f64_div(f64_sub(b64, a64), interval64), softfloat_round_minMag, false); + c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n64+1)*8+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n64; i++) { + ((float64_t*)x_bytes64)[n64-i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); + } + ((float64_t*)x_bytes64)[n64] = a64; + // ((float64_t*)x_bytes64)[0] = b64; + x_bytes64[(n64+1)*8] = 1; // pin head + r_data = u3i_bytes(((n64+1)*8+1)*sizeof(c3_y), x_bytes64); + u3a_free(x_bytes64); + break; + + case 7: ; + float128_t a128, b128, interval128; + u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); + u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); + u3r_bytes(0, 16, (c3_y*)&(interval128.v), d); + c3_d n128 = f128_to_i64(f128_div(f128_sub(b128, a128), interval128), softfloat_round_minMag, false); + c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16+1)*sizeof(c3_y)); + float128_t i128; + for (c3_d i = 1; i < n128; i++) { + i32_to_f128M(i, &i128); + f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[n128-i]); + f128M_add(&a128, &((float128_t*)x_bytes128)[n128-i], &((float128_t*)x_bytes128)[n128-i]); + } + ((float128_t*)x_bytes128)[n128] = a128; + // ((float128_t*)x_bytes128)[0] = b128; + x_bytes128[(n128+1)*16] = 1; // pin head + r_data = u3i_bytes(((n128+1)*16+1)*sizeof(c3_y), x_bytes128); + u3a_free(x_bytes128); + break; + } + + return r_data; + } + /* trace - tr(x) */ u3_noun @@ -1936,10 +2035,12 @@ c3_d Nb = u3h(y_shape); c3_d P = u3h(u3t(y_shape)); - assert(u3_nul == u3t(u3t(x_shape))); - assert(Na == Nb); + if 
((u3_nul != u3t(u3t(x_shape))) || + (u3_nul != u3t(u3t(y_shape))) || + (Na != Nb)) { + return u3m_bail(c3__exit); + } c3_d N = Na; - assert(u3_nul == u3t(u3t(y_shape))); c3_y* x_bytes = (c3_y*)u3a_malloc((M*N)*sizeof(c3_y)); u3r_bytes(0, M*N, x_bytes, x_data); @@ -3029,7 +3130,7 @@ u3_noun u3wf_la_linspace(u3_noun cor) { - u3_noun x_meta, a, b, n; + u3_noun x_meta, a, b, n, rnd; if ( c3n == u3r_mean(cor, u3x_sam_2, &x_meta, @@ -3045,6 +3146,7 @@ x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) @@ -3052,9 +3154,79 @@ return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; + case c3__real: + _set_rounding(rnd); u3_noun r_data = u3qf_la_linspace_real(a, b, n, x_bloq); - x_shape = u3nc(u3x_atom(n)+1, u3_nul); + x_shape = u3nt(u3x_atom(n), 0x1, u3_nul); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + + default: + return u3_none; + } + } + } + } + + u3_noun + u3wf_la_range(u3_noun cor) + { + u3_noun x_meta, a, b, d, rnd; + + if ( c3n == u3r_mean(cor, + u3x_sam_2, &x_meta, + u3x_sam_12, &a, + u3x_sam_13, &b, + u3x_sam_7, &d, + 0)) + { + return u3m_bail(c3__exit); + } else { + u3_noun x_shape, x_bloq, x_kind, x_fxp; + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3ud(x_bloq) || + c3n == u3ud(x_kind) + ) + { + return u3m_bail(c3__exit); + } else { + switch (x_kind) { + case c3__real: + _set_rounding(rnd); + u3_noun r_data = u3qf_la_range_real(a, b, d, x_bloq); + c3_d a_, b_, d_; + c3_ds n_; + switch (x_bloq) { + case 4: + u3r_bytes(0, 2, (c3_y*)&a_, a); + u3r_bytes(0, 2, (c3_y*)&b_, b); + u3r_bytes(0, 2, (c3_y*)&d_, d); + n_ = f16_to_i64(f16_div(f16_sub((float16_t){b_}, (float16_t){a_}), (float16_t){d_}), softfloat_round_minMag, 
false); + break; + case 5: + u3r_bytes(0, 4, (c3_y*)&a_, a); + u3r_bytes(0, 4, (c3_y*)&b_, b); + u3r_bytes(0, 4, (c3_y*)&d_, d); + n_ = f32_to_i64(f32_div(f32_sub((float32_t){b_}, (float32_t){a_}), (float32_t){d_}), softfloat_round_minMag, false); + break; + case 6: + u3r_bytes(0, 8, (c3_y*)&a_, a); + u3r_bytes(0, 8, (c3_y*)&b_, b); + u3r_bytes(0, 8, (c3_y*)&d_, d); + n_ = f64_to_i64(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_}), softfloat_round_minMag, false); + break; + case 7: + u3r_bytes(0, 16, (c3_y*)&a_, a); + u3r_bytes(0, 16, (c3_y*)&b_, b); + u3r_bytes(0, 16, (c3_y*)&d_, d); + n_ = f128_to_i64(f128_div(f128_sub((float128_t){b_}, (float128_t){a_}), (float128_t){d_}), softfloat_round_minMag, false); + break; + } + u3_noun n = u3i_chub(n_+1); + x_shape = u3nt(u3k(n), 0x1, u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index 0f38bb3a96..b5e5cbc8e1 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -260,7 +260,6 @@ u3_noun u3qf_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_cumsum_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_argmin_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_argmax_real(u3_noun, u3_noun, u3_noun); @@ -268,13 +267,12 @@ u3_noun u3qf_la_min_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_max_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); - + u3_noun u3qf_la_range_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_abs_real(u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_gth_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_gte_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_lth_real(u3_noun, u3_noun, u3_noun, u3_noun); u3_noun u3qf_la_lte_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_trace_real(u3_noun, 
u3_noun, u3_noun); u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 9dea54bcfc..dc27123a4d 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2163,7 +2163,6 @@ static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; static u3j_harm _139_hex__lagoon_mods_a[] = {{".2", u3wf_la_mods}, {}}; static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; -// static u3j_harm _139_hex__lagoon_stack_a[] ={{".2", u3wf_la_stack}, {}}; static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; @@ -2171,8 +2170,7 @@ static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; static u3j_harm _139_hex__lagoon_linspace_a[]={{".2", u3wf_la_linspace}, {}}; -// static u3j_harm _139_hex__lagoon_range_a[]={{".2", u3wf_la_range}, {}}; -// static u3j_harm _139_hex__lagoon_submatrix_a[]={{".2", u3wf_la_submatrix}, {}}; +static u3j_harm _139_hex__lagoon_range_a[]={{".2", u3wf_la_range}, {}}; static u3j_harm _139_hex__lagoon_abs_a[]={{".2", u3wf_la_abs}, {}}; static u3j_harm _139_hex__lagoon_gth_a[]={{".2", u3wf_la_gth}, {}}; static u3j_harm _139_hex__lagoon_gte_a[]={{".2", u3wf_la_gte}, {}}; @@ -2194,16 +2192,14 @@ static u3j_core _139_hex__la_core_d[] = { "mod-scal", 7, _139_hex__lagoon_mods_a, 0, no_hashes }, { "dot", 7, _139_hex__lagoon_dot_a, 0, no_hashes }, { "transpose",7, _139_hex__lagoon_trans_a, 0, no_hashes }, - // { "stack", 7, _139_hex__lagoon_stack_a, 0, no_hashes }, - // { "cumsum", 7, _139_hex__lagoon_cumsum_a, 0, no_hashes }, + { "cumsum", 7, _139_hex__lagoon_cumsum_a, 0, 
no_hashes }, { "argmin", 7, _139_hex__lagoon_argmin_a, 0, no_hashes }, { "argmax", 7, _139_hex__lagoon_argmax_a, 0, no_hashes }, - // { "ravel", 7, _139_hex__lagoon_ravel_a, 0, no_hashes }, + { "ravel", 7, _139_hex__lagoon_ravel_a, 0, no_hashes }, { "min", 7, _139_hex__lagoon_min_a, 0, no_hashes }, { "max", 7, _139_hex__lagoon_max_a, 0, no_hashes }, { "linspace", 7, _139_hex__lagoon_linspace_a, 0, no_hashes }, - // { "range", 7, _139_hex__lagoon_range_a, 0, no_hashes }, - // { "submatrix",7, _139_hex__lagoon_submatrix_a, 0, no_hashes }, + { "range", 7, _139_hex__lagoon_range_a, 0, no_hashes }, { "abs", 7, _139_hex__lagoon_abs_a, 0, no_hashes }, { "gth", 7, _139_hex__lagoon_gth_a, 0, no_hashes }, { "gte", 7, _139_hex__lagoon_gte_a, 0, no_hashes }, diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index 666b611bc5..e7976ba3b7 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -344,7 +344,6 @@ u3_noun u3wf_la_dot(u3_noun); u3_noun u3wf_la_diag(u3_noun); u3_noun u3wf_la_transpose(u3_noun); - u3_noun u3wf_la_cumsum(u3_noun); u3_noun u3wf_la_argmin(u3_noun); u3_noun u3wf_la_argmax(u3_noun); @@ -352,7 +351,7 @@ u3_noun u3wf_la_min(u3_noun); u3_noun u3wf_la_max(u3_noun); u3_noun u3wf_la_linspace(u3_noun); - + u3_noun u3wf_la_range(u3_noun); u3_noun u3wf_la_abs(u3_noun); u3_noun u3wf_la_gth(u3_noun); u3_noun u3wf_la_gte(u3_noun); From 338154c13e913c8248ba3d810af08db7429c89f3 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 17 Apr 2024 17:02:46 -0500 Subject: [PATCH 22/41] Post all jets. --- pkg/noun/jets/f/lagoon.c | 159 +++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 80 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 8dc6144d2e..9e5215e387 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -128,7 +128,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. 
- switch (bloq) { + switch (u3x_atom(bloq)) { case 4: haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); break; @@ -186,7 +186,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); break; @@ -245,7 +245,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = f16_mul(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); @@ -311,7 +311,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = f16_div(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); @@ -377,7 +377,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; @@ -466,6 +466,7 @@ if (bloq < 4 || bloq > 7) { return u3_none; } + fprintf(stderr, "function rounding mode: %lx\r\n", softfloat_roundingMode); // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units @@ -481,7 +482,7 @@ u3_noun r_data; // Switch on the block size. 
- switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t sum16[2]; for (c3_d i = 0; i < len_x; i++) { @@ -512,7 +513,10 @@ case 7: ; float128_t sum128[2]; for (c3_d i = 0; i < len_x; i++) { + fprintf(stderr, " sum128[%d] = %lx %lx\r\n", i, sum128[i].v[0], sum128[i].v[1]); + fprintf(stderr, "x_bytes[%d] = %lx %lx\r\n", i, ((float128_t*)x_bytes)[i].v[0], ((float128_t*)x_bytes)[i].v[1]); f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i]), &(sum128[0])); + fprintf(stderr, " equals[%d] = %lx %lx\r\n", i, sum128[i].v[0], sum128[i].v[1]); } sum128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128); @@ -551,7 +555,7 @@ c3_w min_idx = 0; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t min_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { @@ -624,7 +628,7 @@ c3_w max_idx = 0; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t max_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { @@ -696,10 +700,10 @@ u3r_bytes(0, syz_x, x_bytes, x_data); // r_data is the result noun of [data] - u3_noun r_data; + u3_noun r_data = u3_nul; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; @@ -761,7 +765,7 @@ u3_noun r_data; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t min_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { @@ -839,7 +843,7 @@ u3_noun r_data; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t max_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { @@ -915,7 +919,7 @@ u3r_bytes(0, syz_x+1, x_bytes, x_data); // Switch on the block size. 
- switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { ((float16_t*)x_bytes)[i] = f16_abs(((float16_t*)x_bytes)[i]); @@ -979,7 +983,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; @@ -1052,7 +1056,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; @@ -1125,7 +1129,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; @@ -1198,7 +1202,7 @@ u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; @@ -1275,7 +1279,7 @@ float128_t n128; // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: u3r_bytes(0, 2, (c3_y*)&(n16.v), n); // set y to [n] @@ -1357,7 +1361,7 @@ float128_t n128; // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: u3r_bytes(0, 2, (c3_y*)&(n16.v), n); // set y to [n] @@ -1438,7 +1442,7 @@ float128_t n128; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: u3r_bytes(0, 2, (c3_y*)&(n16.v), n); hscal(len_x, n16, (float16_t*)x_bytes, 1); @@ -1501,7 +1505,7 @@ float128_t in128; // Switch on the block size. 
- switch (bloq) { + switch (u3x_atom(bloq)) { case 4: // XX note that in16 is doing double duty here u3r_bytes(0, 2, (c3_y*)&(in16.v), n); @@ -1572,7 +1576,7 @@ float128_t n128, in128; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: u3r_bytes(0, 2, (c3_y*)&(n16.v), n); in16 = f16_div((float16_t){SB_REAL16_ONE}, n16); @@ -1688,7 +1692,7 @@ u3_noun r_data; // Switch on the block size. - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t r16[2]; r16[0] = hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); @@ -1836,7 +1840,7 @@ u3_noun r_data; - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t a16, b16; u3r_bytes(0, 2, (c3_y*)&(a16.v), a); @@ -1931,7 +1935,7 @@ u3_noun r_data; - switch (bloq) { + switch (u3x_atom(bloq)) { case 4: ; float16_t a16, b16, interval16; u3r_bytes(0, 2, (c3_y*)&(a16.v), a); @@ -2042,75 +2046,66 @@ } c3_d N = Na; - c3_y* x_bytes = (c3_y*)u3a_malloc((M*N)*sizeof(c3_y)); - u3r_bytes(0, M*N, x_bytes, x_data); - c3_y* y_bytes = (c3_y*)u3a_malloc((N*P)*sizeof(c3_y)); - u3r_bytes(0, N*P, y_bytes, y_data); - c3_y* c_bytes = (c3_y*)u3a_malloc((M*P)*sizeof(c3_y)); - - u3_noun r_data; - - // Switch on the block size. - switch (bloq) { - case 4: - hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)c_bytes, P); + // Unpack the data as a byte array. We assume total length < 2**64. + // len_x is length in base units + c3_d len_x = _get_length(x_shape); // M*N - // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + // syz_x is length in bytes + c3_d syz_x = len_x * pow(2, bloq-3); // M*N - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - u3a_free(c_bytes); + // x_bytes is the data array (w/o leading 0x1) + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*pow(2,bloq-3)*sizeof(c3_y)); + u3r_bytes(0, syz_x, x_bytes, x_data); - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + // len_x is length in base units + c3_d len_y = _get_length(y_shape); // N*P - case 5: - sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)c_bytes, P); + // syz_x is length in bytes + c3_d syz_y = len_x * pow(2, bloq-3); // N*P - // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + // y_bytes is the data array (w/o leading 0x1) + c3_y* y_bytes = (c3_y*)u3a_malloc(syz_y*pow(2,bloq-3)*sizeof(c3_y)); + u3r_bytes(0, syz_y, y_bytes, y_data); - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - u3a_free(c_bytes); + // syz_r is length in bytes + c3_d syz_r = (M*P) * pow(2, bloq-3); // M*P + + // len_r is length in base units + c3_d len_r = M*P; // M*P - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + // r_bytes is the result array + c3_y* r_bytes = (c3_y*)u3a_malloc((syz_r*pow(2,bloq-3)+1)*sizeof(c3_y)); + r_bytes[syz_r] = 1; // pin head - case 6: - dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)c_bytes, P); + u3_noun r_data; - // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + // Switch on the block size. + switch (u3x_atom(bloq)) { + case 4: + hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)r_bytes, P); + break; - // Clean up. 
- u3a_free(x_bytes); - u3a_free(y_bytes); - u3a_free(c_bytes); + case 5: + sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)r_bytes, P); + break; - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + case 6: + dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)r_bytes, P); + break; case 7: - qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)c_bytes, P); - - // Unpack the result back into a noun. - r_data = u3i_bytes(M*P, c_bytes); + qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)r_bytes, P); + break; + } - // Clean up. - u3a_free(x_bytes); - u3a_free(y_bytes); - u3a_free(c_bytes); + // Unpack the result back into a noun. 
+ r_data = u3i_bytes(syz_r*pow(2,bloq-3)+1, r_bytes); - return u3nc(u3nq(u3nl(M, P, u3_none), bloq, c3__real, u3_nul), r_data); + u3a_free(x_bytes); + u3a_free(y_bytes); + u3a_free(r_bytes); - default: - u3a_free(x_bytes); - u3a_free(y_bytes); - u3a_free(c_bytes); - - return u3_none; - } + return u3nc(u3nq(u3nt(u3k(M), u3k(P), u3_nul), u3k(bloq), c3__real, u3_nul), u3k(r_data)); } u3_noun @@ -2419,6 +2414,8 @@ case c3__real: _set_rounding(rnd); u3_noun r_data = u3qf_la_cumsum_real(x_data, x_shape, x_bloq); + fprintf(stderr, "desired rounding mode: %lx\r\n", rnd); + fprintf(stderr, "apparent rounding mode: %lx\r\n", softfloat_roundingMode); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3349,8 +3346,10 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - return u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); - break; + u3_noun r_data; + r_data = u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); + // result is already [meta data] + return u3k(r_data); default: return u3_none; From 6f2fc23c6d97f4b206cc239e7bb156f906f30ce5 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 17 Apr 2024 19:40:06 -0500 Subject: [PATCH 23/41] Fix cumsum. 
--- pkg/noun/jets/f/lagoon.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 9e5215e387..0d16f70eb5 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -485,6 +485,7 @@ switch (u3x_atom(bloq)) { case 4: ; float16_t sum16[2]; + sum16[0] = (float16_t){SB_REAL16_ZERO}; for (c3_d i = 0; i < len_x; i++) { sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i]); } @@ -494,6 +495,7 @@ case 5: ; float32_t sum32[2]; + sum32[0] = (float32_t){SB_REAL32_ZERO}; for (c3_d i = 0; i < len_x; i++) { sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i]); } @@ -503,6 +505,7 @@ case 6: ; float64_t sum64[2]; + sum64[0] = (float64_t){SB_REAL64_ZERO}; for (c3_d i = 0; i < len_x; i++) { sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i]); } @@ -512,11 +515,9 @@ case 7: ; float128_t sum128[2]; + sum128[0] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; for (c3_d i = 0; i < len_x; i++) { - fprintf(stderr, " sum128[%d] = %lx %lx\r\n", i, sum128[i].v[0], sum128[i].v[1]); - fprintf(stderr, "x_bytes[%d] = %lx %lx\r\n", i, ((float128_t*)x_bytes)[i].v[0], ((float128_t*)x_bytes)[i].v[1]); f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i]), &(sum128[0])); - fprintf(stderr, " equals[%d] = %lx %lx\r\n", i, sum128[i].v[0], sum128[i].v[1]); } sum128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128); @@ -2414,8 +2415,6 @@ case c3__real: _set_rounding(rnd); u3_noun r_data = u3qf_la_cumsum_real(x_data, x_shape, x_bloq); - fprintf(stderr, "desired rounding mode: %lx\r\n", rnd); - fprintf(stderr, "apparent rounding mode: %lx\r\n", softfloat_roundingMode); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2640,7 +2639,6 @@ switch (x_kind) { case c3__real: ; u3_noun r_data = u3qf_la_abs_real(x_data, x_shape, x_bloq); - fprintf(stderr, "abs: %lx\n", x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), 
u3k(x_kind), u3k(x_fxp)), r_data); default: From 90014967eb162dafdf62ebc778339bf8d44929b8 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Thu, 18 Apr 2024 14:26:51 -0500 Subject: [PATCH 24/41] Post working version of all jets for reals to date. --- pkg/noun/jets/f/lagoon.c | 106 +++++++++++++++++++-------------------- pkg/noun/jets/tree.c | 20 ++++---- 2 files changed, 62 insertions(+), 64 deletions(-) diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/f/lagoon.c index 0d16f70eb5..8fd95be2ab 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/f/lagoon.c @@ -466,7 +466,6 @@ if (bloq < 4 || bloq > 7) { return u3_none; } - fprintf(stderr, "function rounding mode: %lx\r\n", softfloat_roundingMode); // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units @@ -1319,7 +1318,7 @@ } // r_data is the result noun of [data] - y_bytes[syz_x] = 1; // pin head + y_bytes[syz_x] = 0x1; // pin head u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. @@ -1401,7 +1400,7 @@ } // r_data is the result noun of [data] - x_bytes[syz_x] = 1; // pin head + x_bytes[syz_x] = 0x1; // pin head u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. 
@@ -1435,7 +1434,7 @@ // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); - x_bytes[syz_x] = 1; // pin head + x_bytes[syz_x] = 0x1; // pin head float16_t n16; float32_t n32; @@ -1498,7 +1497,7 @@ // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); - x_bytes[syz_x] = 1; // pin head + x_bytes[syz_x] = 0x1; // pin head float16_t in16; float32_t in32; @@ -1769,7 +1768,7 @@ y_bytes[i*wyd+k] = x_bytes[(i*dims[0]+i)*wyd+k]; } } - y_bytes[syz_y] = 1; // pin head + y_bytes[syz_y] = 0x1; // pin head // Unpack the result back into a noun. r_data = u3i_bytes((syz_y+1)*sizeof(c3_y), y_bytes); @@ -1814,7 +1813,7 @@ } } } - y_bytes[syz_x] = 1; // pin head + y_bytes[syz_x] = 0x1; // pin head // Unpack the result back into a noun. r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); @@ -1854,7 +1853,7 @@ } ((float16_t*)x_bytes16)[n] = a16; ((float16_t*)x_bytes16)[0] = b16; - x_bytes16[(n+1)*2] = 1; // pin head + x_bytes16[(n+1)*2] = 0x1; // pin head r_data = u3i_bytes(((n+1)*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); break; @@ -1871,7 +1870,7 @@ } ((float32_t*)x_bytes32)[n] = a32; ((float32_t*)x_bytes32)[0] = b32; - x_bytes32[(n+1)*4] = 1; // pin head + x_bytes32[(n+1)*4] = 0x1; // pin head r_data = u3i_bytes(((n+1)*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); break; @@ -1888,7 +1887,7 @@ } ((float64_t*)x_bytes64)[n] = a64; ((float64_t*)x_bytes64)[0] = b64; - x_bytes64[(n+1)*8] = 1; // pin head + x_bytes64[(n+1)*8] = 0x1; // pin head r_data = u3i_bytes(((n+1)*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); break; @@ -1912,7 +1911,7 @@ } ((float128_t*)x_bytes128)[n] = a128; ((float128_t*)x_bytes128)[0] = b128; - x_bytes128[(n+1)*16] = 1; // pin head + x_bytes128[(n+1)*16] = 0x1; // pin head r_data = u3i_bytes(((n+1)*16+1)*sizeof(c3_y), x_bytes128); 
u3a_free(x_bytes128); break; @@ -1949,7 +1948,7 @@ } ((float16_t*)x_bytes16)[n16] = a16; // ((float16_t*)x_bytes16)[0] = b16; - x_bytes16[(n16+1)*2] = 1; // pin head + x_bytes16[(n16+1)*2] = 0x1; // pin head r_data = u3i_bytes(((n16+1)*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); break; @@ -1966,7 +1965,7 @@ } ((float32_t*)x_bytes32)[n32] = a32; // ((float32_t*)x_bytes32)[0] = b32; - x_bytes32[(n32+1)*4] = 1; // pin head + x_bytes32[(n32+1)*4] = 0x1; // pin head r_data = u3i_bytes(((n32+1)*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); break; @@ -1983,7 +1982,7 @@ } ((float64_t*)x_bytes64)[n64] = a64; // ((float64_t*)x_bytes64)[0] = b64; - x_bytes64[(n64+1)*8] = 1; // pin head + x_bytes64[(n64+1)*8] = 0x1; // pin head r_data = u3i_bytes(((n64+1)*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); break; @@ -2003,7 +2002,7 @@ } ((float128_t*)x_bytes128)[n128] = a128; // ((float128_t*)x_bytes128)[0] = b128; - x_bytes128[(n128+1)*16] = 1; // pin head + x_bytes128[(n128+1)*16] = 0x1; // pin head r_data = u3i_bytes(((n128+1)*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); break; @@ -2021,7 +2020,7 @@ { u3_noun d_data = u3qf_la_diag(x_data, shape, bloq); c3_d len_x0 = _get_dims(shape)[0]; - u3_noun r_data = u3qf_la_dot_real(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), bloq); + u3_noun r_data = u3qf_la_dot_real(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq)); return r_data; } @@ -2035,10 +2034,10 @@ u3_noun bloq) { // Unpack the data as a byte array. We assume total length < 2**64. 
- c3_d M = u3h(x_shape); - c3_d Na = u3h(u3t(x_shape)); - c3_d Nb = u3h(y_shape); - c3_d P = u3h(u3t(y_shape)); + c3_d M = u3x_atom(u3h(x_shape)); + c3_d Na= u3x_atom(u3h(u3t(x_shape))); + c3_d Nb= u3x_atom(u3h(y_shape)); + c3_d P = u3x_atom(u3h(u3t(y_shape))); if ((u3_nul != u3t(u3t(x_shape))) || (u3_nul != u3t(u3t(y_shape))) || @@ -2055,58 +2054,62 @@ c3_d syz_x = len_x * pow(2, bloq-3); // M*N // x_bytes is the data array (w/o leading 0x1) - c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*pow(2,bloq-3)*sizeof(c3_y)); + c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // len_x is length in base units c3_d len_y = _get_length(y_shape); // N*P // syz_x is length in bytes - c3_d syz_y = len_x * pow(2, bloq-3); // N*P + c3_d syz_y = len_y * pow(2, bloq-3); // N*P // y_bytes is the data array (w/o leading 0x1) - c3_y* y_bytes = (c3_y*)u3a_malloc(syz_y*pow(2,bloq-3)*sizeof(c3_y)); + c3_y* y_bytes = (c3_y*)u3a_malloc(syz_y*sizeof(c3_y)); u3r_bytes(0, syz_y, y_bytes, y_data); - - // syz_r is length in bytes - c3_d syz_r = (M*P) * pow(2, bloq-3); // M*P // len_r is length in base units c3_d len_r = M*P; // M*P - // r_bytes is the result array - c3_y* r_bytes = (c3_y*)u3a_malloc((syz_r*pow(2,bloq-3)+1)*sizeof(c3_y)); - r_bytes[syz_r] = 1; // pin head + // syz_r is length in bytes + c3_d syz_r = len_r * pow(2, bloq-3); // M*P - u3_noun r_data; + // r_bytes is the result array + c3_y* r_bytes = (c3_y*)u3a_malloc((syz_r+1)*sizeof(c3_y)); + r_bytes[syz_r] = 0x1; // pin head + // initialize with 0x0s + for (c3_d i = 0; i < syz_r; i++) { + r_bytes[i] = 0x0; + } // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: - hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, N, (float16_t){SB_REAL16_ZERO}, (float16_t*)r_bytes, P); + hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, P, (float16_t){SB_REAL16_ZERO}, (float16_t*)r_bytes, P); break; case 5: - sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, N, (float32_t){SB_REAL32_ZERO}, (float32_t*)r_bytes, P); + sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, P, (float32_t){SB_REAL32_ZERO}, (float32_t*)r_bytes, P); break; case 6: - dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, N, (float64_t){SB_REAL64_ZERO}, (float64_t*)r_bytes, P); + dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, P, (float64_t){SB_REAL64_ZERO}, (float64_t*)r_bytes, P); break; case 7: - qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, N, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)r_bytes, P); + qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, P, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)r_bytes, P); break; } // Unpack the result back into a noun. 
- r_data = u3i_bytes(syz_r*pow(2,bloq-3)+1, r_bytes); + u3_noun r_data = u3i_bytes(syz_r+1, r_bytes); + u3_noun M_ = u3i_chub(M); + u3_noun P_ = u3i_chub(P); u3a_free(x_bytes); u3a_free(y_bytes); u3a_free(r_bytes); - return u3nc(u3nq(u3nt(u3k(M), u3k(P), u3_nul), u3k(bloq), c3__real, u3_nul), u3k(r_data)); + return u3nc(u3nq(u3nt(M_, P_, u3_nul), u3k(bloq), c3__real, u3_nul), r_data); } u3_noun @@ -3321,22 +3324,18 @@ u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, rnd; - if ( c3n == u3r_mean(x_meta, - 2, &x_shape, - 6, &x_bloq, - 14, &x_kind, - 15, &x_fxp, - 0) || - c3n == u3r_mean(y_meta, - 2, &y_shape, - 6, &y_bloq, - 14, &y_kind, - 15, &y_fxp, - 0) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || + x_shape = u3h(x_meta); // 2 + x_bloq = u3h(u3t(x_meta)); // 6 + x_kind = u3h(u3t(u3t(x_meta))); // 14 + x_fxp = u3t(u3t(u3t(x_meta))); // 15 + y_shape = u3h(y_meta); // 2 + y_bloq = u3h(u3t(y_meta)); // 6 + y_kind = u3h(u3t(u3t(y_meta))); // 14 + y_fxp = u3t(u3t(u3t(y_meta))); // 15 + rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + if ( c3n == u3r_sing(x_bloq, y_bloq) || + c3n == u3r_sing(x_kind, y_kind) // fxp does not need to match so no check - c3n == u3r_mean(cor, u3x_con_sam, &rnd, 0) ) { return u3m_bail(c3__exit); @@ -3344,10 +3343,9 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data; - r_data = u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); + u3_noun r_data = u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); // result is already [meta data] - return u3k(r_data); + return r_data; default: return u3_none; diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index dc27123a4d..35573a42e6 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2148,8 +2148,8 @@ static u3j_core _139_hex_json_d[] = }; /* linear algebra jets - XX move to outer _sep_ core for /lib? eventually -static u3j_core _139_sep_d[] = + XX move to outer _hep_ core for /lib? 
eventually +static u3j_core _139_hep_d[] = */ static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; @@ -2167,15 +2167,15 @@ static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; -static u3j_harm _139_hex__lagoon_min_a[]={{".2", u3wf_la_min}, {}}; -static u3j_harm _139_hex__lagoon_max_a[]={{".2", u3wf_la_max}, {}}; +static u3j_harm _139_hex__lagoon_min_a[] = {{".2", u3wf_la_min}, {}}; +static u3j_harm _139_hex__lagoon_max_a[] = {{".2", u3wf_la_max}, {}}; static u3j_harm _139_hex__lagoon_linspace_a[]={{".2", u3wf_la_linspace}, {}}; -static u3j_harm _139_hex__lagoon_range_a[]={{".2", u3wf_la_range}, {}}; -static u3j_harm _139_hex__lagoon_abs_a[]={{".2", u3wf_la_abs}, {}}; -static u3j_harm _139_hex__lagoon_gth_a[]={{".2", u3wf_la_gth}, {}}; -static u3j_harm _139_hex__lagoon_gte_a[]={{".2", u3wf_la_gte}, {}}; -static u3j_harm _139_hex__lagoon_lth_a[]={{".2", u3wf_la_lth}, {}}; -static u3j_harm _139_hex__lagoon_lte_a[]={{".2", u3wf_la_lte}, {}}; +static u3j_harm _139_hex__lagoon_range_a[]= {{".2", u3wf_la_range}, {}}; +static u3j_harm _139_hex__lagoon_abs_a[] = {{".2", u3wf_la_abs}, {}}; +static u3j_harm _139_hex__lagoon_gth_a[] = {{".2", u3wf_la_gth}, {}}; +static u3j_harm _139_hex__lagoon_gte_a[] = {{".2", u3wf_la_gte}, {}}; +static u3j_harm _139_hex__lagoon_lth_a[] = {{".2", u3wf_la_lth}, {}}; +static u3j_harm _139_hex__lagoon_lte_a[] = {{".2", u3wf_la_lte}, {}}; static u3j_harm _139_hex__lagoon_diag_a[] = {{".2", u3wf_la_diag}, {}}; static u3j_harm _139_hex__lagoon_trace_a[]= {{".2", u3wf_la_trace}, {}}; static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; From e43d2101790208d23c82f679a8a590d4a4139028 Mon Sep 17 00:00:00 2001 
From: Sigilante Date: Wed, 24 Apr 2024 10:51:09 -0500 Subject: [PATCH 25/41] Post with things moved and support for ARM. --- bazel/third_party/softblas/softblas.BUILD | 62 ++++++++ pkg/noun/jets/{f => i}/lagoon.c | 172 +++++++++++----------- pkg/noun/jets/q.h | 56 +++---- pkg/noun/jets/tree.c | 132 +++++++++-------- pkg/noun/jets/w.h | 58 ++++---- 5 files changed, 273 insertions(+), 207 deletions(-) rename pkg/noun/jets/{f => i}/lagoon.c (96%) diff --git a/bazel/third_party/softblas/softblas.BUILD b/bazel/third_party/softblas/softblas.BUILD index 3442c5da45..34c80c93e6 100644 --- a/bazel/third_party/softblas/softblas.BUILD +++ b/bazel/third_party/softblas/softblas.BUILD @@ -5,6 +5,68 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") cc_library( name = "softblas", visibility = ["//visibility:public"], + deps = select({ + "@platforms//cpu:aarch64": [":softblas_aarch64"], + "@platforms//cpu:x86_64": [":softblas_x86_64"], + "//conditions:default": [], + }), +) + +cc_library( + name = "softblas_aarch64", + visibility = ["//visibility:public"], + hdrs = ["include/softblas.h"], + includes = ["include"], + srcs = [ + "include/softblas.h", + "src/softblas_state.c", + "src/blas/level1/sasum.c", + "src/blas/level1/dasum.c", + "src/blas/level1/hasum.c", + "src/blas/level1/qasum.c", + "src/blas/level1/saxpy.c", + "src/blas/level1/daxpy.c", + "src/blas/level1/haxpy.c", + "src/blas/level1/qaxpy.c", + "src/blas/level1/scopy.c", + "src/blas/level1/dcopy.c", + "src/blas/level1/hcopy.c", + "src/blas/level1/qcopy.c", + "src/blas/level1/sdot.c", + "src/blas/level1/ddot.c", + "src/blas/level1/hdot.c", + "src/blas/level1/qdot.c", + "src/blas/level1/snrm2.c", + "src/blas/level1/dnrm2.c", + "src/blas/level1/hnrm2.c", + "src/blas/level1/qnrm2.c", + "src/blas/level1/sscal.c", + "src/blas/level1/dscal.c", + "src/blas/level1/hscal.c", + "src/blas/level1/qscal.c", + "src/blas/level1/sswap.c", + "src/blas/level1/dswap.c", + "src/blas/level1/hswap.c", + 
"src/blas/level1/qswap.c", + "src/blas/level1/isamax.c", + "src/blas/level1/idamax.c", + "src/blas/level1/ihamax.c", + "src/blas/level1/iqamax.c", + "src/blas/level2/sgemv.c", + "src/blas/level2/dgemv.c", + "src/blas/level2/hgemv.c", + "src/blas/level2/qgemv.c", + "src/blas/level3/sgemm.c", + "src/blas/level3/dgemm.c", + "src/blas/level3/hgemm.c", + "src/blas/level3/qgemm.c" + ], + deps = ["@softfloat"], +) + +cc_library( + name = "softblas_x86_64", + visibility = ["//visibility:public"], hdrs = ["include/softblas.h"], includes = ["include"], srcs = [ diff --git a/pkg/noun/jets/f/lagoon.c b/pkg/noun/jets/i/lagoon.c similarity index 96% rename from pkg/noun/jets/f/lagoon.c rename to pkg/noun/jets/i/lagoon.c index 8fd95be2ab..be9b8f6791 100644 --- a/pkg/noun/jets/f/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -101,7 +101,7 @@ /* add - axpy = 1*x+y */ u3_noun - u3qf_la_add_real(u3_noun x_data, + u3qi_la_add_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq @@ -159,7 +159,7 @@ /* sub - axpy = -1*y+x */ u3_noun - u3qf_la_sub_real(u3_noun x_data, + u3qi_la_sub_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq @@ -219,7 +219,7 @@ elementwise multiplication */ u3_noun - u3qf_la_mul_real(u3_noun x_data, + u3qi_la_mul_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -285,7 +285,7 @@ elementwise division */ u3_noun - u3qf_la_div_real(u3_noun x_data, + u3qi_la_div_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -351,7 +351,7 @@ remainder after division */ u3_noun - u3qf_la_mod_real(u3_noun x_data, + u3qi_la_mod_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -458,7 +458,7 @@ /* cumsum - x[0] + x[1] + ... 
x[n] */ u3_noun - u3qf_la_cumsum_real(u3_noun x_data, + u3qi_la_cumsum_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -532,7 +532,7 @@ /* argmin - argmin(x) */ u3_noun - u3qf_la_argmin_real(u3_noun x_data, + u3qi_la_argmin_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -605,7 +605,7 @@ /* argmax - argmax(x) */ u3_noun - u3qf_la_argmax_real(u3_noun x_data, + u3qi_la_argmax_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -679,7 +679,7 @@ entire nd-array busted out as a linear list */ u3_noun - u3qf_la_ravel_real(u3_noun x_data, + u3qi_la_ravel_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -742,7 +742,7 @@ /* min - min(x,y) */ u3_noun - u3qf_la_min_real(u3_noun x_data, + u3qi_la_min_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -820,7 +820,7 @@ /* max - max(x,y) */ u3_noun - u3qf_la_max_real(u3_noun x_data, + u3qi_la_max_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -898,7 +898,7 @@ /* abs - |x| */ u3_noun - u3qf_la_abs_real(u3_noun x_data, + u3qi_la_abs_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -957,7 +957,7 @@ /* gth - x > y */ u3_noun - u3qf_la_gth_real(u3_noun x_data, + u3qi_la_gth_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1030,7 +1030,7 @@ /* gte - x > y */ u3_noun - u3qf_la_gte_real(u3_noun x_data, + u3qi_la_gte_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1103,7 +1103,7 @@ /* lth - x > y */ u3_noun - u3qf_la_lth_real(u3_noun x_data, + u3qi_la_lth_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1176,7 +1176,7 @@ /* lte - x > y */ u3_noun - u3qf_la_lte_real(u3_noun x_data, + u3qi_la_lte_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1249,7 +1249,7 @@ /* adds - axpy = 1*x+[n] */ u3_noun - u3qf_la_adds_real(u3_noun x_data, + u3qi_la_adds_real(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1331,7 +1331,7 @@ /* subs - axpy = -1*[n]+x */ u3_noun - u3qf_la_subs_real(u3_noun x_data, + 
u3qi_la_subs_real(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1414,7 +1414,7 @@ elementwise multiplication */ u3_noun - u3qf_la_muls_real(u3_noun x_data, + u3qi_la_muls_real(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1477,7 +1477,7 @@ elementwise division */ u3_noun - u3qf_la_divs_real(u3_noun x_data, + u3qi_la_divs_real(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1548,7 +1548,7 @@ remainder after scalar division */ u3_noun - u3qf_la_mods_real(u3_noun x_data, + u3qi_la_mods_real(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1664,7 +1664,7 @@ /* dot - ?dot = x · y */ u3_noun - u3qf_la_dot_real(u3_noun x_data, + u3qi_la_dot_real(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1732,7 +1732,7 @@ /* diag - diag(x) */ u3_noun - u3qf_la_diag(u3_noun x_data, + u3qi_la_diag(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -1783,7 +1783,7 @@ /* transpose - x' */ u3_noun - u3qf_la_transpose(u3_noun x_data, + u3qi_la_transpose(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -1828,7 +1828,7 @@ /* linspace - [a a+(b-a)/n ... b] */ u3_noun - u3qf_la_linspace_real(u3_noun a, + u3qi_la_linspace_real(u3_noun a, u3_noun b, u3_noun n, u3_noun bloq) @@ -1923,7 +1923,7 @@ /* range - [a a+d ... 
b] */ u3_noun - u3qf_la_range_real(u3_noun a, + u3qi_la_range_real(u3_noun a, u3_noun b, u3_noun d, u3_noun bloq) @@ -2014,20 +2014,20 @@ /* trace - tr(x) */ u3_noun - u3qf_la_trace_real(u3_noun x_data, + u3qi_la_trace_real(u3_noun x_data, u3_noun shape, u3_noun bloq) { - u3_noun d_data = u3qf_la_diag(x_data, shape, bloq); + u3_noun d_data = u3qi_la_diag(x_data, shape, bloq); c3_d len_x0 = _get_dims(shape)[0]; - u3_noun r_data = u3qf_la_dot_real(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq)); + u3_noun r_data = u3qi_la_dot_real(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq)); return r_data; } /* mmul */ u3_noun - u3qf_la_mmul_real(u3_noun x_data, + u3qi_la_mmul_real(u3_noun x_data, u3_noun y_data, u3_noun x_shape, u3_noun y_shape, @@ -2113,7 +2113,7 @@ } u3_noun - u3wf_la_add(u3_noun cor) + u3wi_la_add(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2157,7 +2157,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_add_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_add_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2168,7 +2168,7 @@ } u3_noun - u3wf_la_sub(u3_noun cor) + u3wi_la_sub(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2212,7 +2212,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_sub_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_sub_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2223,7 +2223,7 @@ } u3_noun - u3wf_la_mul(u3_noun cor) + u3wi_la_mul(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2267,7 +2267,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_mul_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = 
u3qi_la_mul_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2278,7 +2278,7 @@ } u3_noun - u3wf_la_div(u3_noun cor) + u3wi_la_div(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2322,7 +2322,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_div_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_div_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2333,7 +2333,7 @@ } u3_noun - u3wf_la_mod(u3_noun cor) + u3wi_la_mod(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2377,7 +2377,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_mod_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_mod_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2388,7 +2388,7 @@ } u3_noun - u3wf_la_cumsum(u3_noun cor) + u3wi_la_cumsum(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2417,7 +2417,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_cumsum_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_cumsum_real(x_data, x_shape, x_bloq); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2428,7 +2428,7 @@ } u3_noun - u3wf_la_argmin(u3_noun cor) + u3wi_la_argmin(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2454,7 +2454,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_argmin_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_argmin_real(x_data, x_shape, x_bloq); // bare atom (@ index) return r_data; @@ -2466,7 +2466,7 @@ } u3_noun - u3wf_la_ravel(u3_noun cor) + 
u3wi_la_ravel(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2491,7 +2491,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_ravel_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_ravel_real(x_data, x_shape, x_bloq); // (list @) return r_data; @@ -2503,7 +2503,7 @@ } u3_noun - u3wf_la_argmax(u3_noun cor) + u3wi_la_argmax(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2529,7 +2529,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_argmax_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_argmax_real(x_data, x_shape, x_bloq); // bare atom (@ index) return r_data; @@ -2541,7 +2541,7 @@ } u3_noun - u3wf_la_min(u3_noun cor) + u3wi_la_min(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2567,7 +2567,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_min_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_min_real(x_data, x_shape, x_bloq); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2578,7 +2578,7 @@ } u3_noun - u3wf_la_max(u3_noun cor) + u3wi_la_max(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2604,7 +2604,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_max_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_max_real(x_data, x_shape, x_bloq); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2615,7 +2615,7 @@ } u3_noun - u3wf_la_abs(u3_noun cor) + u3wi_la_abs(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -2641,7 +2641,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_abs_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_abs_real(x_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), 
u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2652,7 +2652,7 @@ } u3_noun - u3wf_la_gth(u3_noun cor) + u3wi_la_gth(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2694,7 +2694,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_gth_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_gth_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2705,7 +2705,7 @@ } u3_noun - u3wf_la_gte(u3_noun cor) + u3wi_la_gte(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2747,7 +2747,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_gte_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_gte_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2758,7 +2758,7 @@ } u3_noun - u3wf_la_lth(u3_noun cor) + u3wi_la_lth(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2800,7 +2800,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_lth_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_lth_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2811,7 +2811,7 @@ } u3_noun - u3wf_la_lte(u3_noun cor) + u3wi_la_lte(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -2853,7 +2853,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_lte_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_lte_real(x_data, y_data, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2864,7 +2864,7 @@ } u3_noun - u3wf_la_adds(u3_noun cor) + u3wi_la_adds(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, 
x_data, n; @@ -2889,7 +2889,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_adds_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = u3qi_la_adds_real(x_data, n, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2899,7 +2899,7 @@ } u3_noun - u3wf_la_subs(u3_noun cor) + u3wi_la_subs(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; @@ -2924,7 +2924,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_subs_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = u3qi_la_subs_real(x_data, n, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2934,7 +2934,7 @@ } u3_noun - u3wf_la_muls(u3_noun cor) + u3wi_la_muls(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; @@ -2959,7 +2959,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_muls_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = u3qi_la_muls_real(x_data, n, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2969,7 +2969,7 @@ } u3_noun - u3wf_la_divs(u3_noun cor) + u3wi_la_divs(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; @@ -2994,7 +2994,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_divs_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = u3qi_la_divs_real(x_data, n, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3004,7 +3004,7 @@ } u3_noun - u3wf_la_mods(u3_noun cor) + u3wi_la_mods(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; @@ -3029,7 +3029,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_mods_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = 
u3qi_la_mods_real(x_data, n, x_shape, x_bloq); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3039,7 +3039,7 @@ } u3_noun - u3wf_la_dot(u3_noun cor) + u3wi_la_dot(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -3083,7 +3083,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_dot_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_dot_real(x_data, y_data, x_shape, x_bloq); c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3095,7 +3095,7 @@ } u3_noun - u3wf_la_transpose(u3_noun cor) + u3wi_la_transpose(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -3119,14 +3119,14 @@ { return u3m_bail(c3__exit); } else { - u3_noun r_data = u3qf_la_transpose(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq); return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } } } u3_noun - u3wf_la_linspace(u3_noun cor) + u3wi_la_linspace(u3_noun cor) { u3_noun x_meta, a, b, n, rnd; @@ -3154,7 +3154,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_linspace_real(a, b, n, x_bloq); + u3_noun r_data = u3qi_la_linspace_real(a, b, n, x_bloq); x_shape = u3nt(u3x_atom(n), 0x1, u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3166,7 +3166,7 @@ } u3_noun - u3wf_la_range(u3_noun cor) + u3wi_la_range(u3_noun cor) { u3_noun x_meta, a, b, d, rnd; @@ -3194,7 +3194,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_range_real(a, b, d, x_bloq); + u3_noun r_data = u3qi_la_range_real(a, b, d, x_bloq); c3_d a_, b_, d_; c3_ds n_; switch (x_bloq) { @@ -3235,7 +3235,7 @@ } u3_noun - u3wf_la_diag(u3_noun cor) + u3wi_la_diag(u3_noun cor) { // Each argument 
is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -3259,7 +3259,7 @@ { return u3m_bail(c3__exit); } else { - u3_noun r_data = u3qf_la_diag(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_diag(x_data, x_shape, x_bloq); c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } @@ -3267,7 +3267,7 @@ } u3_noun - u3wf_la_trace(u3_noun cor) + u3wi_la_trace(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; @@ -3293,7 +3293,7 @@ } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qf_la_trace_real(x_data, x_shape, x_bloq); + u3_noun r_data = u3qi_la_trace_real(x_data, x_shape, x_bloq); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3304,7 +3304,7 @@ } u3_noun - u3wf_la_mmul(u3_noun cor) + u3wi_la_mmul(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, @@ -3343,7 +3343,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qf_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); + u3_noun r_data = u3qi_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); // result is already [meta data] return r_data; diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index b5e5cbc8e1..14a4cac1e3 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -247,34 +247,34 @@ u3_noun u3qfp_nepo(u3_noun, u3_noun); u3_noun u3qfp_rake(u3_noun); - u3_noun u3qf_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_mul_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_div_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_mod_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_adds_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun 
u3qf_la_divs_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_mods_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_diag(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_transpose(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_cumsum_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_argmin_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_argmax_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_ravel_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_min_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_max_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_range_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_abs_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_gth_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_gte_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_lth_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_lte_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_trace_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qf_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mul_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_div_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mod_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_adds_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_divs_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mods_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_diag(u3_noun, u3_noun, u3_noun); + u3_noun 
u3qi_la_transpose(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_cumsum_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_argmin_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_argmax_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_ravel_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_min_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_max_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_range_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_abs_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_gth_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_gte_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_lth_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_lte_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_trace_real(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 # define u3qfu_van_rib 58 diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 35573a42e6..c9118c9a4b 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2147,77 +2147,82 @@ static u3j_core _139_hex_json_d[] = {} }; -/* linear algebra jets - XX move to outer _hep_ core for /lib? 
eventually -static u3j_core _139_hep_d[] = +/* /lib jets in non core */ -static u3j_harm _139_hex__lagoon_add_a[] = {{".2", u3wf_la_add}, {}}; -static u3j_harm _139_hex__lagoon_sub_a[] = {{".2", u3wf_la_sub}, {}}; -static u3j_harm _139_hex__lagoon_mul_a[] = {{".2", u3wf_la_mul}, {}}; -static u3j_harm _139_hex__lagoon_div_a[] = {{".2", u3wf_la_div}, {}}; -static u3j_harm _139_hex__lagoon_mod_a[] = {{".2", u3wf_la_mod}, {}}; -static u3j_harm _139_hex__lagoon_adds_a[] = {{".2", u3wf_la_adds}, {}}; -static u3j_harm _139_hex__lagoon_subs_a[] = {{".2", u3wf_la_subs}, {}}; -static u3j_harm _139_hex__lagoon_muls_a[] = {{".2", u3wf_la_muls}, {}}; -static u3j_harm _139_hex__lagoon_divs_a[] = {{".2", u3wf_la_divs}, {}}; -static u3j_harm _139_hex__lagoon_mods_a[] = {{".2", u3wf_la_mods}, {}}; -static u3j_harm _139_hex__lagoon_dot_a[] = {{".2", u3wf_la_dot}, {}}; -static u3j_harm _139_hex__lagoon_trans_a[] ={{".2", u3wf_la_transpose}, {}}; -static u3j_harm _139_hex__lagoon_cumsum_a[]={{".2", u3wf_la_cumsum}, {}}; -static u3j_harm _139_hex__lagoon_argmin_a[]={{".2", u3wf_la_argmin}, {}}; -static u3j_harm _139_hex__lagoon_argmax_a[]={{".2", u3wf_la_argmax}, {}}; -static u3j_harm _139_hex__lagoon_ravel_a[]={{".2", u3wf_la_ravel}, {}}; -static u3j_harm _139_hex__lagoon_min_a[] = {{".2", u3wf_la_min}, {}}; -static u3j_harm _139_hex__lagoon_max_a[] = {{".2", u3wf_la_max}, {}}; -static u3j_harm _139_hex__lagoon_linspace_a[]={{".2", u3wf_la_linspace}, {}}; -static u3j_harm _139_hex__lagoon_range_a[]= {{".2", u3wf_la_range}, {}}; -static u3j_harm _139_hex__lagoon_abs_a[] = {{".2", u3wf_la_abs}, {}}; -static u3j_harm _139_hex__lagoon_gth_a[] = {{".2", u3wf_la_gth}, {}}; -static u3j_harm _139_hex__lagoon_gte_a[] = {{".2", u3wf_la_gte}, {}}; -static u3j_harm _139_hex__lagoon_lth_a[] = {{".2", u3wf_la_lth}, {}}; -static u3j_harm _139_hex__lagoon_lte_a[] = {{".2", u3wf_la_lte}, {}}; -static u3j_harm _139_hex__lagoon_diag_a[] = {{".2", u3wf_la_diag}, {}}; -static u3j_harm 
_139_hex__lagoon_trace_a[]= {{".2", u3wf_la_trace}, {}}; -static u3j_harm _139_hex__lagoon_mmul_a[] = {{".2", u3wf_la_mmul}, {}}; -static u3j_core _139_hex__la_core_d[] = - { { "add-rays", 7, _139_hex__lagoon_add_a, 0, no_hashes }, - { "sub-rays", 7, _139_hex__lagoon_sub_a, 0, no_hashes }, - { "mul-rays", 7, _139_hex__lagoon_mul_a, 0, no_hashes }, - { "div-rays", 7, _139_hex__lagoon_div_a, 0, no_hashes }, - { "mod-rays", 7, _139_hex__lagoon_mod_a, 0, no_hashes }, - { "add-scal", 7, _139_hex__lagoon_adds_a, 0, no_hashes }, - { "sub-scal", 7, _139_hex__lagoon_subs_a, 0, no_hashes }, - { "mul-scal", 7, _139_hex__lagoon_muls_a, 0, no_hashes }, - { "div-scal", 7, _139_hex__lagoon_divs_a, 0, no_hashes }, - { "mod-scal", 7, _139_hex__lagoon_mods_a, 0, no_hashes }, - { "dot", 7, _139_hex__lagoon_dot_a, 0, no_hashes }, - { "transpose",7, _139_hex__lagoon_trans_a, 0, no_hashes }, - { "cumsum", 7, _139_hex__lagoon_cumsum_a, 0, no_hashes }, - { "argmin", 7, _139_hex__lagoon_argmin_a, 0, no_hashes }, - { "argmax", 7, _139_hex__lagoon_argmax_a, 0, no_hashes }, - { "ravel", 7, _139_hex__lagoon_ravel_a, 0, no_hashes }, - { "min", 7, _139_hex__lagoon_min_a, 0, no_hashes }, - { "max", 7, _139_hex__lagoon_max_a, 0, no_hashes }, - { "linspace", 7, _139_hex__lagoon_linspace_a, 0, no_hashes }, - { "range", 7, _139_hex__lagoon_range_a, 0, no_hashes }, - { "abs", 7, _139_hex__lagoon_abs_a, 0, no_hashes }, - { "gth", 7, _139_hex__lagoon_gth_a, 0, no_hashes }, - { "gte", 7, _139_hex__lagoon_gte_a, 0, no_hashes }, - { "lth", 7, _139_hex__lagoon_lth_a, 0, no_hashes }, - { "lte", 7, _139_hex__lagoon_lte_a, 0, no_hashes }, - { "diag", 7, _139_hex__lagoon_diag_a, 0, no_hashes }, - { "trace", 7, _139_hex__lagoon_trace_a,0, no_hashes }, - { "mmul", 7, _139_hex__lagoon_mmul_a, 0, no_hashes }, +static u3j_harm _139_non__lagoon_add_a[] = {{".2", u3wi_la_add, c3n}, {}}; +static u3j_harm _139_non__lagoon_sub_a[] = {{".2", u3wi_la_sub, c3n}, {}}; +static u3j_harm _139_non__lagoon_mul_a[] = {{".2", 
u3wi_la_mul, c3n}, {}}; +static u3j_harm _139_non__lagoon_div_a[] = {{".2", u3wi_la_div, c3n}, {}}; +static u3j_harm _139_non__lagoon_mod_a[] = {{".2", u3wi_la_mod, c3n}, {}}; +static u3j_harm _139_non__lagoon_adds_a[] = {{".2", u3wi_la_adds, c3n}, {}}; +static u3j_harm _139_non__lagoon_subs_a[] = {{".2", u3wi_la_subs, c3n}, {}}; +static u3j_harm _139_non__lagoon_muls_a[] = {{".2", u3wi_la_muls, c3n}, {}}; +static u3j_harm _139_non__lagoon_divs_a[] = {{".2", u3wi_la_divs, c3n}, {}}; +static u3j_harm _139_non__lagoon_mods_a[] = {{".2", u3wi_la_mods, c3n}, {}}; +static u3j_harm _139_non__lagoon_dot_a[] = {{".2", u3wi_la_dot, c3n}, {}}; +static u3j_harm _139_non__lagoon_trans_a[] ={{".2", u3wi_la_transpose, c3n}, {}}; +static u3j_harm _139_non__lagoon_cumsum_a[]={{".2", u3wi_la_cumsum, c3n}, {}}; +static u3j_harm _139_non__lagoon_argmin_a[]={{".2", u3wi_la_argmin, c3n}, {}}; +static u3j_harm _139_non__lagoon_argmax_a[]={{".2", u3wi_la_argmax, c3n}, {}}; +static u3j_harm _139_non__lagoon_ravel_a[]={{".2", u3wi_la_ravel, c3n}, {}}; +static u3j_harm _139_non__lagoon_min_a[] = {{".2", u3wi_la_min, c3n}, {}}; +static u3j_harm _139_non__lagoon_max_a[] = {{".2", u3wi_la_max, c3n}, {}}; +static u3j_harm _139_non__lagoon_linspace_a[]={{".2", u3wi_la_linspace, c3n}, {}}; +static u3j_harm _139_non__lagoon_range_a[]= {{".2", u3wi_la_range, c3n}, {}}; +static u3j_harm _139_non__lagoon_abs_a[] = {{".2", u3wi_la_abs, c3n}, {}}; +static u3j_harm _139_non__lagoon_gth_a[] = {{".2", u3wi_la_gth, c3n}, {}}; +static u3j_harm _139_non__lagoon_gte_a[] = {{".2", u3wi_la_gte, c3n}, {}}; +static u3j_harm _139_non__lagoon_lth_a[] = {{".2", u3wi_la_lth, c3n}, {}}; +static u3j_harm _139_non__lagoon_lte_a[] = {{".2", u3wi_la_lte, c3n}, {}}; +static u3j_harm _139_non__lagoon_diag_a[] = {{".2", u3wi_la_diag, c3n}, {}}; +static u3j_harm _139_non__lagoon_trace_a[]= {{".2", u3wi_la_trace, c3n}, {}}; +static u3j_harm _139_non__lagoon_mmul_a[] = {{".2", u3wi_la_mmul, c3n}, {}}; +static u3j_core 
_139_non__la_core_d[] = + { { "add-rays", 7, _139_non__lagoon_add_a, 0, no_hashes }, + { "sub-rays", 7, _139_non__lagoon_sub_a, 0, no_hashes }, + { "mul-rays", 7, _139_non__lagoon_mul_a, 0, no_hashes }, + { "div-rays", 7, _139_non__lagoon_div_a, 0, no_hashes }, + { "mod-rays", 7, _139_non__lagoon_mod_a, 0, no_hashes }, + { "add-scal", 7, _139_non__lagoon_adds_a, 0, no_hashes }, + { "sub-scal", 7, _139_non__lagoon_subs_a, 0, no_hashes }, + { "mul-scal", 7, _139_non__lagoon_muls_a, 0, no_hashes }, + { "div-scal", 7, _139_non__lagoon_divs_a, 0, no_hashes }, + { "mod-scal", 7, _139_non__lagoon_mods_a, 0, no_hashes }, + { "dot", 7, _139_non__lagoon_dot_a, 0, no_hashes }, + { "transpose",7, _139_non__lagoon_trans_a, 0, no_hashes }, + { "cumsum", 7, _139_non__lagoon_cumsum_a, 0, no_hashes }, + { "argmin", 7, _139_non__lagoon_argmin_a, 0, no_hashes }, + { "argmax", 7, _139_non__lagoon_argmax_a, 0, no_hashes }, + { "ravel", 7, _139_non__lagoon_ravel_a, 0, no_hashes }, + { "min", 7, _139_non__lagoon_min_a, 0, no_hashes }, + { "max", 7, _139_non__lagoon_max_a, 0, no_hashes }, + { "linspace", 7, _139_non__lagoon_linspace_a, 0, no_hashes }, + { "range", 7, _139_non__lagoon_range_a, 0, no_hashes }, + { "abs", 7, _139_non__lagoon_abs_a, 0, no_hashes }, + { "gth", 7, _139_non__lagoon_gth_a, 0, no_hashes }, + { "gte", 7, _139_non__lagoon_gte_a, 0, no_hashes }, + { "lth", 7, _139_non__lagoon_lth_a, 0, no_hashes }, + { "lte", 7, _139_non__lagoon_lte_a, 0, no_hashes }, + { "diag", 7, _139_non__lagoon_diag_a, 0, no_hashes }, + { "trace", 7, _139_non__lagoon_trace_a,0, no_hashes }, + { "mmul", 7, _139_non__lagoon_mmul_a, 0, no_hashes }, {} }; -static u3j_core _139_hex__lagoon_d[] = - { { "la-core", 7, 0, _139_hex__la_core_d, no_hashes }, +static u3j_core _139_non__lagoon_d[] = + { { "la-core", 7, 0, _139_non__la_core_d, no_hashes }, + {} + }; + +static u3j_core _139_non_d[] = + { { "lagoon", 6, 0, _139_non__lagoon_d, no_hashes }, {} }; static u3j_core _139_hex_d[] = -{ { "lore", 63, 
_140_hex_lore_a, 0, no_hashes }, +{ { "sep", 7, 0, _139_non_d, no_hashes }, + + { "lore", 63, _140_hex_lore_a, 0, no_hashes }, { "leer", 63, _140_hex_leer_a, 0, no_hashes }, { "loss", 63, _140_hex_loss_a, 0, no_hashes }, { "lune", 127, _140_hex_lune_a, 0, no_hashes }, @@ -2235,7 +2240,6 @@ static u3j_core _139_hex_d[] = { "mimes", 31, 0, _140_hex_mimes_d, no_hashes }, { "json", 31, 0, _139_hex_json_d, no_hashes }, - { "lagoon", 31, 0, _139_hex__lagoon_d, no_hashes }, {} }; diff --git a/pkg/noun/jets/w.h b/pkg/noun/jets/w.h index e7976ba3b7..01948a851e 100644 --- a/pkg/noun/jets/w.h +++ b/pkg/noun/jets/w.h @@ -331,34 +331,34 @@ u3_noun u3wfu_repo(u3_noun); u3_noun u3wfu_rest(u3_noun); - u3_noun u3wf_la_add(u3_noun); - u3_noun u3wf_la_sub(u3_noun); - u3_noun u3wf_la_mul(u3_noun); - u3_noun u3wf_la_div(u3_noun); - u3_noun u3wf_la_mod(u3_noun); - u3_noun u3wf_la_adds(u3_noun); - u3_noun u3wf_la_subs(u3_noun); - u3_noun u3wf_la_muls(u3_noun); - u3_noun u3wf_la_divs(u3_noun); - u3_noun u3wf_la_mods(u3_noun); - u3_noun u3wf_la_dot(u3_noun); - u3_noun u3wf_la_diag(u3_noun); - u3_noun u3wf_la_transpose(u3_noun); - u3_noun u3wf_la_cumsum(u3_noun); - u3_noun u3wf_la_argmin(u3_noun); - u3_noun u3wf_la_argmax(u3_noun); - u3_noun u3wf_la_ravel(u3_noun); - u3_noun u3wf_la_min(u3_noun); - u3_noun u3wf_la_max(u3_noun); - u3_noun u3wf_la_linspace(u3_noun); - u3_noun u3wf_la_range(u3_noun); - u3_noun u3wf_la_abs(u3_noun); - u3_noun u3wf_la_gth(u3_noun); - u3_noun u3wf_la_gte(u3_noun); - u3_noun u3wf_la_lth(u3_noun); - u3_noun u3wf_la_lte(u3_noun); - - u3_noun u3wf_la_trace(u3_noun); - u3_noun u3wf_la_mmul(u3_noun); + u3_noun u3wi_la_add(u3_noun); + u3_noun u3wi_la_sub(u3_noun); + u3_noun u3wi_la_mul(u3_noun); + u3_noun u3wi_la_div(u3_noun); + u3_noun u3wi_la_mod(u3_noun); + u3_noun u3wi_la_adds(u3_noun); + u3_noun u3wi_la_subs(u3_noun); + u3_noun u3wi_la_muls(u3_noun); + u3_noun u3wi_la_divs(u3_noun); + u3_noun u3wi_la_mods(u3_noun); + u3_noun u3wi_la_dot(u3_noun); + u3_noun 
u3wi_la_diag(u3_noun); + u3_noun u3wi_la_transpose(u3_noun); + u3_noun u3wi_la_cumsum(u3_noun); + u3_noun u3wi_la_argmin(u3_noun); + u3_noun u3wi_la_argmax(u3_noun); + u3_noun u3wi_la_ravel(u3_noun); + u3_noun u3wi_la_min(u3_noun); + u3_noun u3wi_la_max(u3_noun); + u3_noun u3wi_la_linspace(u3_noun); + u3_noun u3wi_la_range(u3_noun); + u3_noun u3wi_la_abs(u3_noun); + u3_noun u3wi_la_gth(u3_noun); + u3_noun u3wi_la_gte(u3_noun); + u3_noun u3wi_la_lth(u3_noun); + u3_noun u3wi_la_lte(u3_noun); + + u3_noun u3wi_la_trace(u3_noun); + u3_noun u3wi_la_mmul(u3_noun); #endif /* ifndef U3_JETS_W_H */ From fe485ea43e4106840c90c3692ffb30947302e98c Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 24 Apr 2024 10:57:47 -0500 Subject: [PATCH 26/41] Attempt ARM build with same files as x86_64. --- bazel/third_party/softfloat/softfloat.BUILD | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/bazel/third_party/softfloat/softfloat.BUILD b/bazel/third_party/softfloat/softfloat.BUILD index 0d57ba5d7f..09f7b21370 100644 --- a/bazel/third_party/softfloat/softfloat.BUILD +++ b/bazel/third_party/softfloat/softfloat.BUILD @@ -235,6 +235,25 @@ cc_library( "source/extF80M_eq_signaling.c", "source/extF80M_le_quiet.c", "source/extF80M_lt_quiet.c", + "source/f128_to_f16.c", + "source/f128_to_f32.c", + "source/f128_to_extF80.c", + "source/f128_to_f64.c", + "source/f128_roundToInt.c", + "source/f128_add.c", + "source/f128_sub.c", + "source/f128_mul.c", + "source/f128_mulAdd.c", + "source/f128_div.c", + "source/f128_rem.c", + "source/f128_sqrt.c", + "source/f128_eq.c", + "source/f128_le.c", + "source/f128_lt.c", + "source/f128_eq_signaling.c", + "source/f128_le_quiet.c", + "source/f128_lt_quiet.c", + "source/f128_isSignalingNaN.c", "source/f128M_to_ui32.c", "source/f128M_to_ui64.c", "source/f128M_to_i32.c", From bf13e281cdc815bcfe5bfef9538f3a6104435efb Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 24 Apr 2024 11:04:24 -0500 Subject: [PATCH 27/41] Attempt ARM build with 
same files as x86_64. --- bazel/third_party/softfloat/softfloat.BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bazel/third_party/softfloat/softfloat.BUILD b/bazel/third_party/softfloat/softfloat.BUILD index 09f7b21370..cf01a9dcf2 100644 --- a/bazel/third_party/softfloat/softfloat.BUILD +++ b/bazel/third_party/softfloat/softfloat.BUILD @@ -20,6 +20,9 @@ cc_library( srcs = [ # See `OBJS_PRIMITIVES` in `build/Linux-ARM-VFPv2-GCC/Makefile` in the # `softfloat` repo. + "source/s_eq128.c", + "source/s_le128.c", + "source/s_lt128.c", "source/s_compare96M.c", "source/s_compare128M.c", "source/s_shortShiftLeft64To96M.c", From 4832fbe06cf1c7201090f7b5fe28b3d763b5f4a5 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 24 Apr 2024 11:46:51 -0500 Subject: [PATCH 28/41] Swap out SoftFloat fns. --- bazel/third_party/softfloat/softfloat.BUILD | 22 ----------------- pkg/noun/jets/i/lagoon.c | 26 +++++++++++++-------- 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/bazel/third_party/softfloat/softfloat.BUILD b/bazel/third_party/softfloat/softfloat.BUILD index cf01a9dcf2..0d57ba5d7f 100644 --- a/bazel/third_party/softfloat/softfloat.BUILD +++ b/bazel/third_party/softfloat/softfloat.BUILD @@ -20,9 +20,6 @@ cc_library( srcs = [ # See `OBJS_PRIMITIVES` in `build/Linux-ARM-VFPv2-GCC/Makefile` in the # `softfloat` repo. 
- "source/s_eq128.c", - "source/s_le128.c", - "source/s_lt128.c", "source/s_compare96M.c", "source/s_compare128M.c", "source/s_shortShiftLeft64To96M.c", @@ -238,25 +235,6 @@ cc_library( "source/extF80M_eq_signaling.c", "source/extF80M_le_quiet.c", "source/extF80M_lt_quiet.c", - "source/f128_to_f16.c", - "source/f128_to_f32.c", - "source/f128_to_extF80.c", - "source/f128_to_f64.c", - "source/f128_roundToInt.c", - "source/f128_add.c", - "source/f128_sub.c", - "source/f128_mul.c", - "source/f128_mulAdd.c", - "source/f128_div.c", - "source/f128_rem.c", - "source/f128_sqrt.c", - "source/f128_eq.c", - "source/f128_le.c", - "source/f128_lt.c", - "source/f128_eq_signaling.c", - "source/f128_le_quiet.c", - "source/f128_lt_quiet.c", - "source/f128_isSignalingNaN.c", "source/f128M_to_ui32.c", "source/f128M_to_ui64.c", "source/f128M_to_i32.c", diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index be9b8f6791..f0906cd20a 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -385,7 +385,7 @@ // Perform division x/n float16_t div_result16 = f16_div(x_val16, y_val16); // Compute floor of the division result - int64_t floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); + c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); float16_t floor_float16 = i64_to_f16(floor_result16); // Multiply n by floor(x/n) float16_t mult_result16 = f16_mul(y_val16, floor_float16); @@ -401,7 +401,7 @@ // Perform division x/n float32_t div_result32 = f32_div(x_val32, y_val32); // Compute floor of the division result - int64_t floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); + c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); float32_t floor_float32 = i64_to_f32(floor_result32); // Multiply n by floor(x/n) float32_t mult_result32 = f32_mul(y_val32, floor_float32); @@ -417,7 +417,7 @@ // Perform division x/n float64_t div_result64 = f64_div(x_val64, y_val64); // Compute 
floor of the division result - int64_t floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); + c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); float64_t floor_float64 = i64_to_f64(floor_result64); // Multiply n by floor(x/n) float64_t mult_result64 = f64_mul(y_val64, floor_float64); @@ -434,7 +434,7 @@ float128_t div_result128; f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128); // Compute floor of the division result - int64_t floor_result128 = f128_to_i64(div_result128, softfloat_round_minMag, false); + c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); float128_t floor_float128 = i64_to_f128(floor_result128); // Multiply n by floor(x/n) float128_t mult_result128; @@ -1586,7 +1586,7 @@ // Perform division x/n float16_t div_result16 = f16_mul(in16, x_val16); // Compute floor of the division result - int64_t floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); + c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); float16_t floor_float16 = i64_to_f16(floor_result16); // Multiply n by floor(x/n) float16_t mult_result16 = f16_mul(n16, floor_float16); @@ -1604,7 +1604,7 @@ // Perform division x/n float32_t div_result32 = f32_mul((float32_t)in32, (float32_t)x_val32); // Compute floor of the division result - int64_t floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); + c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); float32_t floor_float32 = i64_to_f32(floor_result32); // Multiply n by floor(x/n) float32_t mult_result32 = f32_mul(n32, floor_float32); @@ -1622,7 +1622,7 @@ // Perform division x/n float64_t div_result64 = f64_mul(in64, x_val64); // Compute floor of the division result - int64_t floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); + c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); 
float64_t floor_float64 = i64_to_f64(floor_result64); // Multiply n by floor(x/n) float64_t mult_result64 = f64_mul(n64, floor_float64); @@ -1641,7 +1641,7 @@ float128_t div_result128; f128M_mul((float128_t*)&in128, (float128_t*)&x_val128, (float128_t*)&div_result128); // Compute floor of the division result - int64_t floor_result128 = f128_to_i64(div_result128, softfloat_round_minMag, false); + c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); float128_t floor_float128 = i64_to_f128(floor_result128); // Multiply n by floor(x/n) float128_t mult_result128; @@ -1992,7 +1992,10 @@ u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); u3r_bytes(0, 16, (c3_y*)&(interval128.v), d); - c3_d n128 = f128_to_i64(f128_div(f128_sub(b128, a128), interval128), softfloat_round_minMag, false); + float128_t tmp; + f128M_sub(&b128, &a128, &tmp); + f128M_div(&tmp, &interval128, &interval128); + c3_d n128 = f128M_to_i64(&tmp, softfloat_round_minMag, false); c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16+1)*sizeof(c3_y)); float128_t i128; for (c3_d i = 1; i < n128; i++) { @@ -3220,7 +3223,10 @@ u3r_bytes(0, 16, (c3_y*)&a_, a); u3r_bytes(0, 16, (c3_y*)&b_, b); u3r_bytes(0, 16, (c3_y*)&d_, d); - n_ = f128_to_i64(f128_div(f128_sub((float128_t){b_}, (float128_t){a_}), (float128_t){d_}), softfloat_round_minMag, false); + float128_t tmp; + f128M_sub((float128_t*){&b_}, (float128_t*){&a_}, &tmp); + f128M_div(&tmp, (float128_t*){&d_}, &tmp); + n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false); break; } u3_noun n = u3i_chub(n_+1); From 04a33f757043b75ffd46236b646a7782c044eec9 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 24 Apr 2024 11:51:00 -0500 Subject: [PATCH 29/41] Swap out SoftFloat fns. 
--- pkg/noun/jets/i/lagoon.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index f0906cd20a..b5fe75f298 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -435,7 +435,8 @@ f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128); // Compute floor of the division result c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); - float128_t floor_float128 = i64_to_f128(floor_result128); + float128_t floor_float128; + i64_to_f128M(floor_result128, &floor_float128); // Multiply n by floor(x/n) float128_t mult_result128; f128M_mul(((float128_t*)&y_val128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); @@ -1642,7 +1643,8 @@ f128M_mul((float128_t*)&in128, (float128_t*)&x_val128, (float128_t*)&div_result128); // Compute floor of the division result c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); - float128_t floor_float128 = i64_to_f128(floor_result128); + float128_t floor_float128; + i64_to_f128M(floor_result128, &floor_float128); // Multiply n by floor(x/n) float128_t mult_result128; f128M_mul(((float128_t*)&n128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); From 8b76ac5ab77292080312b1516afb74e3117075b2 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 24 Apr 2024 11:55:46 -0500 Subject: [PATCH 30/41] Swap out SoftFloat fns. 
--- pkg/noun/jets/i/lagoon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index b5fe75f298..502cc6af2e 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -590,7 +590,7 @@ case 7: ; float128_t min_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { - if(f128_lt(((float128_t*)x_bytes)[i], min_val128)) { + if(f128M_lt(&(((float128_t*)x_bytes)[i]), &min_val128)) { min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); min_idx = (len_x - i - 1); } From fdd6951712135afbcafab3afa94cd2e654f183ed Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 24 Apr 2024 11:59:29 -0500 Subject: [PATCH 31/41] Swap out SoftFloat fns. --- pkg/noun/jets/i/lagoon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 502cc6af2e..2bfe195b3c 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -663,7 +663,7 @@ case 7: ; float128_t max_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { - if(f128_gt(((float128_t*)x_bytes)[i], max_val128)) { + if(f128M_gt(&(((float128_t*)x_bytes)[i]), &max_val128)) { max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); max_idx = (len_x - i - 1); } From d078a314d24464fd3d51e604f8a96609a2576cf5 Mon Sep 17 00:00:00 2001 From: Pyry Kovanen Date: Thu, 25 Apr 2024 19:37:33 +0300 Subject: [PATCH 32/41] ci: install pkg-config in linux-aarch64 --- .github/workflows/shared.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/shared.yml b/.github/workflows/shared.yml index aee47884a5..bd8044e580 100644 --- a/.github/workflows/shared.yml +++ b/.github/workflows/shared.yml @@ -113,7 +113,7 @@ jobs: run: | case "${{ matrix.target }}" in "linux-aarch64") - sudo apt-get -y install autoconf-archive + sudo apt-get -y install autoconf-archive pkg-config bazel run //bazel/toolchain:aarch64-linux-musl-gcc 
;; "linux-x86_64") From 8b3122b0ab9ea09aa91d00b1163d2bb046e69905 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Fri, 24 May 2024 15:11:13 -0500 Subject: [PATCH 33/41] Update for SoftBLAS version. --- WORKSPACE.bazel | 2 +- pkg/noun/jets/i/lagoon.c | 224 +++++++++++++++++++++------------------ pkg/noun/jets/tree.c | 2 +- 3 files changed, 124 insertions(+), 104 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index c904a63eff..ec6a637114 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -297,7 +297,7 @@ versioned_http_archive( strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "7d05697aea5363dcf5f877a9c8b464e9c352d3d4", + version = "29daa2f2fd0ad5070e405ad287f3623804f8fc67", ) versioned_http_archive( diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 2bfe195b3c..460058ecc7 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -98,6 +98,16 @@ return dims; } +/* soft check on u3_none return from q jet +*/ + static inline u3_noun _soft_run(u3_noun a) + { + if (u3_none == a) { + u3m_bail(c3__fail); + } + return a; + } + /* add - axpy = 1*x+y */ u3_noun @@ -125,7 +135,8 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x+1, y_bytes, y_data); + u3r_bytes(0, syz_x, y_bytes, y_data); + y_bytes[syz_x] = 0x1; // Switch on the block size. switch (u3x_atom(bloq)) { @@ -183,8 +194,9 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x+1, y_bytes, y_data); - + u3r_bytes(0, syz_x, y_bytes, y_data); + y_bytes[syz_x] = 0x1; + // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: @@ -242,7 +254,8 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x+1, y_bytes, y_data); + u3r_bytes(0, syz_x, y_bytes, y_data); + y_bytes[syz_x] = 0x1; // Switch on the block size. switch (u3x_atom(bloq)) { @@ -308,7 +321,8 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x+1, y_bytes, y_data); + u3r_bytes(0, syz_x, y_bytes, y_data); + y_bytes[syz_x] = 0x1; // Switch on the block size. switch (u3x_atom(bloq)) { @@ -374,7 +388,8 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x+1, y_bytes, y_data); + u3r_bytes(0, syz_x, y_bytes, y_data); + y_bytes[syz_x] = 0x1; // Switch on the block size. switch (u3x_atom(bloq)) { @@ -557,44 +572,44 @@ // Switch on the block size. switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t min_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f16_lt(((float16_t*)x_bytes)[i], min_val16)) { min_val16 = ((float16_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } - } + } } break; - case 5: ; + case 5: { float32_t min_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f32_lt(((float32_t*)x_bytes)[i], min_val32)) { min_val32 = ((float32_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } - } + } } break; - case 6: ; + case 6: { float64_t min_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f64_lt(((float64_t*)x_bytes)[i], min_val64)) { min_val64 = ((float64_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } - } + } } break; - case 7: ; + case 7: { float128_t min_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f128M_lt(&(((float128_t*)x_bytes)[i]), &min_val128)) { min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); min_idx = (len_x - 
i - 1); } - } + } } break; } @@ -630,44 +645,44 @@ // Switch on the block size. switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t max_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f16_gt(((float16_t*)x_bytes)[i], max_val16)) { max_val16 = ((float16_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } - } + } } break; - case 5: ; + case 5: { float32_t max_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f32_gt(((float32_t*)x_bytes)[i], max_val32)) { max_val32 = ((float32_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } - } + } } break; - case 6: ; + case 6: { float64_t max_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f64_gt(((float64_t*)x_bytes)[i], max_val64)) { max_val64 = ((float64_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } - } + } } break; - case 7: ; + case 7: { float128_t max_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f128M_gt(&(((float128_t*)x_bytes)[i]), &max_val128)) { max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); max_idx = (len_x - i - 1); } - } + } } break; } @@ -1744,12 +1759,12 @@ } // Assert length of dims is 2. if (u3qb_lent(shape) != 2) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } // Unpack shape into an array of dimensions. c3_d *dims = _get_dims(shape); if (dims[0] != dims[1]) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } // Unpack the data as a byte array. We assume total length < 2**64. @@ -1791,7 +1806,7 @@ { // Assert length of dims is 2. if (u3qb_lent(shape) != 2) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } // Unpack shape into an array of dimensions. c3_d *dims = _get_dims(shape); @@ -2044,10 +2059,15 @@ c3_d Nb= u3x_atom(u3h(y_shape)); c3_d P = u3x_atom(u3h(u3t(y_shape))); + // Fence on valid bloq size. 
+ if (bloq < 4 || bloq > 7) { + return u3_none; + } + if ((u3_nul != u3t(u3t(x_shape))) || (u3_nul != u3t(u3t(y_shape))) || (Na != Nb)) { - return u3m_bail(c3__exit); + return u3_none; } c3_d N = Na; @@ -2133,7 +2153,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2157,12 +2177,12 @@ // fxp does not need to match here so no check ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_add_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_add_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2188,7 +2208,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2212,12 +2232,12 @@ // fxp does not need to match here so no check ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_sub_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_sub_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2243,7 +2263,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2267,12 +2287,12 @@ // fxp does not need to match here so no check ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_mul_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_mul_real(x_data, y_data, x_shape, x_bloq)); 
return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2298,7 +2318,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2322,12 +2342,12 @@ // fxp does not need to match here so no check ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_div_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_div_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2353,7 +2373,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2377,12 +2397,12 @@ // fxp does not need to match here so no check ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_mod_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_mod_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2404,7 +2424,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2417,12 +2437,12 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_cumsum_real(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_cumsum_real(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2444,7 +2464,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + 
u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2455,7 +2475,7 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; @@ -2482,7 +2502,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2492,11 +2512,11 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_ravel_real(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_ravel_real(x_data, x_shape, x_bloq)); // (list @) return r_data; @@ -2519,7 +2539,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2530,7 +2550,7 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; @@ -2557,7 +2577,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2568,11 +2588,11 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_min_real(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_min_real(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2594,7 +2614,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2605,11 +2625,11 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_max_real(x_data, x_shape, x_bloq); + u3_noun 
r_data = _soft_run(u3qi_la_max_real(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2631,7 +2651,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2642,11 +2662,11 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_abs_real(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_abs_real(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2672,7 +2692,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2695,11 +2715,11 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_gth_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_gth_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2725,7 +2745,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2748,11 +2768,11 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_gte_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_gte_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2778,7 +2798,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + 
u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2801,11 +2821,11 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_lth_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_lth_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2831,7 +2851,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2854,11 +2874,11 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_lte_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_lte_real(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2882,7 +2902,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2894,7 +2914,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_adds_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_adds_real(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2917,7 +2937,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2929,7 +2949,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_subs_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_subs_real(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), 
u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2952,7 +2972,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2964,7 +2984,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_muls_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_muls_real(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2987,7 +3007,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2999,7 +3019,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_divs_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_divs_real(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3022,7 +3042,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -3034,7 +3054,7 @@ switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_mods_real(x_data, n, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_mods_real(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3059,7 +3079,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -3083,12 +3103,12 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_dot_real(x_data, y_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_dot_real(x_data, y_data, x_shape, x_bloq)); c3_d 
len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3111,7 +3131,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3122,7 +3142,7 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq); return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3142,7 +3162,7 @@ u3x_sam_7, &n, 0)) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3154,12 +3174,12 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_linspace_real(a, b, n, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_linspace_real(a, b, n, x_bloq)); x_shape = u3nt(u3x_atom(n), 0x1, u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3182,7 +3202,7 @@ u3x_sam_7, &d, 0)) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3194,12 +3214,12 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_range_real(a, b, d, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_range_real(a, b, d, x_bloq)); c3_d a_, b_, d_; c3_ds n_; switch (x_bloq) { @@ -3254,7 +3274,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3265,9 +3285,9 @@ c3n == u3ud(x_kind) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { - u3_noun r_data = 
u3qi_la_diag(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_diag(x_data, x_shape, x_bloq)); c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } @@ -3286,7 +3306,7 @@ 0) || c3n == u3ud(x_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; if ( c3n == u3r_mean(x_meta, @@ -3297,11 +3317,11 @@ 0) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: ; - u3_noun r_data = u3qi_la_trace_real(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_trace_real(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3327,7 +3347,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -3346,12 +3366,12 @@ // fxp does not need to match so no check ) { - return u3m_bail(c3__exit); + u3m_bail(c3__exit); } else { switch (x_kind) { case c3__real: _set_rounding(rnd); - u3_noun r_data = u3qi_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq)); // result is already [meta data] return r_data; diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index c9118c9a4b..48ef0db9f6 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2220,7 +2220,7 @@ static u3j_core _139_non_d[] = }; static u3j_core _139_hex_d[] = -{ { "sep", 7, 0, _139_non_d, no_hashes }, +{ { "non", 7, 0, _139_non_d, no_hashes }, { "lore", 63, _140_hex_lore_a, 0, no_hashes }, { "leer", 63, _140_hex_leer_a, 0, no_hashes }, From 529f399a1ac1535767eb22ce0fe4328a2cc68b70 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Mon, 22 Jul 2024 16:33:39 -0500 Subject: [PATCH 34/41] Post with i754 mote. 
--- pkg/c3/motes.h | 1 + pkg/noun/jets/i/lagoon.c | 468 +++++++++++++++++++-------------------- pkg/noun/jets/q.h | 52 ++--- 3 files changed, 258 insertions(+), 263 deletions(-) diff --git a/pkg/c3/motes.h b/pkg/c3/motes.h index 277761185f..6c1217dc2f 100644 --- a/pkg/c3/motes.h +++ b/pkg/c3/motes.h @@ -613,6 +613,7 @@ # define c3__is c3_s2('i','s') # define c3__item c3_s4('i','t','e','m') # define c3__ix c3_s2('i','x') +# define c3__i754 c3_s4('i','7','5','4') # define c3__j c3_s1('j') # define c3__jack c3_s4('j','a','c','k') # define c3__jam c3_s3('j','a','m') diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 460058ecc7..9ead70d14a 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -30,6 +30,16 @@ c3_d c[2]; }; +/* soft check on u3_none return from q jet +*/ + static inline u3_noun _soft_run(u3_noun a) + { + if (u3_none == a) { + u3m_bail(c3__fail); + } + return a; + } + // $?(%n %u %d %z %a) static inline void _set_rounding(c3_w a) @@ -98,20 +108,10 @@ return dims; } -/* soft check on u3_none return from q jet -*/ - static inline u3_noun _soft_run(u3_noun a) - { - if (u3_none == a) { - u3m_bail(c3__fail); - } - return a; - } - /* add - axpy = 1*x+y */ u3_noun - u3qi_la_add_real(u3_noun x_data, + u3qi_la_add_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq @@ -121,6 +121,7 @@ if (bloq < 4 || bloq > 7) { return u3_none; } + fprintf(stderr, ">>> u3qi_la_add\n\r"); // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units @@ -135,8 +136,7 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x, y_bytes, y_data); - y_bytes[syz_x] = 0x1; + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. 
switch (u3x_atom(bloq)) { @@ -170,7 +170,7 @@ /* sub - axpy = -1*y+x */ u3_noun - u3qi_la_sub_real(u3_noun x_data, + u3qi_la_sub_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq @@ -194,9 +194,8 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x, y_bytes, y_data); - y_bytes[syz_x] = 0x1; - + u3r_bytes(0, syz_x+1, y_bytes, y_data); + // Switch on the block size. switch (u3x_atom(bloq)) { case 4: @@ -231,7 +230,7 @@ elementwise multiplication */ u3_noun - u3qi_la_mul_real(u3_noun x_data, + u3qi_la_mul_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -254,8 +253,7 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x, y_bytes, y_data); - y_bytes[syz_x] = 0x1; + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { @@ -298,7 +296,7 @@ elementwise division */ u3_noun - u3qi_la_div_real(u3_noun x_data, + u3qi_la_div_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -321,8 +319,7 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x, y_bytes, y_data); - y_bytes[syz_x] = 0x1; + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { @@ -365,7 +362,7 @@ remainder after division */ u3_noun - u3qi_la_mod_real(u3_noun x_data, + u3qi_la_mod_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -388,8 +385,7 @@ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); - u3r_bytes(0, syz_x, y_bytes, y_data); - y_bytes[syz_x] = 0x1; + u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { @@ -474,7 +470,7 @@ /* cumsum - x[0] + x[1] + ... 
x[n] */ u3_noun - u3qi_la_cumsum_real(u3_noun x_data, + u3qi_la_cumsum_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -498,7 +494,7 @@ // Switch on the block size. switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t sum16[2]; sum16[0] = (float16_t){SB_REAL16_ZERO}; for (c3_d i = 0; i < len_x; i++) { @@ -506,9 +502,9 @@ } sum16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)sum16); - break; + break;} - case 5: ; + case 5: { float32_t sum32[2]; sum32[0] = (float32_t){SB_REAL32_ZERO}; for (c3_d i = 0; i < len_x; i++) { @@ -516,9 +512,9 @@ } sum32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)sum32); - break; + break;} - case 6: ; + case 6: { float64_t sum64[2]; sum64[0] = (float64_t){SB_REAL64_ZERO}; for (c3_d i = 0; i < len_x; i++) { @@ -526,9 +522,9 @@ } sum64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)sum64); - break; + break;} - case 7: ; + case 7: { float128_t sum128[2]; sum128[0] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; for (c3_d i = 0; i < len_x; i++) { @@ -536,7 +532,7 @@ } sum128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128); - break; + break;} } // Clean up and return. 
@@ -548,7 +544,7 @@ /* argmin - argmin(x) */ u3_noun - u3qi_la_argmin_real(u3_noun x_data, + u3qi_la_argmin_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -579,8 +575,8 @@ min_val16 = ((float16_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } - } } - break; + } + break;} case 5: { float32_t min_val32 = ((float32_t*)x_bytes)[0]; @@ -589,8 +585,8 @@ min_val32 = ((float32_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } - } } - break; + } + break;} case 6: { float64_t min_val64 = ((float64_t*)x_bytes)[0]; @@ -599,8 +595,8 @@ min_val64 = ((float64_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } - } } - break; + } + break;} case 7: { float128_t min_val128 = ((float128_t*)x_bytes)[0]; @@ -609,8 +605,8 @@ min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); min_idx = (len_x - i - 1); } - } } - break; + } + break;} } u3_noun r_data = u3i_chub(min_idx); @@ -621,7 +617,7 @@ /* argmax - argmax(x) */ u3_noun - u3qi_la_argmax_real(u3_noun x_data, + u3qi_la_argmax_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -652,8 +648,8 @@ max_val16 = ((float16_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } - } } - break; + } + break;} case 5: { float32_t max_val32 = ((float32_t*)x_bytes)[0]; @@ -662,8 +658,8 @@ max_val32 = ((float32_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } - } } - break; + } + break;} case 6: { float64_t max_val64 = ((float64_t*)x_bytes)[0]; @@ -672,8 +668,8 @@ max_val64 = ((float64_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } - } } - break; + } + break;} case 7: { float128_t max_val128 = ((float128_t*)x_bytes)[0]; @@ -682,8 +678,8 @@ max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); max_idx = (len_x - i - 1); } - } } - break; + } + break;} } u3_noun r_data = u3i_chub(max_idx); @@ -695,7 +691,7 @@ entire nd-array busted out as a linear list */ u3_noun - u3qi_la_ravel_real(u3_noun x_data, + u3qi_la_ravel_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -758,7 +754,7 @@ /* min - min(x,y) */ u3_noun - u3qi_la_min_real(u3_noun x_data, + 
u3qi_la_min_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -782,7 +778,7 @@ // Switch on the block size. switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t min_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val16 = f16_min(min_val16, ((float16_t*)x_bytes)[i]); @@ -791,9 +787,9 @@ r16[0] = min_val16; r16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); - break; + break;} - case 5: ; + case 5: { float32_t min_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val32 = f32_min(min_val32, ((float32_t*)x_bytes)[i]); @@ -802,9 +798,9 @@ r32[0] = min_val32; r32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); - break; + break;} - case 6: ; + case 6: { float64_t min_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val64 = f64_min(min_val64, ((float64_t*)x_bytes)[i]); @@ -813,9 +809,9 @@ r64[0] = min_val64; r64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); - break; + break;} - case 7: ; + case 7: { float128_t min_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); @@ -824,7 +820,7 @@ r128[0] = min_val128; r128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); - break; + break;} } // Clean up and return. @@ -836,7 +832,7 @@ /* max - max(x,y) */ u3_noun - u3qi_la_max_real(u3_noun x_data, + u3qi_la_max_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -860,7 +856,7 @@ // Switch on the block size. 
switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t max_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val16 = f16_max(max_val16, ((float16_t*)x_bytes)[i]); @@ -869,9 +865,9 @@ r16[0] = max_val16; r16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); - break; + break;} - case 5: ; + case 5: { float32_t max_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val32 = f32_max(max_val32, ((float32_t*)x_bytes)[i]); @@ -880,9 +876,9 @@ r32[0] = max_val32; r32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); - break; + break;} - case 6: ; + case 6: { float64_t max_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val64 = f64_max(max_val64, ((float64_t*)x_bytes)[i]); @@ -891,9 +887,9 @@ r64[0] = max_val64; r64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); - break; + break;} - case 7: ; + case 7: { float128_t max_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); @@ -902,7 +898,7 @@ r128[0] = max_val128; r128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); - break; + break;} } // Clean up and return. 
@@ -914,7 +910,7 @@ /* abs - |x| */ u3_noun - u3qi_la_abs_real(u3_noun x_data, + u3qi_la_abs_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { @@ -973,7 +969,7 @@ /* gth - x > y */ u3_noun - u3qi_la_gth_real(u3_noun x_data, + u3qi_la_gth_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1046,7 +1042,7 @@ /* gte - x > y */ u3_noun - u3qi_la_gte_real(u3_noun x_data, + u3qi_la_gte_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1119,7 +1115,7 @@ /* lth - x > y */ u3_noun - u3qi_la_lth_real(u3_noun x_data, + u3qi_la_lth_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1192,7 +1188,7 @@ /* lte - x > y */ u3_noun - u3qi_la_lte_real(u3_noun x_data, + u3qi_la_lte_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1265,7 +1261,7 @@ /* adds - axpy = 1*x+[n] */ u3_noun - u3qi_la_adds_real(u3_noun x_data, + u3qi_la_adds_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1347,7 +1343,7 @@ /* subs - axpy = -1*[n]+x */ u3_noun - u3qi_la_subs_real(u3_noun x_data, + u3qi_la_subs_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1430,7 +1426,7 @@ elementwise multiplication */ u3_noun - u3qi_la_muls_real(u3_noun x_data, + u3qi_la_muls_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1493,7 +1489,7 @@ elementwise division */ u3_noun - u3qi_la_divs_real(u3_noun x_data, + u3qi_la_divs_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1564,7 +1560,7 @@ remainder after scalar division */ u3_noun - u3qi_la_mods_real(u3_noun x_data, + u3qi_la_mods_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) @@ -1681,7 +1677,7 @@ /* dot - ?dot = x · y */ u3_noun - u3qi_la_dot_real(u3_noun x_data, + u3qi_la_dot_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) @@ -1710,33 +1706,33 @@ // Switch on the block size. 
switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t r16[2]; r16[0] = hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); r16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); - break; + break;} - case 5: ; + case 5: { float32_t r32[2]; r32[0] = sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); r32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); - break; + break;} - case 6: ; + case 6: { float64_t r64[2]; r64[0] = ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); r64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); - break; + break;} - case 7: ; + case 7: { float128_t r128[2]; r128[0] = qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); r128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); - break; + break;} } // Clean up and return. @@ -1759,12 +1755,12 @@ } // Assert length of dims is 2. if (u3qb_lent(shape) != 2) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } // Unpack shape into an array of dimensions. c3_d *dims = _get_dims(shape); if (dims[0] != dims[1]) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } // Unpack the data as a byte array. We assume total length < 2**64. @@ -1806,7 +1802,7 @@ { // Assert length of dims is 2. if (u3qb_lent(shape) != 2) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } // Unpack shape into an array of dimensions. c3_d *dims = _get_dims(shape); @@ -1845,7 +1841,7 @@ /* linspace - [a a+(b-a)/n ... 
b] */ u3_noun - u3qi_la_linspace_real(u3_noun a, + u3qi_la_linspace_i754(u3_noun a, u3_noun b, u3_noun n, u3_noun bloq) @@ -1858,7 +1854,7 @@ u3_noun r_data; switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t a16, b16; u3r_bytes(0, 2, (c3_y*)&(a16.v), a); u3r_bytes(0, 2, (c3_y*)&(b16.v), b); @@ -1873,9 +1869,9 @@ x_bytes16[(n+1)*2] = 0x1; // pin head r_data = u3i_bytes(((n+1)*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); - break; + break;} - case 5: ; + case 5: { float32_t a32, b32; u3r_bytes(0, 4, (c3_y*)&(a32.v), a); u3r_bytes(0, 4, (c3_y*)&(b32.v), b); @@ -1890,9 +1886,9 @@ x_bytes32[(n+1)*4] = 0x1; // pin head r_data = u3i_bytes(((n+1)*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); - break; + break;} - case 6: ; + case 6: { float64_t a64, b64; u3r_bytes(0, 8, (c3_y*)&(a64.v), a); u3r_bytes(0, 8, (c3_y*)&(b64.v), b); @@ -1907,9 +1903,9 @@ x_bytes64[(n+1)*8] = 0x1; // pin head r_data = u3i_bytes(((n+1)*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); - break; + break;} - case 7: ; + case 7: { float128_t a128, b128; u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); @@ -1931,7 +1927,7 @@ x_bytes128[(n+1)*16] = 0x1; // pin head r_data = u3i_bytes(((n+1)*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); - break; + break;} } return r_data; @@ -1940,7 +1936,7 @@ /* range - [a a+d ... 
b] */ u3_noun - u3qi_la_range_real(u3_noun a, + u3qi_la_range_i754(u3_noun a, u3_noun b, u3_noun d, u3_noun bloq) @@ -1953,7 +1949,7 @@ u3_noun r_data; switch (u3x_atom(bloq)) { - case 4: ; + case 4: { float16_t a16, b16, interval16; u3r_bytes(0, 2, (c3_y*)&(a16.v), a); u3r_bytes(0, 2, (c3_y*)&(b16.v), b); @@ -1968,9 +1964,9 @@ x_bytes16[(n16+1)*2] = 0x1; // pin head r_data = u3i_bytes(((n16+1)*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); - break; + break;} - case 5: ; + case 5: { float32_t a32, b32, interval32; u3r_bytes(0, 4, (c3_y*)&(a32.v), a); u3r_bytes(0, 4, (c3_y*)&(b32.v), b); @@ -1985,9 +1981,9 @@ x_bytes32[(n32+1)*4] = 0x1; // pin head r_data = u3i_bytes(((n32+1)*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); - break; + break;} - case 6: ; + case 6: { float64_t a64, b64, interval64; u3r_bytes(0, 8, (c3_y*)&(a64.v), a); u3r_bytes(0, 8, (c3_y*)&(b64.v), b); @@ -2002,9 +1998,9 @@ x_bytes64[(n64+1)*8] = 0x1; // pin head r_data = u3i_bytes(((n64+1)*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); - break; + break;} - case 7: ; + case 7: { float128_t a128, b128, interval128; u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); @@ -2025,7 +2021,7 @@ x_bytes128[(n128+1)*16] = 0x1; // pin head r_data = u3i_bytes(((n128+1)*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); - break; + break;} } return r_data; @@ -2034,20 +2030,20 @@ /* trace - tr(x) */ u3_noun - u3qi_la_trace_real(u3_noun x_data, + u3qi_la_trace_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { u3_noun d_data = u3qi_la_diag(x_data, shape, bloq); c3_d len_x0 = _get_dims(shape)[0]; - u3_noun r_data = u3qi_la_dot_real(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq)); + u3_noun r_data = u3qi_la_dot_i754(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq)); return r_data; } /* mmul */ u3_noun - u3qi_la_mmul_real(u3_noun x_data, + u3qi_la_mmul_i754(u3_noun x_data, u3_noun y_data, u3_noun x_shape, u3_noun y_shape, @@ -2059,15 +2055,10 @@ c3_d Nb= 
u3x_atom(u3h(y_shape)); c3_d P = u3x_atom(u3h(u3t(y_shape))); - // Fence on valid bloq size. - if (bloq < 4 || bloq > 7) { - return u3_none; - } - if ((u3_nul != u3t(u3t(x_shape))) || (u3_nul != u3t(u3t(y_shape))) || (Na != Nb)) { - return u3_none; + return u3m_bail(c3__exit); } c3_d N = Na; @@ -2134,7 +2125,7 @@ u3a_free(y_bytes); u3a_free(r_bytes); - return u3nc(u3nq(u3nt(M_, P_, u3_nul), u3k(bloq), c3__real, u3_nul), r_data); + return u3nc(u3nq(u3nt(M_, P_, u3_nul), u3k(bloq), c3__i754, u3_nul), r_data); } u3_noun @@ -2143,6 +2134,7 @@ // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; + fprintf(stderr, "> u3wi_la_add\n\r"); if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, @@ -2153,7 +2145,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2177,12 +2169,14 @@ // fxp does not need to match here so no check ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { + fprintf(stderr, "> u3wi_la_add\n\r"); switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_add_real(x_data, y_data, x_shape, x_bloq)); + fprintf(stderr, ">> u3wi_la_add\n\r"); + u3_noun r_data = _soft_run(u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2208,7 +2202,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2232,12 +2226,12 @@ // fxp does not need to match here so no check ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_sub_real(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_sub_i754(x_data, 
y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2263,7 +2257,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2287,12 +2281,12 @@ // fxp does not need to match here so no check ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mul_real(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2318,7 +2312,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2342,12 +2336,12 @@ // fxp does not need to match here so no check ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_div_real(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2373,7 +2367,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2397,12 +2391,12 @@ // fxp does not need to match here so no check ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mod_real(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_mod_i754(x_data, y_data, x_shape, 
x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2424,7 +2418,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2437,12 +2431,12 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_cumsum_real(x_data, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_cumsum_i754(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2464,7 +2458,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2475,13 +2469,13 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = u3qi_la_argmin_real(x_data, x_shape, x_bloq); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_argmin_i754(x_data, x_shape, x_bloq)); // bare atom (@ index) - return r_data; + return r_data;} default: return u3_none; @@ -2502,7 +2496,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2512,13 +2506,13 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_ravel_real(x_data, x_shape, x_bloq)); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_ravel_i754(x_data, x_shape, x_bloq)); // (list @) - return r_data; + return r_data;} default: return u3_none; @@ -2539,7 +2533,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 
@@ -2550,13 +2544,13 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = u3qi_la_argmax_real(x_data, x_shape, x_bloq); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_argmax_i754(x_data, x_shape, x_bloq)); // bare atom (@ index) - return r_data; + return r_data;} default: return u3_none; @@ -2577,7 +2571,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2588,12 +2582,12 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_min_real(x_data, x_shape, x_bloq)); - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_min_i754(x_data, x_shape, x_bloq)); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2614,7 +2608,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2625,12 +2619,12 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_max_real(x_data, x_shape, x_bloq)); - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_max_i754(x_data, x_shape, x_bloq)); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2651,7 +2645,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -2662,12 +2656,12 @@ c3n == 
u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_abs_real(x_data, x_shape, x_bloq)); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_abs_i754(x_data, x_shape, x_bloq)); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2692,7 +2686,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2715,12 +2709,12 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_gth_real(x_data, y_data, x_shape, x_bloq)); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq)); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2745,7 +2739,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2768,12 +2762,12 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_gte_real(x_data, y_data, x_shape, x_bloq)); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq)); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2798,7 +2792,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - 
u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2821,12 +2815,12 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_lth_real(x_data, y_data, x_shape, x_bloq)); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq)); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2851,7 +2845,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -2874,12 +2868,12 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_lte_real(x_data, y_data, x_shape, x_bloq)); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq)); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -2902,7 +2896,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2912,9 +2906,9 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_adds_real(x_data, n, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_adds_i754(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2937,7 +2931,7 @@ c3n == u3ud(x_data) 
|| c3n == u3ud(n) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2947,9 +2941,9 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_subs_real(x_data, n, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_subs_i754(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2972,7 +2966,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -2982,9 +2976,9 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_muls_real(x_data, n, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_muls_i754(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3007,7 +3001,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -3017,9 +3011,9 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_divs_real(x_data, n, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_divs_i754(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3042,7 +3036,7 @@ c3n == u3ud(x_data) || c3n == u3ud(n) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, rnd; @@ -3052,9 +3046,9 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { - case c3__real: + 
case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mods_real(x_data, n, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_mods_i754(x_data, n, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3079,7 +3073,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -3103,12 +3097,12 @@ c3n == u3r_sing(x_fxp, y_fxp) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_dot_real(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq)); c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3131,7 +3125,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3142,9 +3136,9 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { - u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq); + u3_noun r_data = _soft_run(u3qi_la_transpose(x_data, x_shape, x_bloq)); return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } } @@ -3162,7 +3156,7 @@ u3x_sam_7, &n, 0)) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3174,12 +3168,12 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_linspace_real(a, b, n, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_linspace_i754(a, b, n, x_bloq)); 
x_shape = u3nt(u3x_atom(n), 0x1, u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3202,7 +3196,7 @@ u3x_sam_7, &d, 0)) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3214,12 +3208,12 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_range_real(a, b, d, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_range_i754(a, b, d, x_bloq)); c3_d a_, b_, d_; c3_ds n_; switch (x_bloq) { @@ -3274,7 +3268,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; x_shape = u3h(x_meta); // 2 @@ -3285,7 +3279,7 @@ c3n == u3ud(x_kind) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun r_data = _soft_run(u3qi_la_diag(x_data, x_shape, x_bloq)); c3_d len_x0 = _get_dims(x_shape)[0]; @@ -3306,7 +3300,7 @@ 0) || c3n == u3ud(x_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp; if ( c3n == u3r_mean(x_meta, @@ -3317,12 +3311,12 @@ 0) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: ; - u3_noun r_data = _soft_run(u3qi_la_trace_real(x_data, x_shape, x_bloq)); - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + case c3__i754: { + u3_noun r_data = _soft_run(u3qi_la_trace_i754(x_data, x_shape, x_bloq)); + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: return u3_none; @@ -3347,7 +3341,7 @@ c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { - u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_fxp, y_shape, y_bloq, y_kind, y_fxp, @@ -3366,12 +3360,12 @@ // fxp does not need to match so no check ) { - 
u3m_bail(c3__exit); + return u3m_bail(c3__exit); } else { switch (x_kind) { - case c3__real: + case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mmul_real(x_data, y_data, x_shape, y_shape, x_bloq)); + u3_noun r_data = _soft_run(u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq)); // result is already [meta data] return r_data; diff --git a/pkg/noun/jets/q.h b/pkg/noun/jets/q.h index bb366844e2..720cd8a2de 100644 --- a/pkg/noun/jets/q.h +++ b/pkg/noun/jets/q.h @@ -245,34 +245,34 @@ u3_noun u3qfp_nepo(u3_noun, u3_noun); u3_noun u3qfp_rake(u3_noun); - u3_noun u3qi_la_add_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_sub_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_mul_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_div_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_mod_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_adds_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_subs_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_muls_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_divs_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_mods_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_dot_real(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_add_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_sub_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mul_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_div_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mod_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_adds_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_subs_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_muls_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_divs_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mods_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_dot_i754(u3_noun, u3_noun, u3_noun, 
u3_noun); u3_noun u3qi_la_diag(u3_noun, u3_noun, u3_noun); u3_noun u3qi_la_transpose(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_cumsum_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_argmin_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_argmax_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_ravel_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_min_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_max_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_linspace_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_range_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_abs_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_gth_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_gte_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_lth_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_lte_real(u3_noun, u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_trace_real(u3_noun, u3_noun, u3_noun); - u3_noun u3qi_la_mmul_real(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_cumsum_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_argmin_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_argmax_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_ravel_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_min_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_max_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_linspace_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_range_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_abs_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_gth_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_gte_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_lth_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_lte_i754(u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_trace_i754(u3_noun, u3_noun, u3_noun); + u3_noun u3qi_la_mmul_i754(u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); # define u3qfu_van_fan 28 # define u3qfu_van_rib 
58 From b80e99a3906b2f85a25398424997744866cc8c08 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 23 Jul 2024 10:01:31 -0500 Subject: [PATCH 35/41] WIP jet hint call stack correct --- pkg/noun/jets/i/lagoon.c | 7 +++---- pkg/noun/jets/tree.c | 7 +------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 9ead70d14a..6bf53d4a1e 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -121,7 +121,6 @@ if (bloq < 4 || bloq > 7) { return u3_none; } - fprintf(stderr, ">>> u3qi_la_add\n\r"); // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units @@ -2134,7 +2133,6 @@ // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; - fprintf(stderr, "> u3wi_la_add\n\r"); if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, @@ -2159,6 +2157,7 @@ y_kind = u3h(u3t(u3t(y_meta))); // 14 y_fxp = u3t(u3t(u3t(y_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 + fprintf(stderr, "> u3wi_la_add\r\n"); if ( c3n == u3ud(x_bloq) || c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || @@ -2171,15 +2170,15 @@ { return u3m_bail(c3__exit); } else { - fprintf(stderr, "> u3wi_la_add\n\r"); + fprintf(stderr, "x_bloq: %x\r\n", x_kind); switch (x_kind) { case c3__i754: _set_rounding(rnd); - fprintf(stderr, ">> u3wi_la_add\n\r"); u3_noun r_data = _soft_run(u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq)); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: + fprintf(stderr, "default\r\n"); return u3_none; } } diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 8ee3037fae..7b6224ba97 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2185,13 +2185,8 @@ static u3j_core _139_non__la_core_d[] = {} }; -static u3j_core _139_non__lagoon_d[] = - { { "la-core", 7, 0, _139_non__la_core_d, no_hashes }, - {} - }; - static u3j_core _139_non_d[] = - { { "lagoon", 6, 0, _139_non__lagoon_d, no_hashes }, + { { 
"lagoon", 7, 0, _139_non__la_core_d, no_hashes }, {} }; From 439fdf335a1ace4496796e702bd8eb169718e5c3 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 23 Jul 2024 10:57:41 -0500 Subject: [PATCH 36/41] WIP debugging u3_none path --- pkg/noun/jets/i/lagoon.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 6bf53d4a1e..eb92dc447f 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -2157,7 +2157,6 @@ y_kind = u3h(u3t(u3t(y_meta))); // 14 y_fxp = u3t(u3t(u3t(y_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 - fprintf(stderr, "> u3wi_la_add\r\n"); if ( c3n == u3ud(x_bloq) || c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || @@ -2170,7 +2169,7 @@ { return u3m_bail(c3__exit); } else { - fprintf(stderr, "x_bloq: %x\r\n", x_kind); + fprintf(stderr, "\r\nx_kind: %x\r\n", x_kind); switch (x_kind) { case c3__i754: _set_rounding(rnd); @@ -2178,7 +2177,7 @@ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: - fprintf(stderr, "default\r\n"); + fprintf(stderr, "uint\r\n"); return u3_none; } } @@ -3234,15 +3233,16 @@ u3r_bytes(0, 8, (c3_y*)&d_, d); n_ = f64_to_i64(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_}), softfloat_round_minMag, false); break; - case 7: - u3r_bytes(0, 16, (c3_y*)&a_, a); - u3r_bytes(0, 16, (c3_y*)&b_, b); - u3r_bytes(0, 16, (c3_y*)&d_, d); + case 7: { + c3_d a__[2], b__[2], d__[2]; + u3r_bytes(0, 16, (c3_y*)&a__, a); + u3r_bytes(0, 16, (c3_y*)&b__, b); + u3r_bytes(0, 16, (c3_y*)&d__, d); float128_t tmp; - f128M_sub((float128_t*){&b_}, (float128_t*){&a_}, &tmp); - f128M_div(&tmp, (float128_t*){&d_}, &tmp); + f128M_sub((float128_t*){&b__}, (float128_t*){&a__}, &tmp); + f128M_div(&tmp, (float128_t*){&d__}, &tmp); n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false); - break; + break;} } u3_noun n = u3i_chub(n_+1); x_shape = u3nt(u3k(n), 0x1, u3_nul); From 
ab03d62d4d431f06608d6a17d65824458aeec613 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Fri, 26 Jul 2024 12:53:23 -0500 Subject: [PATCH 37/41] Turn off Nock verification. --- pkg/noun/jets/i/lagoon.c | 2 -- pkg/noun/jets/tree.c | 58 ++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index eb92dc447f..4466018f48 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -2169,7 +2169,6 @@ { return u3m_bail(c3__exit); } else { - fprintf(stderr, "\r\nx_kind: %x\r\n", x_kind); switch (x_kind) { case c3__i754: _set_rounding(rnd); @@ -2177,7 +2176,6 @@ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: - fprintf(stderr, "uint\r\n"); return u3_none; } } diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 7b6224ba97..19335435f7 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2125,34 +2125,34 @@ static u3j_core _139_hex_json_d[] = /* /lib jets in non core */ -static u3j_harm _139_non__lagoon_add_a[] = {{".2", u3wi_la_add, c3n}, {}}; -static u3j_harm _139_non__lagoon_sub_a[] = {{".2", u3wi_la_sub, c3n}, {}}; -static u3j_harm _139_non__lagoon_mul_a[] = {{".2", u3wi_la_mul, c3n}, {}}; -static u3j_harm _139_non__lagoon_div_a[] = {{".2", u3wi_la_div, c3n}, {}}; -static u3j_harm _139_non__lagoon_mod_a[] = {{".2", u3wi_la_mod, c3n}, {}}; -static u3j_harm _139_non__lagoon_adds_a[] = {{".2", u3wi_la_adds, c3n}, {}}; -static u3j_harm _139_non__lagoon_subs_a[] = {{".2", u3wi_la_subs, c3n}, {}}; -static u3j_harm _139_non__lagoon_muls_a[] = {{".2", u3wi_la_muls, c3n}, {}}; -static u3j_harm _139_non__lagoon_divs_a[] = {{".2", u3wi_la_divs, c3n}, {}}; -static u3j_harm _139_non__lagoon_mods_a[] = {{".2", u3wi_la_mods, c3n}, {}}; -static u3j_harm _139_non__lagoon_dot_a[] = {{".2", u3wi_la_dot, c3n}, {}}; -static u3j_harm _139_non__lagoon_trans_a[] ={{".2", u3wi_la_transpose, c3n}, {}}; -static u3j_harm 
_139_non__lagoon_cumsum_a[]={{".2", u3wi_la_cumsum, c3n}, {}}; -static u3j_harm _139_non__lagoon_argmin_a[]={{".2", u3wi_la_argmin, c3n}, {}}; -static u3j_harm _139_non__lagoon_argmax_a[]={{".2", u3wi_la_argmax, c3n}, {}}; -static u3j_harm _139_non__lagoon_ravel_a[]={{".2", u3wi_la_ravel, c3n}, {}}; -static u3j_harm _139_non__lagoon_min_a[] = {{".2", u3wi_la_min, c3n}, {}}; -static u3j_harm _139_non__lagoon_max_a[] = {{".2", u3wi_la_max, c3n}, {}}; -static u3j_harm _139_non__lagoon_linspace_a[]={{".2", u3wi_la_linspace, c3n}, {}}; -static u3j_harm _139_non__lagoon_range_a[]= {{".2", u3wi_la_range, c3n}, {}}; -static u3j_harm _139_non__lagoon_abs_a[] = {{".2", u3wi_la_abs, c3n}, {}}; -static u3j_harm _139_non__lagoon_gth_a[] = {{".2", u3wi_la_gth, c3n}, {}}; -static u3j_harm _139_non__lagoon_gte_a[] = {{".2", u3wi_la_gte, c3n}, {}}; -static u3j_harm _139_non__lagoon_lth_a[] = {{".2", u3wi_la_lth, c3n}, {}}; -static u3j_harm _139_non__lagoon_lte_a[] = {{".2", u3wi_la_lte, c3n}, {}}; -static u3j_harm _139_non__lagoon_diag_a[] = {{".2", u3wi_la_diag, c3n}, {}}; -static u3j_harm _139_non__lagoon_trace_a[]= {{".2", u3wi_la_trace, c3n}, {}}; -static u3j_harm _139_non__lagoon_mmul_a[] = {{".2", u3wi_la_mmul, c3n}, {}}; +static u3j_harm _139_non__lagoon_add_a[] = {{".2", u3wi_la_add}, {}}; +static u3j_harm _139_non__lagoon_sub_a[] = {{".2", u3wi_la_sub}, {}}; +static u3j_harm _139_non__lagoon_mul_a[] = {{".2", u3wi_la_mul}, {}}; +static u3j_harm _139_non__lagoon_div_a[] = {{".2", u3wi_la_div}, {}}; +static u3j_harm _139_non__lagoon_mod_a[] = {{".2", u3wi_la_mod}, {}}; +static u3j_harm _139_non__lagoon_adds_a[] = {{".2", u3wi_la_adds}, {}}; +static u3j_harm _139_non__lagoon_subs_a[] = {{".2", u3wi_la_subs}, {}}; +static u3j_harm _139_non__lagoon_muls_a[] = {{".2", u3wi_la_muls}, {}}; +static u3j_harm _139_non__lagoon_divs_a[] = {{".2", u3wi_la_divs}, {}}; +static u3j_harm _139_non__lagoon_mods_a[] = {{".2", u3wi_la_mods}, {}}; +static u3j_harm _139_non__lagoon_dot_a[] = 
{{".2", u3wi_la_dot}, {}}; +static u3j_harm _139_non__lagoon_trans_a[] ={{".2", u3wi_la_transpose}, {}}; +static u3j_harm _139_non__lagoon_cumsum_a[]={{".2", u3wi_la_cumsum}, {}}; +static u3j_harm _139_non__lagoon_argmin_a[]={{".2", u3wi_la_argmin}, {}}; +static u3j_harm _139_non__lagoon_argmax_a[]={{".2", u3wi_la_argmax}, {}}; +static u3j_harm _139_non__lagoon_ravel_a[]={{".2", u3wi_la_ravel}, {}}; +static u3j_harm _139_non__lagoon_min_a[] = {{".2", u3wi_la_min}, {}}; +static u3j_harm _139_non__lagoon_max_a[] = {{".2", u3wi_la_max}, {}}; +static u3j_harm _139_non__lagoon_linspace_a[]={{".2", u3wi_la_linspace}, {}}; +static u3j_harm _139_non__lagoon_range_a[]= {{".2", u3wi_la_range}, {}}; +static u3j_harm _139_non__lagoon_abs_a[] = {{".2", u3wi_la_abs}, {}}; +static u3j_harm _139_non__lagoon_gth_a[] = {{".2", u3wi_la_gth}, {}}; +static u3j_harm _139_non__lagoon_gte_a[] = {{".2", u3wi_la_gte}, {}}; +static u3j_harm _139_non__lagoon_lth_a[] = {{".2", u3wi_la_lth}, {}}; +static u3j_harm _139_non__lagoon_lte_a[] = {{".2", u3wi_la_lte}, {}}; +static u3j_harm _139_non__lagoon_diag_a[] = {{".2", u3wi_la_diag}, {}}; +static u3j_harm _139_non__lagoon_trace_a[]= {{".2", u3wi_la_trace}, {}}; +static u3j_harm _139_non__lagoon_mmul_a[] = {{".2", u3wi_la_mmul}, {}}; static u3j_core _139_non__la_core_d[] = { { "add-rays", 7, _139_non__lagoon_add_a, 0, no_hashes }, { "sub-rays", 7, _139_non__lagoon_sub_a, 0, no_hashes }, @@ -2169,7 +2169,7 @@ static u3j_core _139_non__la_core_d[] = { "cumsum", 7, _139_non__lagoon_cumsum_a, 0, no_hashes }, { "argmin", 7, _139_non__lagoon_argmin_a, 0, no_hashes }, { "argmax", 7, _139_non__lagoon_argmax_a, 0, no_hashes }, - { "ravel", 7, _139_non__lagoon_ravel_a, 0, no_hashes }, + // { "ravel", 7, _139_non__lagoon_ravel_a, 0, no_hashes }, { "min", 7, _139_non__lagoon_min_a, 0, no_hashes }, { "max", 7, _139_non__lagoon_max_a, 0, no_hashes }, { "linspace", 7, _139_non__lagoon_linspace_a, 0, no_hashes }, From a3e7b428eb2d0add6a03d3ed213582bc84e3ccd9 Mon 
Sep 17 00:00:00 2001 From: Sigilante Date: Fri, 26 Jul 2024 19:44:23 -0500 Subject: [PATCH 38/41] Post working jets. --- pkg/noun/jets/i/lagoon.c | 138 +++++++++++++++++++++++++++------------ pkg/noun/jets/tree.c | 2 +- 2 files changed, 97 insertions(+), 43 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 4466018f48..01e8e9954a 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -108,6 +108,26 @@ return dims; } +/* check consistency of array shape and bloq size + |= =ray + ^- ? + .= (roll shape.meta.ray ^mul) + (dec (met bloq.meta.ray data.ray)) +*/ + static inline c3_o _check(u3_noun ray) + { + // Calculate expected size. + u3_atom shp = u3h(u3h(ray)); // (reported) shape of ray, +4 + u3_atom blq = u3h(u3t(u3h(ray))); // block size of ray, +10 + u3_atom sin = _get_length(shp); // calculated length of ray + + // Calculate actual size. + u3_atom len = u3r_met(blq, u3t(ray)); // length of ray + u3_atom dex = u3qa_dec(len); // decrement length b/c of pinned 1 + + return __(sin == dex); + } + /* add - axpy = 1*x+y */ u3_noun @@ -1859,14 +1879,14 @@ u3r_bytes(0, 2, (c3_y*)&(b16.v), b); float16_t span16 = f16_sub(b16, a16); float16_t interval16 = f16_div(span16, i32_to_f16(n-1)); - c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n+1)*2+1)*sizeof(c3_y)); - for (c3_d i = 1; i <= n; i++) { - ((float16_t*)x_bytes16)[n-i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); - } - ((float16_t*)x_bytes16)[n] = a16; - ((float16_t*)x_bytes16)[0] = b16; - x_bytes16[(n+1)*2] = 0x1; // pin head - r_data = u3i_bytes(((n+1)*2+1)*sizeof(c3_y), x_bytes16); + c3_y* x_bytes16 = (c3_y*)u3a_malloc((n*2+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n-1; i++) { + ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); + } + ((float16_t*)x_bytes16)[0] = a16; + ((float16_t*)x_bytes16)[n-1] = b16; + x_bytes16[n*2] = 0x1; // pin head + r_data = u3i_bytes((n*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); break;} @@ -1876,14 
+1896,14 @@ u3r_bytes(0, 4, (c3_y*)&(b32.v), b); float32_t span32 = f32_sub(b32, a32); float32_t interval32 = f32_div(span32, i32_to_f32(n-1)); - c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n+1)*4+1)*sizeof(c3_y)); - for (c3_d i = 1; i <= n; i++) { - ((float32_t*)x_bytes32)[n-i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); - } - ((float32_t*)x_bytes32)[n] = a32; - ((float32_t*)x_bytes32)[0] = b32; - x_bytes32[(n+1)*4] = 0x1; // pin head - r_data = u3i_bytes(((n+1)*4+1)*sizeof(c3_y), x_bytes32); + c3_y* x_bytes32 = (c3_y*)u3a_malloc((n*4+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n-1; i++) { + ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); + } + ((float32_t*)x_bytes32)[0] = a32; + ((float32_t*)x_bytes32)[n-1] = b32; + x_bytes32[n*4] = 0x1; // pin head + r_data = u3i_bytes((n*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); break;} @@ -1893,14 +1913,14 @@ u3r_bytes(0, 8, (c3_y*)&(b64.v), b); float64_t span64 = f64_sub(b64, a64); float64_t interval64 = f64_div(span64, i32_to_f64(n-1)); - c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n+1)*8+1)*sizeof(c3_y)); - for (c3_d i = 1; i < n; i++) { - ((float64_t*)x_bytes64)[n-i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); - } - ((float64_t*)x_bytes64)[n] = a64; - ((float64_t*)x_bytes64)[0] = b64; - x_bytes64[(n+1)*8] = 0x1; // pin head - r_data = u3i_bytes(((n+1)*8+1)*sizeof(c3_y), x_bytes64); + c3_y* x_bytes64 = (c3_y*)u3a_malloc((n*8+1)*sizeof(c3_y)); + for (c3_d i = 1; i < n-1; i++) { + ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); + } + ((float64_t*)x_bytes64)[0] = a64; + ((float64_t*)x_bytes64)[n-1] = b64; + x_bytes64[n*8] = 0x1; // pin head + r_data = u3i_bytes((n*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); break;} @@ -1914,17 +1934,17 @@ float128_t n128; i32_to_f128M(n-1, &n128); f128M_div(&span128, &n128, &interval128); - c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n+1)*16+1)*sizeof(c3_y)); + c3_y* x_bytes128 = 
(c3_y*)u3a_malloc((n*16+1)*sizeof(c3_y)); float128_t i128; - for (c3_d i = 1; i < n; i++) { + for (c3_d i = 1; i < n-1; i++) { i32_to_f128M(i, &i128); - f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[n-i]); - f128M_add(&a128, &((float128_t*)x_bytes128)[n-i], &((float128_t*)x_bytes128)[n-i]); + f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); + f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); } - ((float128_t*)x_bytes128)[n] = a128; - ((float128_t*)x_bytes128)[0] = b128; - x_bytes128[(n+1)*16] = 0x1; // pin head - r_data = u3i_bytes(((n+1)*16+1)*sizeof(c3_y), x_bytes128); + ((float128_t*)x_bytes128)[0] = a128; + ((float128_t*)x_bytes128)[n-1] = b128; + x_bytes128[n*16] = 0x1; // pin head + r_data = u3i_bytes((n*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); break;} } @@ -2173,6 +2193,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2228,6 +2249,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_sub_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2283,6 +2305,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2338,6 +2361,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2393,6 +2417,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = 
_soft_run(u3qi_la_mod_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2424,7 +2449,8 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) ) { return u3m_bail(c3__exit); @@ -2433,6 +2459,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_cumsum_i754(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2462,7 +2489,8 @@ x_kind = u3h(u3t(u3t(x_meta))); // 14 x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) ) { return u3m_bail(c3__exit); @@ -2499,7 +2527,8 @@ x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) ) { return u3m_bail(c3__exit); @@ -2537,7 +2566,8 @@ x_kind = u3h(u3t(u3t(x_meta))); // 14 x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) ) { return u3m_bail(c3__exit); @@ -2575,7 +2605,8 @@ x_kind = u3h(u3t(u3t(x_meta))); // 14 x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) ) { return u3m_bail(c3__exit); @@ -2583,6 +2614,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_min_i754(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2612,7 +2644,8 @@ x_kind = u3h(u3t(u3t(x_meta))); // 14 
x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(u3nc(x_meta, x_data)) ) { return u3m_bail(c3__exit); @@ -2620,6 +2653,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_max_i754(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2657,6 +2691,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_abs_i754(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2710,6 +2745,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2763,6 +2799,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2816,6 +2853,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2869,6 +2907,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -2905,6 +2944,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_adds_i754(x_data, n, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return 
u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2940,6 +2980,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_subs_i754(x_data, n, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -2975,6 +3016,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_muls_i754(x_data, n, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3010,6 +3052,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_divs_i754(x_data, n, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3045,6 +3088,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_mods_i754(x_data, n, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3099,6 +3143,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); @@ -3129,12 +3174,14 @@ x_kind = u3h(u3t(u3t(x_meta))); // 14 x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(cor) ) { return u3m_bail(c3__exit); } else { u3_noun r_data = _soft_run(u3qi_la_transpose(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } } @@ -3170,7 +3217,8 @@ case c3__i754: _set_rounding(rnd); u3_noun 
r_data = _soft_run(u3qi_la_linspace_i754(a, b, n, x_bloq)); - x_shape = u3nt(u3x_atom(n), 0x1, u3_nul); + if (r_data == u3_none) { return u3_none; } + x_shape = u3nc(u3x_atom(n), u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: @@ -3210,6 +3258,7 @@ case c3__i754: _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_range_i754(a, b, d, x_bloq)); + if (r_data == u3_none) { return u3_none; } c3_d a_, b_, d_; c3_ds n_; switch (x_bloq) { @@ -3273,12 +3322,14 @@ x_kind = u3h(u3t(u3t(x_meta))); // 14 x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == _check(cor) ) { return u3m_bail(c3__exit); } else { u3_noun r_data = _soft_run(u3qi_la_diag(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); } @@ -3313,6 +3364,7 @@ switch (x_kind) { case c3__i754: { u3_noun r_data = _soft_run(u3qi_la_trace_i754(x_data, x_shape, x_bloq)); + if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} default: @@ -3353,7 +3405,9 @@ y_fxp = u3t(u3t(u3t(y_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) + c3n == u3r_sing(x_kind, y_kind) || + c3n == _check(u3nc(x_meta, x_data)) || + c3n == _check(u3nc(y_meta, y_data)) // fxp does not need to match so no check ) { diff --git a/pkg/noun/jets/tree.c b/pkg/noun/jets/tree.c index 19335435f7..7e790b21f5 100644 --- a/pkg/noun/jets/tree.c +++ b/pkg/noun/jets/tree.c @@ -2169,7 +2169,7 @@ static u3j_core _139_non__la_core_d[] = { "cumsum", 7, _139_non__lagoon_cumsum_a, 0, no_hashes }, { "argmin", 7, _139_non__lagoon_argmin_a, 0, no_hashes }, { "argmax", 7, _139_non__lagoon_argmax_a, 0, no_hashes }, - // { "ravel", 7, _139_non__lagoon_ravel_a, 
0, no_hashes }, + { "ravel", 7, _139_non__lagoon_ravel_a, 0, no_hashes }, { "min", 7, _139_non__lagoon_min_a, 0, no_hashes }, { "max", 7, _139_non__lagoon_max_a, 0, no_hashes }, { "linspace", 7, _139_non__lagoon_linspace_a, 0, no_hashes }, From 13e08dbcac9e51f25bff0e347aecaf8386afd27e Mon Sep 17 00:00:00 2001 From: Sigilante Date: Wed, 31 Jul 2024 12:40:42 -0500 Subject: [PATCH 39/41] Post corrected traversal order. --- pkg/noun/jets/i/lagoon.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 01e8e9954a..64447176fb 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -516,8 +516,8 @@ case 4: { float16_t sum16[2]; sum16[0] = (float16_t){SB_REAL16_ZERO}; - for (c3_d i = 0; i < len_x; i++) { - sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i]); + for (c3_d i = len_x; i > 0; i--) { + sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i-1]); } sum16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)sum16); @@ -526,8 +526,8 @@ case 5: { float32_t sum32[2]; sum32[0] = (float32_t){SB_REAL32_ZERO}; - for (c3_d i = 0; i < len_x; i++) { - sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i]); + for (c3_d i = len_x; i > 0; i--) { + sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i-1]); } sum32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)sum32); @@ -536,8 +536,8 @@ case 6: { float64_t sum64[2]; sum64[0] = (float64_t){SB_REAL64_ZERO}; - for (c3_d i = 0; i < len_x; i++) { - sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i]); + for (c3_d i = len_x; i > 0; i--) { + sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i-1]); } sum64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)sum64); @@ -546,8 +546,8 @@ case 7: { float128_t sum128[2]; sum128[0] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; - for (c3_d i = 0; i < len_x; i++) { - f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i]), &(sum128[0])); + for 
(c3_d i = len_x; i > 0; i--) { + f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i-1]), &(sum128[0])); } sum128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128); @@ -1883,8 +1883,9 @@ for (c3_d i = 1; i < n-1; i++) { ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); } - ((float16_t*)x_bytes16)[0] = a16; + // Assign in reverse order so that n=1 case is correctly left-hand bound. ((float16_t*)x_bytes16)[n-1] = b16; + ((float16_t*)x_bytes16)[0] = a16; x_bytes16[n*2] = 0x1; // pin head r_data = u3i_bytes((n*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); @@ -1900,8 +1901,8 @@ for (c3_d i = 1; i < n-1; i++) { ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); } - ((float32_t*)x_bytes32)[0] = a32; ((float32_t*)x_bytes32)[n-1] = b32; + ((float32_t*)x_bytes32)[0] = a32; x_bytes32[n*4] = 0x1; // pin head r_data = u3i_bytes((n*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); @@ -1917,8 +1918,8 @@ for (c3_d i = 1; i < n-1; i++) { ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); } - ((float64_t*)x_bytes64)[0] = a64; ((float64_t*)x_bytes64)[n-1] = b64; + ((float64_t*)x_bytes64)[0] = a64; x_bytes64[n*8] = 0x1; // pin head r_data = u3i_bytes((n*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); @@ -1941,8 +1942,8 @@ f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); } - ((float128_t*)x_bytes128)[0] = a128; ((float128_t*)x_bytes128)[n-1] = b128; + ((float128_t*)x_bytes128)[0] = a128; x_bytes128[n*16] = 0x1; // pin head r_data = u3i_bytes((n*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); @@ -2460,7 +2461,7 @@ _set_rounding(rnd); u3_noun r_data = _soft_run(u3qi_la_cumsum_i754(x_data, x_shape, x_bloq)); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return 
u3nc(u3nq(u3nc(0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); default: return u3_none; @@ -3208,7 +3209,9 @@ x_fxp = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(x_kind) + c3n == u3ud(x_kind) || + c3n == u3ud(n) || + (n < 1) // crash on zero size ) { return u3m_bail(c3__exit); From 840760d003b66602a640f45308610fca302e2847 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Thu, 1 Aug 2024 18:16:11 -0500 Subject: [PATCH 40/41] Post second review tweaks. --- pkg/noun/jets/i/lagoon.c | 374 +++++++++++++-------------------------- 1 file changed, 127 insertions(+), 247 deletions(-) diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 64447176fb..5da333ecf0 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -30,16 +30,6 @@ c3_d c[2]; }; -/* soft check on u3_none return from q jet -*/ - static inline u3_noun _soft_run(u3_noun a) - { - if (u3_none == a) { - u3m_bail(c3__fail); - } - return a; - } - // $?(%n %u %d %z %a) static inline void _set_rounding(c3_w a) @@ -2161,31 +2151,22 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here 
so no check + c3n == u3ud(rnd) ) { return u3m_bail(c3__exit); @@ -2193,9 +2174,9 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2217,31 +2198,22 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here so no check + c3n == u3ud(rnd) ) { return u3m_bail(c3__exit); @@ -2249,9 +2221,9 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_sub_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_sub_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2273,31 +2245,22 @@ u3x_sam_6, 
&y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here so no check + c3n == u3ud(rnd) ) { return u3m_bail(c3__exit); @@ -2305,9 +2268,9 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2329,31 +2292,22 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 + 
x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here so no check + c3n == u3ud(rnd) ) { return u3m_bail(c3__exit); @@ -2361,9 +2315,9 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2385,31 +2339,22 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) - // fxp does not need to match here so no check + c3n == u3ud(rnd) ) { return u3m_bail(c3__exit); @@ -2417,9 +2362,9 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mod_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = 
u3qi_la_mod_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2442,12 +2387,12 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || @@ -2459,9 +2404,9 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_cumsum_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_cumsum_i754(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3nc(0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3nc(0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2484,11 +2429,10 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(u3nc(x_meta, x_data)) @@ -2498,7 +2442,7 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_argmin_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_argmin_i754(x_data, x_shape, x_bloq); // bare atom (@ index) return r_data;} @@ -2523,7 +2467,7 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 
@@ -2536,7 +2480,7 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_ravel_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_ravel_i754(x_data, x_shape, x_bloq); // (list @) return r_data;} @@ -2561,11 +2505,10 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(u3nc(x_meta, x_data)) @@ -2575,7 +2518,7 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_argmax_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_argmax_i754(x_data, x_shape, x_bloq); // bare atom (@ index) return r_data;} @@ -2600,11 +2543,11 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(u3nc(x_meta, x_data)) @@ -2614,9 +2557,9 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_min_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_min_i754(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} default: return u3_none; @@ -2639,11 +2582,11 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail 
= u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(u3nc(x_meta, x_data)) @@ -2653,9 +2596,9 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_max_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_max_i754(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} default: return u3_none; @@ -2678,11 +2621,11 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) @@ -2691,9 +2634,9 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_abs_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_abs_i754(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} default: return u3_none; @@ -2715,39 +2658,27 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, - rnd; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 if ( c3n == u3ud(x_bloq) || - 
c3n == u3ud(y_bloq) || - c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || - c3n == u3r_sing(x_fxp, y_fxp) + c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3k(x_meta), r_data);} default: return u3_none; @@ -2769,39 +2700,27 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, - rnd; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || - c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || - c3n == u3r_sing(x_fxp, y_fxp) + c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3k(x_meta), r_data);} default: return u3_none; @@ -2823,39 +2742,27 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n 
== u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, - rnd; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || - c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || - c3n == u3r_sing(x_fxp, y_fxp) + c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3k(x_meta), r_data);} default: return u3_none; @@ -2877,39 +2784,27 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, - rnd; + u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || - c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || 
- c3n == u3r_sing(x_fxp, y_fxp) + c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3k(x_meta), r_data);} default: return u3_none; @@ -2934,19 +2829,19 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_adds_i754(x_data, n, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_adds_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -2970,19 +2865,19 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_subs_i754(x_data, n, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_subs_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), 
u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3006,19 +2901,19 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_muls_i754(x_data, n, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_muls_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3042,19 +2937,19 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_divs_i754(x_data, n, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_divs_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3078,19 +2973,19 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = 
u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mods_i754(x_data, n, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_mods_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3111,31 +3006,21 @@ u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || + c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 - y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || - c3n == u3ud(y_bloq) || - c3n == u3ud(x_kind) || - c3n == u3ud(y_kind) || - c3n == u3r_sing(x_shape, y_shape) || - c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || - c3n == u3r_sing(x_fxp, y_fxp) + c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); @@ -3143,10 +3028,10 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } c3_d len_x0 = _get_dims(x_shape)[0]; - return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3169,11 +3054,11 @@ { return 
u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(cor) @@ -3181,9 +3066,9 @@ { return u3m_bail(c3__exit); } else { - u3_noun r_data = _soft_run(u3qi_la_transpose(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); } } } @@ -3202,11 +3087,11 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || @@ -3219,10 +3104,10 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_linspace_i754(a, b, n, x_bloq)); + u3_noun r_data = u3qi_la_linspace_i754(a, b, n, x_bloq); if (r_data == u3_none) { return u3_none; } x_shape = u3nc(u3x_atom(n), u3_nul); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3245,11 +3130,11 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = 
u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) @@ -3260,7 +3145,7 @@ switch (x_kind) { case c3__i754: _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_range_i754(a, b, d, x_bloq)); + u3_noun r_data = u3qi_la_range_i754(a, b, d, x_bloq); if (r_data == u3_none) { return u3_none; } c3_d a_, b_, d_; c3_ds n_; @@ -3289,14 +3174,14 @@ u3r_bytes(0, 16, (c3_y*)&b__, b); u3r_bytes(0, 16, (c3_y*)&d__, d); float128_t tmp; - f128M_sub((float128_t*){&b__}, (float128_t*){&a__}, &tmp); - f128M_div(&tmp, (float128_t*){&d__}, &tmp); + f128M_sub((float128_t*)&b__, (float128_t*)&a__, &tmp); + f128M_div(&tmp, (float128_t*)&d__, &tmp); n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false); break;} } u3_noun n = u3i_chub(n_+1); x_shape = u3nt(u3k(n), 0x1, u3_nul); - return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; @@ -3319,11 +3204,11 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(cor) @@ -3331,10 +3216,10 @@ { return u3m_bail(c3__exit); } else { - u3_noun r_data = _soft_run(u3qi_la_diag(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_diag(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } c3_d len_x0 = _get_dims(x_shape)[0]; - return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data); + return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); } } } @@ -3353,12 +3238,12 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, 
x_bloq, x_kind, x_fxp; + u3_noun x_shape, x_bloq, x_kind, x_tail; if ( c3n == u3r_mean(x_meta, 2, &x_shape, 6, &x_bloq, 14, &x_kind, - 15, &x_fxp, + 15, &x_tail, 0) ) { @@ -3366,9 +3251,9 @@ } else { switch (x_kind) { case c3__i754: { - u3_noun r_data = _soft_run(u3qi_la_trace_i754(x_data, x_shape, x_bloq)); + u3_noun r_data = u3qi_la_trace_i754(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } - return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_fxp)), r_data);} + return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} default: return u3_none; @@ -3383,7 +3268,7 @@ // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; - + fprintf(stderr, "mmul 1\n"); if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, @@ -3395,31 +3280,26 @@ { return u3m_bail(c3__exit); } else { - u3_noun x_shape, x_bloq, x_kind, x_fxp, - y_shape, y_bloq, y_kind, y_fxp, + u3_noun x_shape, x_bloq, x_kind, x_tail, + y_shape, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 - x_fxp = u3t(u3t(u3t(x_meta))); // 15 + x_tail = u3t(u3t(u3t(x_meta))); // 15 y_shape = u3h(y_meta); // 2 - y_bloq = u3h(u3t(y_meta)); // 6 - y_kind = u3h(u3t(u3t(y_meta))); // 14 - y_fxp = u3t(u3t(u3t(y_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 - if ( c3n == u3r_sing(x_bloq, y_bloq) || - c3n == u3r_sing(x_kind, y_kind) || - c3n == _check(u3nc(x_meta, x_data)) || + if ( c3n == _check(u3nc(x_meta, x_data)) || c3n == _check(u3nc(y_meta, y_data)) - // fxp does not need to match so no check ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: + fprintf(stderr, "mmul 2\n"); _set_rounding(rnd); - u3_noun r_data = _soft_run(u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq)); + u3_noun r_data = u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq); // result is already [meta data] return r_data; From 
3d462d3fa5354ef210493a9943a6b2e7b8ab8bc8 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Tue, 13 Aug 2024 12:03:51 -0500 Subject: [PATCH 41/41] Add corrected range jets. --- WORKSPACE.bazel | 2 +- pkg/noun/jets/i/lagoon.c | 85 +++++++++++++++++++++------------------- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index ec6a637114..37d6659125 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -297,7 +297,7 @@ versioned_http_archive( strip_prefix = "SoftBLAS-{version}", # sha256 = "", url = "https://github.com/urbit/SoftBLAS/archive/{version}.tar.gz", - version = "29daa2f2fd0ad5070e405ad287f3623804f8fc67", + version = "cbffb33f19ea02f9ffbd184d445123c57929ec53", ) versioned_http_archive( diff --git a/pkg/noun/jets/i/lagoon.c b/pkg/noun/jets/i/lagoon.c index 5da333ecf0..13c0f2a138 100644 --- a/pkg/noun/jets/i/lagoon.c +++ b/pkg/noun/jets/i/lagoon.c @@ -10,6 +10,11 @@ #include // for pow() #include +#define f16_ceil(a) f16_roundToInt( a, softfloat_round_max, false ) +#define f32_ceil(a) f32_roundToInt( a, softfloat_round_max, false ) +#define f64_ceil(a) f64_roundToInt( a, softfloat_round_max, false ) +#define f128M_ceil(a, b) f128M_roundToInt( a, softfloat_round_max, false, b ) + union half { float16_t h; c3_w c; @@ -1964,15 +1969,14 @@ u3r_bytes(0, 2, (c3_y*)&(a16.v), a); u3r_bytes(0, 2, (c3_y*)&(b16.v), b); u3r_bytes(0, 2, (c3_y*)&(interval16.v), d); - c3_d n16 = f16_to_i64(f16_div(f16_sub(b16, a16), interval16), softfloat_round_minMag, false); - c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n16+1)*2+1)*sizeof(c3_y)); - for (c3_d i = 1; i <= n16; i++) { - ((float16_t*)x_bytes16)[n16-i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); - } - ((float16_t*)x_bytes16)[n16] = a16; - // ((float16_t*)x_bytes16)[0] = b16; - x_bytes16[(n16+1)*2] = 0x1; // pin head - r_data = u3i_bytes(((n16+1)*2+1)*sizeof(c3_y), x_bytes16); + c3_d n16 = f16_to_i64(f16_ceil(f16_div(f16_sub(b16, a16), interval16)), softfloat_round_minMag, false); + 
c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n16+1)*2)*sizeof(c3_y)); + ((float16_t*)x_bytes16)[0] = a16; + for (c3_d i = 1; i < n16; i++) { + ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); + } + ((float16_t*)x_bytes16)[n16].v = 0x1; // pin head + r_data = u3i_bytes(((n16+1)*2)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); break;} @@ -1981,15 +1985,14 @@ u3r_bytes(0, 4, (c3_y*)&(a32.v), a); u3r_bytes(0, 4, (c3_y*)&(b32.v), b); u3r_bytes(0, 4, (c3_y*)&(interval32.v), d); - c3_d n32 = f32_to_i64(f32_div(f32_sub(b32, a32), interval32), softfloat_round_minMag, false); - c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n32+1)*4+1)*sizeof(c3_y)); - for (c3_d i = 1; i <= n32; i++) { - ((float32_t*)x_bytes32)[n32-i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); - } - ((float32_t*)x_bytes32)[n32] = a32; - // ((float32_t*)x_bytes32)[0] = b32; - x_bytes32[(n32+1)*4] = 0x1; // pin head - r_data = u3i_bytes(((n32+1)*4+1)*sizeof(c3_y), x_bytes32); + c3_d n32 = f32_to_i64(f32_ceil(f32_div(f32_sub(b32, a32), interval32)), softfloat_round_minMag, false); + c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n32+1)*4)*sizeof(c3_y)); + ((float32_t*)x_bytes32)[0] = a32; + for (c3_d i = 1; i < n32; i++) { + ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); + } + ((float32_t*)x_bytes32)[n32].v = 0x1; // pin head + r_data = u3i_bytes(((n32+1)*4)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); break;} @@ -1998,15 +2001,14 @@ u3r_bytes(0, 8, (c3_y*)&(a64.v), a); u3r_bytes(0, 8, (c3_y*)&(b64.v), b); u3r_bytes(0, 8, (c3_y*)&(interval64.v), d); - c3_d n64 = f64_to_i64(f64_div(f64_sub(b64, a64), interval64), softfloat_round_minMag, false); - c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n64+1)*8+1)*sizeof(c3_y)); + c3_d n64 = f64_to_i64(f64_ceil(f64_div(f64_sub(b64, a64), interval64)), softfloat_round_minMag, false); + c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n64+1)*8)*sizeof(c3_y)); + ((float64_t*)x_bytes64)[0] = a64; for (c3_d i = 1; i < n64; i++) { - 
((float64_t*)x_bytes64)[n64-i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); + ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); } - ((float64_t*)x_bytes64)[n64] = a64; - // ((float64_t*)x_bytes64)[0] = b64; - x_bytes64[(n64+1)*8] = 0x1; // pin head - r_data = u3i_bytes(((n64+1)*8+1)*sizeof(c3_y), x_bytes64); + ((float64_t*)x_bytes64)[n64].v = 0x1; // pin head + r_data = u3i_bytes(((n64+1)*8)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); break;} @@ -2014,22 +2016,23 @@ float128_t a128, b128, interval128; u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); - u3r_bytes(0, 16, (c3_y*)&(interval128.v), d); + u3r_bytes(0, 16, (c3_y*)&(interval128.v[0]), d); float128_t tmp; f128M_sub(&b128, &a128, &tmp); - f128M_div(&tmp, &interval128, &interval128); + f128M_div(&tmp, &interval128, &tmp); + f128M_ceil(&tmp, &tmp); c3_d n128 = f128M_to_i64(&tmp, softfloat_round_minMag, false); - c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16+1)*sizeof(c3_y)); + c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16)*sizeof(c3_y)); float128_t i128; + ((float128_t*)x_bytes128)[0] = a128; for (c3_d i = 1; i < n128; i++) { i32_to_f128M(i, &i128); - f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[n128-i]); - f128M_add(&a128, &((float128_t*)x_bytes128)[n128-i], &((float128_t*)x_bytes128)[n128-i]); + f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); + f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); } - ((float128_t*)x_bytes128)[n128] = a128; - // ((float128_t*)x_bytes128)[0] = b128; - x_bytes128[(n128+1)*16] = 0x1; // pin head - r_data = u3i_bytes(((n128+1)*16+1)*sizeof(c3_y), x_bytes128); + ((float128_t*)x_bytes128)[n128].v[0] = 0x1; // pin head + ((float128_t*)x_bytes128)[n128].v[1] = 0x0; // pin head + r_data = u3i_bytes(((n128+1)*16)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); break;} } @@ -3154,19 +3157,19 @@ u3r_bytes(0, 2, (c3_y*)&a_, a); u3r_bytes(0, 2, 
(c3_y*)&b_, b); u3r_bytes(0, 2, (c3_y*)&d_, d); - n_ = f16_to_i64(f16_div(f16_sub((float16_t){b_}, (float16_t){a_}), (float16_t){d_}), softfloat_round_minMag, false); + n_ = f16_to_i64(f16_ceil(f16_div(f16_sub((float16_t){b_}, (float16_t){a_}), (float16_t){d_})), softfloat_round_minMag, false) - 1; break; case 5: u3r_bytes(0, 4, (c3_y*)&a_, a); u3r_bytes(0, 4, (c3_y*)&b_, b); u3r_bytes(0, 4, (c3_y*)&d_, d); - n_ = f32_to_i64(f32_div(f32_sub((float32_t){b_}, (float32_t){a_}), (float32_t){d_}), softfloat_round_minMag, false); + n_ = f32_to_i64(f32_ceil(f32_div(f32_sub((float32_t){b_}, (float32_t){a_}), (float32_t){d_})), softfloat_round_minMag, false) - 1; break; case 6: u3r_bytes(0, 8, (c3_y*)&a_, a); u3r_bytes(0, 8, (c3_y*)&b_, b); u3r_bytes(0, 8, (c3_y*)&d_, d); - n_ = f64_to_i64(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_}), softfloat_round_minMag, false); + n_ = f64_to_i64(f64_ceil(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_})), softfloat_round_minMag, false) - 1; break; case 7: { c3_d a__[2], b__[2], d__[2]; @@ -3176,11 +3179,12 @@ float128_t tmp; f128M_sub((float128_t*)&b__, (float128_t*)&a__, &tmp); f128M_div(&tmp, (float128_t*)&d__, &tmp); - n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false); + f128M_ceil(&tmp, &tmp); + n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false) - 1; break;} } u3_noun n = u3i_chub(n_+1); - x_shape = u3nt(u3k(n), 0x1, u3_nul); + x_shape = u3nc(u3k(n), u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: @@ -3268,7 +3272,7 @@ // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; - fprintf(stderr, "mmul 1\n"); + if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, @@ -3297,7 +3301,6 @@ } else { switch (x_kind) { case c3__i754: - fprintf(stderr, "mmul 2\n"); _set_rounding(rnd); u3_noun r_data = u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq); // result is already [meta data]