From 20883fe9182492e28f01c57d4e7e7ed7a5a21bf0 Mon Sep 17 00:00:00 2001 From: Peter Taoussanis Date: Thu, 2 May 2024 10:00:52 +0200 Subject: [PATCH] [new] Improve data compatibility when updating Nippy versions When support is added for a new type in Nippy version X, it necessarily means that data containing that new type and frozen with Nippy version X is unthawable with Nippy versions < X. Earlier versions of Nippy will throw an exception on thawing affected data: \"Unrecognized type id (). Data frozen with newer Nippy version?\" This can present a challenge when updating to new versions of Nippy, e.g.: - Rolling updates could lead to old and new versions of Nippy temporarily co-existing. - Data written with new types could limit your ability to revert a Nippy update. There's no easy solution to this in general, but we CAN at least help reduce the burden related to CHANGES in core data types by rolling out changed types in 2 phases: 1. Nippy vX reads new (changed) type, writes old type 2. Nippy vX+1 writes new (changed) type When relevant, we can then warn users in the CHANGELOG to not leapfrog necessary version updates (e.g. Nippy vX -> Nippy vX+2). This commit bootstraps the new compatibility feature by initially targeting core type compatibility with Nippy v3.2.0 (2022-07-18). A future Nippy version will then target v3.4.0, with an appropriate CHANGELOG instruction to update in steps for environments that may involve rolling updates. --- src/taoensso/nippy.clj | 151 +++++++++++++++++++++++++++++----- test/taoensso/nippy_tests.clj | 22 +++-- 2 files changed, 147 insertions(+), 26 deletions(-) diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index c6dcadbf..413fd619 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -330,6 +330,55 @@ (comment (get public-types-spec 96)) +;;;; Type history +;; To help support release targeting, we keep track of when new type ids are added + +(comment + (set! 
*print-length* nil) + (vec (sort (keys type-ids))) + + (let [id-history ; { #{type-ids}} + {340 ; v3.4.0 (2024-04-30), added 2 + ;; New: map-entry meta-protocol-key + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 + 105 106 110 111 112 113 114 115} + + 330 ; v3.3.0 (2023-10-11), added 11 + ;; New: long-pos-sm long-pos-md long-pos-lg long-neg-sm long-neg-md long-neg-lg + ;; str-sm* vec-sm* set-sm* map-sm* sql-date + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 105 106 + 110 111 112 113 114 115} + + 320 ; v3.2.0 (2022-07-18), added none + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115} + + 313 ; v3.1.3 (2022-06-23) + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 + 81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115} + + 300 ; v3.0.0 (2020-09-20), added 5 + ;; New: time-instant time-duration time-period kw-md sym-md + #{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 + 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 + 55 
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 80 + 81 82 90 91 100 101 102 105 106 110 111 112 113 114 115}}] + + (defn diff [new-release old-release] + (vec (sort (clojure.set/difference (id-history new-release) (id-history old-release)))))) + + (diff 340 330)) + ;;;; Ns imports (for convenience of lib consumers) (enc/defaliases @@ -427,6 +476,47 @@ (thaw (freeze [1 1 0 1 1])))) +(let [;; Initially target compatibility with v3.2.0 (2022-07-18) + ;; Next bump will be to v3.4.0 (2024-04-30) + target-release + (enc/get-env {:as :edn, :default 320} + :taoensso.nippy.target-release) + + target>= + (fn [min-release] + (if target-release + (>= (long target-release) (long min-release)) + true))] + + (defmacro ^:private target-release< [min-release] (not (target>= min-release))) + (defmacro ^:private target-release>= + "Returns true iff `target-release` is nil or >= given `min-release`. + Used to help ease data migration for changes to core data types. + + When support is added for a new type in Nippy version X, it necessarily means + that data containing that new type and frozen with Nippy version X is unthawable + with Nippy versions < X. + + Earlier versions of Nippy will throw an exception on thawing affected data: + \"Unrecognized type id (). Data frozen with newer Nippy version?\" + + This can present a challenge when updating to new versions of Nippy, e.g.: + + - Rolling updates could lead to old and new versions of Nippy temporarily co-existing. + - Data written with new types could limit your ability to revert a Nippy update. + + There's no easy solution to this in general, but we CAN at least help reduce the + burden related to CHANGES in core data types by rolling out changed types in 2 phases: + + 1. Nippy vX reads new (changed) type, writes old type + 2. Nippy vX+1 writes new (changed) type + + When relevant, we can then warn users in the CHANGELOG to not leapfrog necessary version + updates (e.g. Nippy vX -> Nippy vX+2)." 
+ [min-release] (target>= min-release))) + +(comment (macroexpand '(target-release>= 340))) + ;;;; Java Serializable config ;; Unfortunately quite a bit of complexity to do this safely @@ -662,9 +752,10 @@ (let [ba (.getBytes s StandardCharsets/UTF_8) len (alength ba)] (enc/cond - (sm-count?* len) (do (write-id out id-str-sm*) (write-sm-count* out len)) - (md-count? len) (do (write-id out id-str-md) (write-md-count out len)) - :else (do (write-id out id-str-lg) (write-lg-count out len))) + (and (target-release>= 330) (sm-count?* len)) (do (write-id out id-str-sm*) (write-sm-count* out len)) + (and (target-release< 330) (sm-count? len)) (do (write-id out id-str-sm_) (write-sm-count out len)) + (md-count? len) (do (write-id out id-str-md) (write-md-count out len)) + :else (do (write-id out id-str-lg) (write-lg-count out len))) (.write out ba 0 len)))) @@ -692,8 +783,26 @@ (.write out ba 0 len))) +(defn- write-long-legacy [^DataOutput out ^long n] + (enc/cond + (zero? n) (write-id out id-long-0) + (pos? n) + (enc/cond + (<= n Byte/MAX_VALUE) (do (write-id out id-long-sm_) (.writeByte out n)) + (<= n Short/MAX_VALUE) (do (write-id out id-long-md_) (.writeShort out n)) + (<= n Integer/MAX_VALUE) (do (write-id out id-long-lg_) (.writeInt out n)) + :else (do (write-id out id-long-xl) (.writeLong out n))) + + :else + (enc/cond + (>= n Byte/MIN_VALUE) (do (write-id out id-long-sm_) (.writeByte out n)) + (>= n Short/MIN_VALUE) (do (write-id out id-long-md_) (.writeShort out n)) + (>= n Integer/MIN_VALUE) (do (write-id out id-long-lg_) (.writeInt out n)) + :else (do (write-id out id-long-xl) (.writeLong out n))))) + (defn- write-long [^DataOutput out ^long n] (enc/cond + (target-release< 330) (write-long-legacy out n) (zero? n) (write-id out id-long-0) (pos? 
n) (enc/cond @@ -719,14 +828,10 @@ (write-id out id-vec-0) (do (enc/cond - (sm-count?* cnt) - (enc/cond - (== cnt 2) (write-id out id-vec-2) - (== cnt 3) (write-id out id-vec-3) - :else (do (write-id out id-vec-sm*) (write-sm-count* out cnt))) - - (md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt)) - :else (do (write-id out id-vec-lg) (write-lg-count out cnt))) + (and (target-release>= 330) (sm-count?* cnt)) (do (write-id out id-vec-sm*) (write-sm-count* out cnt)) + (and (target-release< 330) (sm-count? cnt)) (do (write-id out id-vec-sm_) (write-sm-count out cnt)) + (md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt)) + :else (do (write-id out id-vec-lg) (write-lg-count out cnt))) (-run! (fn [in] (-freeze-with-meta! in out)) v))))) @@ -817,6 +922,8 @@ (write-counted-coll out id-empty id-sm id-md id-lg coll) (write-uncounted-coll out id-empty id-sm id-md id-lg coll)))) +(def ^:private ^:const meta-protocol-key ::meta-protocol-key) + ;; Micro-optimization: ;; As (write-kvs out id-map-0 id-map-sm id-map-md id-map-lg x) (defn- write-map [^DataOutput out m is-metadata?] @@ -825,9 +932,10 @@ (write-id out id-map-0) (do (enc/cond - (sm-count?* cnt) (do (write-id out id-map-sm*) (write-sm-count* out cnt)) - (md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt)) - :else (do (write-id out id-map-lg) (write-lg-count out cnt))) + (and (target-release>= 330) (sm-count?* cnt)) (do (write-id out id-map-sm*) (write-sm-count* out cnt)) + (and (target-release< 330) (sm-count? cnt)) (do (write-id out id-map-sm_) (write-sm-count out cnt)) + (md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt)) + :else (do (write-id out id-map-lg) (write-lg-count out cnt))) (-run-kv! 
(fn [k v] @@ -835,7 +943,9 @@ (do ;; Strip Clojure v1.10+ metadata protocol extensions ;; (used by defprotocol `:extend-via-metadata`) - (write-id out id-meta-protocol-key) + (if (target-release>= 340) + (write-id out id-meta-protocol-key) + (-freeze-without-meta! meta-protocol-key out)) (write-id out id-nil)) (do (-freeze-with-meta! k out) @@ -852,9 +962,10 @@ (write-id out id-set-0) (do (enc/cond - (sm-count?* cnt) (do (write-id out id-set-sm*) (write-sm-count* out cnt)) - (md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt)) - :else (do (write-id out id-set-lg) (write-lg-count out cnt))) + (and (target-release>= 330) (sm-count?* cnt)) (do (write-id out id-set-sm*) (write-sm-count* out cnt)) + (and (target-release< 330) (sm-count? cnt)) (do (write-id out id-set-sm_) (write-sm-count out cnt)) + (md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt)) + :else (do (write-id out id-set-lg) (write-lg-count out cnt))) (-run! (fn [in] (-freeze-with-meta! in out)) s))))) @@ -1533,11 +1644,11 @@ id-false false id-char (.readChar in) - id-meta-protocol-key ::meta-protocol-key + id-meta-protocol-key meta-protocol-key id-meta (let [m (thaw-from-in! in) ; Always consume from stream x (thaw-from-in! 
in)] - (if-let [m (when *incl-metadata?* (not-empty (dissoc m ::meta-protocol-key)))] + (if-let [m (when *incl-metadata?* (not-empty (dissoc m meta-protocol-key)))] (with-meta x m) (do x))) diff --git a/test/taoensso/nippy_tests.clj b/test/taoensso/nippy_tests.clj index 12e12232..b13489bd 100644 --- a/test/taoensso/nippy_tests.clj +++ b/test/taoensso/nippy_tests.clj @@ -192,6 +192,19 @@ (defn ba-hash [^bytes ba] (hash (seq ba))) +(defn gen-hashes [] (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data)) +(defn cmp-hashes [new old] (vec (sort (reduce-kv (fn [s k v] (if (= (get old k) v) s (conj s k))) #{} new)))) + +(def ref-hashes-v341 + {:deftype -148586793, :lazy-seq-empty 1277437598, :true -1809580601, :long 598276629, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta -858252893, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 2005004017, :many-strings 1738215727, :nested -1350538572, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 813550992, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -921330463, :subvec 709331681, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1207486853, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord -553848560, :sorted-map -1160380145, :sql-date 80018667, :map-entry 1219306839, :false-boxed 1506926383, :uri 870148616, :period -2043530540, :many-longs -1109794519, :uuid -338331115, :set 1649942133, :kw-ns 1050084331, :map 1989337680, :many-doubles -827569787, :char 858269588}) + +(def ref-hashes-v340 + {:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 
603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588}) + +(comment + (cmp-hashes ref-hashes-v341 ref-hashes-v340) + [:defrecord :deftype :list :long :many-longs :many-strings :map :map-entry :meta :nested :set :sorted-map :sorted-set :str-short :subvec :uri :vector]) + (deftest _stable-serialized-output (testing "Stable serialized output" @@ -204,12 +217,9 @@ (is (ba= (freeze (sorted-map :a 1 :b 1)) (freeze (sorted-map :b 1 :a 1))) "Sorted structures are generally safe") - ;; Track serialized output of stress data so that we can at least be aware of - ;; (and warn about) unintended changes for common/elementary types, etc. Note that - ;; reference hashes will need to be recalculated on changes to stress data. 
- (let [reference-hashes ; (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data) - {:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588} - + ;; Track serialized output of stress data so that we can warn about changes to + ;; core types. Hashes will need to be recalculated on changes to stress data. + (let [reference-hashes ref-hashes-v341 failures ; #{{:keys [k v]}} (reduce-kv (fn [failures k v]