Skip to content

Commit

Permalink
[new] Improve data compatibility when updating Nippy versions
Browse files Browse the repository at this point in the history
When support is added for a new type in Nippy version X, it necessarily means
that data containing that new type and frozen with Nippy version X is unthawable
with Nippy versions < X.

Earlier versions of Nippy will throw an exception on thawing affected data:
  \"Unrecognized type id (<n>). Data frozen with newer Nippy version?\"

This can present a challenge when updating to new versions of Nippy, e.g.:

  - Rolling updates could lead to old and new versions of Nippy temporarily co-existing.
  - Data written with new types could limit your ability to revert a Nippy update.

There's no easy solution to this in GENERAL, but we CAN at least help reduce the
burden related to CHANGES in core data types by introducing changes over 2 phases:

  1. Nippy vX   reads  new (changed) type, writes old type
  2. Nippy vX+1 writes new (changed) type

When relevant, we can then warn users in the CHANGELOG to not leapfrog
(e.g. Nippy vX -> Nippy vX+2) when doing rolling updates.

This commit bootstraps the new compatibility feature by initially targeting core type
compatibility with Nippy v3.2.0 (2022-07-18).

A future Nippy version (e.g. v3.5.0) will then target v3.4.0, with an appropriate
CHANGELOG instruction to update in phases for environments that involve rolling
updates.
  • Loading branch information
ptaoussanis committed May 2, 2024
1 parent bd4d520 commit c9f08ad
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 26 deletions.
61 changes: 41 additions & 20 deletions src/taoensso/nippy.clj
Original file line number Diff line number Diff line change
Expand Up @@ -662,9 +662,10 @@
(let [ba (.getBytes s StandardCharsets/UTF_8)
len (alength ba)]
(enc/cond
(sm-count?* len) (do (write-id out id-str-sm*) (write-sm-count* out len))
(md-count? len) (do (write-id out id-str-md) (write-md-count out len))
:else (do (write-id out id-str-lg) (write-lg-count out len)))
(and (impl/target-release>= 330) (sm-count?* len)) (do (write-id out id-str-sm*) (write-sm-count* out len))
(and (impl/target-release< 330) (sm-count? len)) (do (write-id out id-str-sm_) (write-sm-count out len))
(md-count? len) (do (write-id out id-str-md) (write-md-count out len))
:else (do (write-id out id-str-lg) (write-lg-count out len)))

(.write out ba 0 len))))

Expand Down Expand Up @@ -692,8 +693,26 @@

(.write out ba 0 len)))

(defn- write-long-legacy [^DataOutput out ^long n]
(enc/cond
(zero? n) (write-id out id-long-0)
(pos? n)
(enc/cond
(<= n Byte/MAX_VALUE) (do (write-id out id-long-sm_) (.writeByte out n))
(<= n Short/MAX_VALUE) (do (write-id out id-long-md_) (.writeShort out n))
(<= n Integer/MAX_VALUE) (do (write-id out id-long-lg_) (.writeInt out n))
:else (do (write-id out id-long-xl) (.writeLong out n)))

:else
(enc/cond
(>= n Byte/MIN_VALUE) (do (write-id out id-long-sm_) (.writeByte out n))
(>= n Short/MIN_VALUE) (do (write-id out id-long-md_) (.writeShort out n))
(>= n Integer/MIN_VALUE) (do (write-id out id-long-lg_) (.writeInt out n))
:else (do (write-id out id-long-xl) (.writeLong out n)))))

(defn- write-long [^DataOutput out ^long n]
(enc/cond
(impl/target-release< 330) (write-long-legacy out n)
(zero? n) (write-id out id-long-0)
(pos? n)
(enc/cond
Expand All @@ -719,14 +738,10 @@
(write-id out id-vec-0)
(do
(enc/cond
(sm-count?* cnt)
(enc/cond
(== cnt 2) (write-id out id-vec-2)
(== cnt 3) (write-id out id-vec-3)
:else (do (write-id out id-vec-sm*) (write-sm-count* out cnt)))

(md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt))
:else (do (write-id out id-vec-lg) (write-lg-count out cnt)))
(and (impl/target-release>= 330) (sm-count?* cnt)) (do (write-id out id-vec-sm*) (write-sm-count* out cnt))
(and (impl/target-release< 330) (sm-count? cnt)) (do (write-id out id-vec-sm_) (write-sm-count out cnt))
(md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt))
:else (do (write-id out id-vec-lg) (write-lg-count out cnt)))

(-run! (fn [in] (-freeze-with-meta! in out)) v)))))

Expand Down Expand Up @@ -817,6 +832,8 @@
(write-counted-coll out id-empty id-sm id-md id-lg coll)
(write-uncounted-coll out id-empty id-sm id-md id-lg coll))))

(def ^:private ^:const meta-protocol-key ::meta-protocol-key)

;; Micro-optimization:
;; As (write-kvs out id-map-0 id-map-sm id-map-md id-map-lg x)
(defn- write-map [^DataOutput out m is-metadata?]
Expand All @@ -825,17 +842,20 @@
(write-id out id-map-0)
(do
(enc/cond
(sm-count?* cnt) (do (write-id out id-map-sm*) (write-sm-count* out cnt))
(md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt))
:else (do (write-id out id-map-lg) (write-lg-count out cnt)))
(and (impl/target-release>= 330) (sm-count?* cnt)) (do (write-id out id-map-sm*) (write-sm-count* out cnt))
(and (impl/target-release< 330) (sm-count? cnt)) (do (write-id out id-map-sm_) (write-sm-count out cnt))
(md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt))
:else (do (write-id out id-map-lg) (write-lg-count out cnt)))

(-run-kv!
(fn [k v]
(if (and is-metadata? (fn? v) (qualified-symbol? k))
(do
;; Strip Clojure v1.10+ metadata protocol extensions
;; (used by defprotocol `:extend-via-metadata`)
(write-id out id-meta-protocol-key)
(if (impl/target-release>= 340)
(write-id out id-meta-protocol-key)
(-freeze-without-meta! meta-protocol-key out))
(write-id out id-nil))
(do
(-freeze-with-meta! k out)
Expand All @@ -852,9 +872,10 @@
(write-id out id-set-0)
(do
(enc/cond
(sm-count?* cnt) (do (write-id out id-set-sm*) (write-sm-count* out cnt))
(md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt))
:else (do (write-id out id-set-lg) (write-lg-count out cnt)))
(and (impl/target-release>= 330) (sm-count?* cnt)) (do (write-id out id-set-sm*) (write-sm-count* out cnt))
(and (impl/target-release< 330) (sm-count? cnt)) (do (write-id out id-set-sm_) (write-sm-count out cnt))
(md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt))
:else (do (write-id out id-set-lg) (write-lg-count out cnt)))

(-run! (fn [in] (-freeze-with-meta! in out)) s)))))

Expand Down Expand Up @@ -1533,11 +1554,11 @@
id-false false
id-char (.readChar in)

id-meta-protocol-key ::meta-protocol-key
id-meta-protocol-key meta-protocol-key
id-meta
(let [m (thaw-from-in! in) ; Always consume from stream
x (thaw-from-in! in)]
(if-let [m (when *incl-metadata?* (not-empty (dissoc m ::meta-protocol-key)))]
(if-let [m (when *incl-metadata?* (not-empty (dissoc m meta-protocol-key)))]
(with-meta x m)
(do x)))

Expand Down
90 changes: 90 additions & 0 deletions src/taoensso/nippy/impl.clj
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,96 @@
See that function's docstring for more info."
[] (trim nmax (state_))))

;;;; Release targeting

(comment
(set! *print-length* nil)
(vec (sort (keys taoensso.nippy/public-types-spec)))

;; To help support release targeting, we keep track of when new type ids are added
(let [id-history ; {<release> #{type-ids}}
{340 ; v3.4.0 (2024-04-30), added 2
;; New: map-entry meta-protocol-key
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
105 106 110 111 112 113 114 115}

330 ; v3.3.0 (2023-10-11), added 11
;; New: long-pos-sm long-pos-md long-pos-lg long-neg-sm long-neg-md long-neg-lg
;; str-sm* vec-sm* set-sm* map-sm* sql-date
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 105 106
110 111 112 113 114 115}

320 ; v3.2.0 (2022-07-18), added none
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115}

313 ; v3.1.3 (2022-06-23)
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115}

300 ; v3.0.0 (2020-09-20), added 5
;; New: time-instant time-duration time-period kw-md sym-md
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 80
81 82 90 91 100 101 102 105 106 110 111 112 113 114 115}}]

(defn diff [new-release old-release]
(vec (sort (clojure.set/difference (id-history new-release) (id-history old-release))))))

(diff 340 330))

(let [;; Initially target compatibility with v3.2.0 (2020-07-18)
;; Next release will target v3.4.0 (2024-04-30), etc.
target-release
(enc/get-env {:as :edn, :default 320}
:taoensso.nippy.target-release)

target>=
(fn [min-release]
(if target-release
(>= (long target-release) (long min-release))
true))]

(defmacro target-release< [min-release] (not (target>= min-release)))
(defmacro target-release>=
"Returns true iff `target-release` is nil or >= given `min-release`.
Used to help ease data migration for changes to core data types.
When support is added for a new type in Nippy version X, it necessarily means
that data containing that new type and frozen with Nippy version X is unthawable
with Nippy versions < X.
Earlier versions of Nippy will throw an exception on thawing affected data:
\"Unrecognized type id (<n>). Data frozen with newer Nippy version?\"
This can present a challenge when updating to new versions of Nippy, e.g.:
- Rolling updates could lead to old and new versions of Nippy temporarily co-existing.
- Data written with new types could limit your ability to revert a Nippy update.
There's no easy solution to this in GENERAL, but we CAN at least help reduce the
burden related to CHANGES in core data types by introducing changes over 2 phases:
1. Nippy vX reads new (changed) type, writes old type
2. Nippy vX+1 writes new (changed) type
When relevant, we can then warn users in the CHANGELOG to not leapfrog
(e.g. Nippy vX -> Nippy vX+2) when doing rolling updates."
[min-release] (target>= min-release)))

(comment (macroexpand '(target-release>= 340)))

;;;

(comment
Expand Down
22 changes: 16 additions & 6 deletions test/taoensso/nippy_tests.clj
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,19 @@

(defn ba-hash [^bytes ba] (hash (seq ba)))

(defn gen-hashes [] (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data))
(defn cmp-hashes [new old] (vec (sort (reduce-kv (fn [s k v] (if (= (get old k) v) s (conj s k))) #{} new))))

(def ref-hashes-v341
{:deftype -148586793, :lazy-seq-empty 1277437598, :true -1809580601, :long 598276629, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta -858252893, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 2005004017, :many-strings 1738215727, :nested -1350538572, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 813550992, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -921330463, :subvec 709331681, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1207486853, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord -553848560, :sorted-map -1160380145, :sql-date 80018667, :map-entry 1219306839, :false-boxed 1506926383, :uri 870148616, :period -2043530540, :many-longs -1109794519, :uuid -338331115, :set 1649942133, :kw-ns 1050084331, :map 1989337680, :many-doubles -827569787, :char 858269588})

(def ref-hashes-v340
{:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588})

(comment
(cmp-hashes ref-hashes-v341 ref-hashes-v340)
[:defrecord :deftype :list :long :many-longs :many-strings :map :map-entry :meta :nested :set :sorted-map :sorted-set :str-short :subvec :uri :vector])

(deftest _stable-serialized-output
(testing "Stable serialized output"

Expand All @@ -204,12 +217,9 @@
(is (ba= (freeze (sorted-map :a 1 :b 1))
(freeze (sorted-map :b 1 :a 1))) "Sorted structures are generally safe")

;; Track serialized output of stress data so that we can at least be aware of
;; (and warn about) unintended changes for common/elementary types, etc. Note that
;; reference hashes will need to be recalculated on changes to stress data.
(let [reference-hashes ; (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data)
{:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588}

;; Track serialized output of stress data so that we can detect unintentional changes,
;; and warn about intended ones. Hashes will need to be recalculated on changes to stress data.
(let [reference-hashes ref-hashes-v341
failures ; #{{:keys [k v]}}
(reduce-kv
(fn [failures k v]
Expand Down

0 comments on commit c9f08ad

Please sign in to comment.