Skip to content

Commit

Permalink
Durable persistent set (#503)
Browse files Browse the repository at this point in the history
* Make persistent set durable

* set->pset correction

* Fix io handlers

* avoid flushing for mem db

* Move index config to index namespaces

* Undo reformatting

* Fix config test

* Bump persistent-sorted-set and add cache with freeing.

* Fix temporal-upsert speed by using efficient index to lookup old value.

* Add threadsafe persistent-sorted-set, reactivate benchmarks.

* Use polymorphism instead of manual dispatch with satisfies?, add type hints.

* Handle storage configuration through cache-size parameter. Must be > 0 now.

* Add type hints

* Add switch for flush

* Improve upsert comparators

* Separate concepts of search cache and store cache

* Use eavt index also for upsert to retrieve old-datom once instead of doing indexwise lookups.

* Use upstream pss implementation from storage branch. Cleanups.

* Make store and hashing configurable. Improve performance.

* Fix cache configuration loading including setting proper defaults.

* Default config to no crypto-hashing to provide higher performance.

* Use upstream pss.

* Set default-index to persistent-set and add configuration guard.

* Update default index in API test.

* Don't fail config index mismatch, but use stored setting and warn.

Reduce log level for storage to trace, same for transact data.

* Fix reinitialization of read handlers.

* Bump konserve version.

Co-authored-by: Christian Weilbach <[email protected]>
  • Loading branch information
jsmassa and whilo authored Nov 16, 2022
1 parent b1ea55b commit 6c1468f
Show file tree
Hide file tree
Showing 37 changed files with 902 additions and 667 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/classes
/checkouts
pom.xml.asc
*.jar
replikativ-datahike.jar
*.class
/.lein-*
/.nrepl-port
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
- Improve code samples using transact with arg-map @podgorniy
- Insert into persistent sorted set does not replace existing datom with identical EAV
- Single datom retraction fixed for persistent set index
- Refactor index namespaces
- Make persistent set durable

## 0.4.0

Expand Down
15 changes: 11 additions & 4 deletions benchmark/src/benchmark/cli.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
[benchmark.store :refer [save]]
[clojure.string :refer [join]]
[datahike.store :as ds]
[datahike.config :as dc]
[datahike.index :as di]))

(def output-formats (set (keys (methods save))))
Expand Down Expand Up @@ -74,12 +75,18 @@
:parse-fn read-string
:validate [(conj backend-names :all) #(str "A backend named " % " has not been implemented. "
"Available backends are: " backend-names)]]
["-k" "--cache SIZES"
(str "Cache sizes for which measurements should be done")
:default [0]
["-k" "--search-caches SIZES"
(str "Search cache sizes for which measurements should be done")
:default [dc/default-search-cache-size]
:parse-fn read-string
:validate [vector? "Must be a vector of non-negative integers."
#(every? nat-int? %) "Vector must consist of non-negative integers."]]
;; CLI option: store cache sizes to benchmark, given as an EDN vector.
;; Every size must be a positive integer (validated with `pos-int?`), so the
;; validation messages below say "positive" rather than "non-negative".
["-m" "--store-caches SIZES"
 (str "Store cache sizes for which measurements should be done")
 :default [dc/default-store-cache-size]
 :parse-fn read-string
 :validate [vector? "Must be a vector of positive integers."
            #(every? pos-int? %) "Vector must consist of positive integers."]]
["-j" "--schema VALUE"
(str "Schema flexibility configuration. Available are: " #{:read :write})
:default :write
Expand Down Expand Up @@ -185,4 +192,4 @@
(-main "run" "-x" "[0 10000 5000]" "-t" "test-bench" "-o" "edn" "bench.edn")
)

;TIMBRE_LEVEL=":info" clj -M:benchmark run --backend :file --index :datahike.index/persistent-set -t pss -o edn pss.edn --schema :write --history false
;TIMBRE_LEVEL=":info" clj -M:benchmark run --backend :file --index :datahike.index/persistent-set -t pss -o edn pss.edn --schema :write --history false
12 changes: 8 additions & 4 deletions benchmark/src/benchmark/config.clj
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,26 @@
{"mem-set" {:store {:backend :mem :id "performance-set"}
:index :datahike.index/persistent-set
:keep-history? false
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:schema-flexibility :write}
"mem-hht" {:store {:backend :mem :id "performance-hht"}
:index :datahike.index/hitchhiker-tree
:keep-history? false
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:schema-flexibility :write}
"file-set" {:store {:backend :file :path "/tmp/performance-set"}
:index :datahike.index/persistent-set
:keep-history? false
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:schema-flexibility :write}
"file-hht" {:store {:backend :file :path "/tmp/performance-hht"}
:index :datahike.index/hitchhiker-tree
:keep-history? false
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:schema-flexibility :write}})

(def schema
Expand Down
10 changes: 6 additions & 4 deletions benchmark/src/benchmark/measure.clj
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
:count n
:observations (vec times)}))

(defn requested-configs [{:keys [config-name history backend cache schema index] :as _options}]
(defn requested-configs [{:keys [config-name history backend search-caches store-caches schema index] :as _options}]
(if config-name
[(get c/named-db-configs config-name)]
(vec (for [index-type (if (= :all index)
Expand All @@ -153,14 +153,16 @@
keep-history (if (= :all history)
[true false]
[history])
cache-size cache
schema-flexibility (if (= :all schema)
[:read :write]
[schema])]
[schema])
search-cache search-caches
store-cache store-caches]
{:index index-type
:store {:backend backend-type}
:keep-history? keep-history
:cache-size cache-size
:search-cache-size search-cache
:store-cache-size store-cache
:schema-flexibility schema-flexibility}))))

(defn get-measurements
Expand Down
30 changes: 20 additions & 10 deletions benchmark/test/benchmark/measure_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,17 @@
(is (= '({:db-datoms 4
:db-entities 1
:dh-config {:backend :mem
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/persistent-set
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 40
:db-entities 10
:dh-config {:backend :mem
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/persistent-set
:keep-history? false
:schema-flexibility :write}
Expand All @@ -81,63 +83,71 @@
(is (= '({:db-datoms 4
:db-entities 1
:dh-config {:backend :mem
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/persistent-set
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 40
:db-entities 10
:dh-config {:backend :mem
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/persistent-set
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 4
:db-entities 1
:dh-config {:backend :mem
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/hitchhiker-tree
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 40
:db-entities 10
:dh-config {:backend :mem
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/hitchhiker-tree
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 4
:db-entities 1
:dh-config {:backend :file
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/persistent-set
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 40
:db-entities 10
:dh-config {:backend :file
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/persistent-set
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 4
:db-entities 1
:dh-config {:backend :file
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/hitchhiker-tree
:keep-history? false
:schema-flexibility :write}
:function :connection}
{:db-datoms 40
:db-entities 10
:dh-config {:backend :file
:cache-size 0
:search-cache-size 0
:store-cache-size 1
:index :datahike.index/hitchhiker-tree
:keep-history? false
:schema-flexibility :write}
Expand Down
1 change: 1 addition & 0 deletions bin/run-all-tests
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

set -o errexit
set -o pipefail

echo "Recompiling Java"
clj -T:build clean
clj -T:build compile
Expand Down
6 changes: 0 additions & 6 deletions bin/run-fast-unittests

This file was deleted.

6 changes: 6 additions & 0 deletions bin/run-hht-tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash

set -o errexit
set -o pipefail

TIMBRE_LEVEL=':fatal' ./bin/kaocha --focus :clj-hht "$@"
6 changes: 6 additions & 0 deletions bin/run-pss-tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash

set -o errexit
set -o pipefail

TIMBRE_LEVEL=':fatal' ./bin/kaocha --focus :clj-pss "$@"
6 changes: 3 additions & 3 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
io.replikativ/hasch {:mvn/version "0.3.7"}
io.replikativ/hitchhiker-tree {:mvn/version "0.2.222"}
io.replikativ/incognito {:mvn/version "0.3.66"}
io.replikativ/konserve {:mvn/version "0.7.275"}
persistent-sorted-set/persistent-sorted-set {:mvn/version "0.1.4"}
io.replikativ/konserve {:mvn/version "0.7.285"}
persistent-sorted-set/persistent-sorted-set {:mvn/version "0.2.1"}
environ/environ {:mvn/version "1.2.0"}
com.taoensso/timbre {:mvn/version "5.2.1"}
io.replikativ/superv.async {:mvn/version "0.3.43"}
Expand Down Expand Up @@ -47,7 +47,7 @@
:extra-deps {clj-http/clj-http {:mvn/version "3.12.3"}
org.clojure/tools.cli {:mvn/version "1.0.206"}}}

:benchmark {:main-opts ["-m" "benchmark.core"]
:benchmark {:main-opts ["-m" "benchmark.cli"]
:extra-paths ["benchmark/src"]
:extra-deps {clj-http/clj-http {:mvn/version "3.12.3"}
org.clojure/tools.cli {:mvn/version "1.0.206"}
Expand Down
10 changes: 8 additions & 2 deletions doc/benchmarking.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ TIMBRE_LEVEL=':warn' clj -M:benchmark run -f :query -q :simple-query -i 10
Options for `-c`:
- `mem-set` for in-memory database with persistent-set index
- `mem-hht` for in-memory database with hitchhiker-tree index
- `file` for database with file store backend and hitchhiker-tree index
- `file-set` for database with file store backend and persistent-set index
- `file-hht` for database with file store backend and hitchhiker-tree index

Implementations:

Expand All @@ -78,7 +79,12 @@ Implementations:
:schema-flexibility :write
:keep-history? false
:index :datahike.index/hitchhiker-tree}}
{:config-name "file"
{:config-name "file-set"
:config {:store {:backend :file :path "/tmp/performance-set"}
:schema-flexibility :write
:keep-history? false
:index :datahike.index/persistent-set}}
{:config-name "file-hht"
:config {:store {:backend :file :path "/tmp/performance-hht"}
:schema-flexibility :write
:keep-history? false
Expand Down
49 changes: 32 additions & 17 deletions src/datahike/config.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@
[environ.core :refer [env]]
[datahike.tools :as tools]
[datahike.store :as ds]
[datahike.constants :as c])
[datahike.index :as di])
(:import [java.net URI]))

(def ^:dynamic default-index :datahike.index/persistent-set)
(def ^:dynamic default-search-cache-size 10000)
(def ^:dynamic default-store-cache-size 1000)

(s/def ::index #{:datahike.index/hitchhiker-tree :datahike.index/persistent-set})
(s/def ::keep-history? boolean?)
(s/def ::schema-flexibility #{:read :write})
(s/def ::attribute-refs? boolean?)
(s/def ::search-cache-size nat-int?)
(s/def ::store-cache-size pos-int?)
(s/def ::crypto-hash? boolean?)
(s/def ::entity (s/or :map associative? :vec vector?))
(s/def ::initial-tx (s/nilable (s/or :data (s/coll-of ::entity) :path string?)))
(s/def ::name string?)
Expand All @@ -32,6 +39,9 @@
::keep-history?
::schema-flexibility
::attribute-refs?
::search-cache-size
::store-cache-size
::crypto-hash?
::initial-tx
::name
::middleware]))
Expand All @@ -42,9 +52,9 @@
:opt-un [:deprecated/temporal-index :deprecated/schema-on-read]))

(defn from-deprecated
[{:keys [backend username password path host port] :as backend-cfg}
[{:keys [backend username password path host port] :as _backend-cfg}
& {:keys [schema-on-read temporal-index index initial-tx]
:as index-cfg
:as _index-cfg
:or {schema-on-read false
index :datahike.index/hitchhiker-tree
temporal-index true}}]
Expand All @@ -60,14 +70,14 @@
:level {:path path}
:file {:path path}))
:index index
:index-config {:index-b-factor c/default-index-b-factor
:index-log-size c/default-index-log-size
:index-data-node-size c/default-index-data-node-size}
:index-config (di/default-index-config index)
:keep-history? temporal-index
:attribute-refs? false
:initial-tx initial-tx
:schema-flexibility (if (true? schema-on-read) :read :write)
:cache-size 100000})
:crypto-hash? false
:search-cache-size default-search-cache-size
:store-cache-size default-store-cache-size})

(defn int-from-env
[key default]
Expand Down Expand Up @@ -104,11 +114,11 @@
:schema-flexibility :read
:name (z/rand-german-mammal)
:attribute-refs? false
:index :datahike.index/hitchhiker-tree
:cache-size 100000
:index-config {:index-b-factor c/default-index-b-factor
:index-log-size c/default-index-log-size
:index-data-node-size c/default-index-data-node-size}})
:index default-index
:search-cache-size default-search-cache-size
:store-cache-size default-store-cache-size
:crypto-hash? false
:index-config (di/default-index-config default-index)})

(defn remove-nils
"Thanks to https://stackoverflow.com/a/34221816"
Expand All @@ -133,17 +143,22 @@
store-config (ds/default-config (merge
{:backend (keyword (:datahike-store-backend env :mem))}
(:store config-as-arg)))
index (if (:datahike-index env)
(keyword "datahike.index" (:datahike-index env))
default-index)
config {:store store-config
:initial-tx (:datahike-intial-tx env)
:keep-history? (bool-from-env :datahike-keep-history true)
:attribute-refs? (bool-from-env :datahike-attribute-refs false)
:name (:datahike-name env (z/rand-german-mammal))
:schema-flexibility (keyword (:datahike-schema-flexibility env :write))
:index (keyword "datahike.index" (:datahike-index env "hitchhiker-tree"))
:cache-size (:cache-size env 100000)
:index-config {:index-b-factor (int-from-env :datahike-b-factor c/default-index-b-factor)
:index-log-size (int-from-env :datahike-log-size c/default-index-log-size)
:index-data-node-size (int-from-env :datahike-data-node-size c/default-index-data-node-size)}}
:index index
:crypto-hash? false
:search-cache-size (int-from-env :datahike-search-cache-size default-search-cache-size)
:store-cache-size (int-from-env :datahike-store-cache-size default-store-cache-size)
:index-config (if-let [index-config (map-from-env :datahike-index-config nil)]
index-config
(di/default-index-config index))}
merged-config ((comp remove-nils tools/deep-merge) config config-as-arg)
{:keys [schema-flexibility initial-tx store attribute-refs?]} merged-config
config-spec (ds/config-spec store)]
Expand Down
Loading

0 comments on commit 6c1468f

Please sign in to comment.