From eb03a20356940d807cf2cfdb3007160ff6a3f4f0 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 31 Aug 2023 17:26:43 -0700 Subject: [PATCH 01/17] Update project dependencies --- project.clj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/project.clj b/project.clj index 379a5a9..9f10d98 100644 --- a/project.clj +++ b/project.clj @@ -25,7 +25,6 @@ [com.fasterxml.jackson.core/jackson-annotations] [com.fasterxml.jackson.core/jackson-databind] [com.fasterxml.jackson.core/jackson-core]]] - [clojurewerkz/elastisch "2.2.1"] [com.novemberain/langohr "3.5.1"] [liberator "0.15.3"] [compojure "1.1.8"] @@ -39,7 +38,9 @@ [org.cyverse/service-logging "2.8.2"] [net.logstash.logback/logstash-logback-encoder "4.11"] [org.cyverse/event-messages "0.0.1"] - [me.raynes/fs "1.4.6"]] + [me.raynes/fs "1.4.6"] + [cc.qbits/spandex "0.7.11"] + [org.apache.httpcomponents/httpcore "4.4.11"] ] :eastwood {:exclude-namespaces [:test-paths] :linters [:wrong-arity :wrong-ns-form :wrong-pre-post :wrong-tag :misplaced-docstrings]} :plugins [[test2junit "1.1.3"] From cba84bbb700bdaef789e54e9cee0444e8249bf7e Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 31 Aug 2023 17:27:33 -0700 Subject: [PATCH 02/17] Update ES client to Spandex to initialize connection --- src/dewey/core.clj | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/dewey/core.clj b/src/dewey/core.clj index a78bbb9..7b94fe9 100644 --- a/src/dewey/core.clj +++ b/src/dewey/core.clj @@ -3,7 +3,7 @@ (:use [slingshot.slingshot :only [try+ throw+]]) (:require [clojure.tools.cli :as cli] [clojure.tools.logging :as log] - [clojurewerkz.elastisch.rest :as es] + [qbits.spandex :as s] [clj-jargon.init :as irods] [clojure-commons.config :as config] [dewey.amq :as amq] @@ -21,16 +21,16 @@ (defn- init-es "Establishes a connection to elasticsearch" [] - (let [url (URL. (cfg/es-uri)) - http-opts (if (or (empty? (cfg/es-user)) (empty? (cfg/es-password))) - {} - {:basic-auth [(cfg/es-user) (cfg/es-password)] - :content-type :application/json}) - conn (try - (es/connect (str url) http-opts) - (catch Exception e - (log/debug e) - nil))] + (let [url (URL. (cfg/es-uri)) + host-map {:hosts [(str url)]} + opts (if (or (empty? (cfg/es-user)) (empty? (cfg/es-password))) + host-map + (merge host-map {:http-client {:basic-auth + {:user (cfg/es-user) + :password (cfg/es-password)}}})) + conn (try + (s/client opts) + (catch Exception e (log/debug e) nil))] (if conn (do (log/info (format "Successfully connected to Elasticsearch: %s" url)) From 1fe6261b784336f043fbc673d0beef96f91ff84b Mon Sep 17 00:00:00 2001 From: sboleyn Date: Wed, 6 Sep 2023 10:27:49 -0700 Subject: [PATCH 03/17] Update config.clj to use local OpenSearch values --- src/dewey/config.clj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dewey/config.clj b/src/dewey/config.clj index 5dc0c9b..69b1d48 100644 --- a/src/dewey/config.clj +++ b/src/dewey/config.clj @@ -70,22 +70,22 @@ (cc/defprop-optstr es-uri "The hostname for the Elasticsearch server" [props config-valid configs] - "dewey.es.uri" "http://elasticsearch:9200") + "dewey.es.uri" "https://localhost:9200") (cc/defprop-optstr es-user "The username for the Elasticsearch server" [props config-valid configs] - "dewey.es.username" nil) + "dewey.es.username" "admin") (cc/defprop-optstr es-password "The password for the Elasticsearch server" [props config-valid configs] - "dewey.es.password" nil) + "dewey.es.password" "admin") (cc/defprop-optstr es-index "The Elasticsearch index" [props config-valid configs] - "dewey.es.index" "data") + "dewey.es.index" "data_test1") (cc/defprop-optstr irods-host "The hostname for the iRODS server" From 80c752e4add0f94563d5bf8d971395b38dbb0eea Mon Sep 17 00:00:00 2001 From: sboleyn Date: Wed, 6 Sep 2023 11:07:01 -0700 Subject: [PATCH 04/17] Check whether an error is due to a missing entity or something else in entity-indexed? --- src/dewey/indexing.clj | 139 +++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 62 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index a5a6f07..1ee78a1 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -1,51 +1,52 @@ (ns dewey.indexing "This is the logic for making changes to search index." - (:require [clojurewerkz.elastisch.query :as es-query] - [clojurewerkz.elastisch.rest.document :as es-doc] - [clojurewerkz.elastisch.rest :as rest] + (:use [slingshot.slingshot :only [try+]]) + (:require [qbits.spandex :as s] [clojure-commons.file-utils :as file] [dewey.doc-prep :as prep] [dewey.config :as cfg] - [dewey.entity :as entity]) - (:import [java.util Map] - [clojure.lang Keyword])) - - -(def ^{:private true} collection-type "folder") -(def ^{:private true} data-object-type "file") - - -(defmulti ^{:private true} mapping-type-of type) - -(defmethod mapping-type-of Map - [entity] - (mapping-type-of (entity/entity-type entity))) - -(defmethod mapping-type-of Keyword - [entity-type] - (case entity-type - :collection collection-type - :data-object data-object-type)) + [dewey.entity :as entity] + [clojure.tools.logging :as log])) (defn- index-doc - [es mapping-type doc] - (es-doc/create es (cfg/es-index) mapping-type doc :id (str (:id doc)))) - +;;This should throw an error message if it cannot connect + [es doc] + (s/request es {:url + [(cfg/es-index) :_doc (str (:id doc))] + :method :put + :headers {"Content-Type" "application/json"} + :body doc})) (defn- update-doc "Scripted updates which are only compatible with Elasticsearch 5.x and greater." [es entity script params] - (rest/post es - (rest/record-update-url es - (cfg/es-index) - (mapping-type-of entity) - (str (entity/id entity))) - {:body {:script {:inline script :lang "painless" :params params}}})) + (s/request es {:url [(cfg/es-index) :_update (str (entity/id entity))] + :method :post + :headers {"Content-Type" "application/json"} + :body {"script" {"source" script "lang" "painless" "params" params}}})) -(defn entity-indexed? - ([es entity] - ^{:doc "Determines whether or not an iRODS entity has been indexed. +(defn entity-type + [entity] + (cond + (string? entity) :string + (map? entity) :map)) + +(defn index-error + [e] + (let [resp (ex-data e)] + (println (:status resp)) + (cond + (= 404 (:status resp)) + (do (log/info (format "Entity %s not found in index %s" entity-id (cfg/es-index))) + false) + :else + (do (log/info (format "Elasticsearch is not responding as expected.")) + (throw e) + nil)))) + +(defmulti + ^{:doc "Determines whether or not an iRODS entity has been indexed. Parameters: es - the elasticsearch connection @@ -53,20 +54,33 @@ Throws: This function can throw an exception if it can't connect to elasticsearch."} - (es-doc/present? es (cfg/es-index) (mapping-type-of entity) (str (entity/id entity)))) + entity-indexed? (fn [_es entity] (entity-type entity))) - ([es entity-type entity-id] - ^{:doc "Determines whether or not an iRODS entity has been indexed. - Parameters: - es - the elasticsearch connection - entity-type - :collection|:data-object - entity-id - the UUID of the entity being checked +(defmethod entity-indexed? :string + [es entity-id] + (try+ + (s/request es {:url [(cfg/es-index) :_doc entity-id] + :method :head}) true + (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo + (index-error e)) - Throws: - This function can throw an exception if it can't connect to elasticsearch."} - (es-doc/present? es (cfg/es-index) (mapping-type-of entity-type) (str entity-id)))) + (catch Exception e + (log/info "Elasticsearch is not responding.") + (throw e)))) + + +(defmethod entity-indexed? :map + [es entity] + (try + (s/request es {:url [(cfg/es-index) :_doc (str (entity/id entity))] + :method :head}) true + (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo + (index-error e)) + (catch Exception e + (log/info "Elasticsearch is not responding.") + (throw e)))) (defn index-collection "Indexes a collection. @@ -86,7 +100,7 @@ (entity/creation-time coll) (entity/modification-time coll) (entity/metadata coll))] - (index-doc es collection-type folder))) + (index-doc es folder))) (defn index-data-object @@ -112,7 +126,7 @@ (entity/metadata obj) (or file-size (entity/size obj)) (or file-type (entity/media-type obj)))] - (index-doc es data-object-type file))) + (index-doc es file))) (defn remove-entity @@ -125,13 +139,13 @@ Throws: This function can throw an exception if it can't connect to elasticsearch." - [es entity-type entity-id] - (when (entity-indexed? es entity-type entity-id) - (es-doc/delete es (cfg/es-index) (mapping-type-of entity-type) (str entity-id)))) - + [es entity-id] + (when (entity-indexed? es (str entity-id)) + (s/request es {:url [(cfg/es-index) :_doc (str entity-id)]}))) +;; START HERE ON FRIDAY (defn remove-entities-like - "Removes iRODS entities from the search index that have a path matching the provide glob. The glob + "Removes iRODS entities from the search index that have a path matching the provided glob. The glob supports * and ? wildcards with their typical meanings. This method uses the Elasticsearch 5.x Delete By Query API, and is not backward compatible with @@ -144,9 +158,10 @@ Throws: This function can throw an exception if it can't connect to elasticsearch." [es path-glob] - (rest/post es - (rest/url-with-path es (cfg/es-index) "_delete_by_query") - {:body {:query (es-query/wildcard :path path-glob)}})) + (s/request es {:url [(cfg/es-index) :_delete_by_query] + :query-string "analyze_wildcard=true" + :method :post + :body {:query {:wildcard {:path path-glob}}}})) ; XXX - I wish I could think of a way to cleanly and simply separate out the document update logic @@ -171,14 +186,14 @@ :label (file/basename path)})) ([es entity path mod-time] - (update-doc es - entity - "ctx._source.path = params.path; + (update-doc es + entity + "ctx._source.path = params.path; ctx._source.label = params.label; - if (params.dateModified > ctx._source.dateModified) { ctx._source.dateModified = params.dateModified };" - {:path path - :label (file/basename path) - :dateModified (prep/format-time mod-time)}))) + if (params.dateModified > ctx._source.dateModified) { ctx._source.dateModified = params.dateModified }" + {:path path + :label (file/basename path) + :dateModified (prep/format-time mod-time)}))) (defn update-acl From 76a2896a6c3b5811e157ce67d5816dd27cb218a8 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Wed, 6 Sep 2023 11:12:32 -0700 Subject: [PATCH 05/17] Remove unneeded comments --- src/dewey/indexing.clj | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index 1ee78a1..fcf40b4 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -10,7 +10,6 @@ (defn- index-doc -;;This should throw an error message if it cannot connect [es doc] (s/request es {:url [(cfg/es-index) :_doc (str (:id doc))] @@ -143,7 +142,6 @@ (when (entity-indexed? es (str entity-id)) (s/request es {:url [(cfg/es-index) :_doc (str entity-id)]}))) -;; START HERE ON FRIDAY (defn remove-entities-like "Removes iRODS entities from the search index that have a path matching the provided glob. The glob supports * and ? wildcards with their typical meanings. From a0b86b2cf75ce3b5a6fa1958378f63862bc82fec Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 7 Sep 2023 12:21:10 -0700 Subject: [PATCH 06/17] Add directories to gitignore relating to Calva extension in VS Code --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index eac0b22..ae7c08a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ build.xml .nrepl-port *.jar !opentelemetry-javaagent.jar +/.clj-kondo +/.lsp/.cache From 9538186aa5bd1a2ef0ce6c3d35d2b505168ca48e Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 7 Sep 2023 12:27:24 -0700 Subject: [PATCH 07/17] Resolved issues relating to arity and type mismatches with legacy code --- src/dewey/curation.clj | 14 ++++++------ src/dewey/indexing.clj | 50 ++++++++++++++++++------------------------ 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/src/dewey/curation.clj b/src/dewey/curation.clj index 06cf182..4ee4269 100644 --- a/src/dewey/curation.clj +++ b/src/dewey/curation.clj @@ -58,11 +58,11 @@ (log/trace "apply-or-remove " entity-type entity-id " called") (if-let [entity (entity/lookup-entity irods entity-type entity-id)] (when (or (= type :data-object) (indexable? entity)) (op entity)) - (indexing/remove-entity es entity-type entity-id))) + (indexing/remove-entity es entity-id))) (defn- apply-if-indexed - [irods es entity-type entity-id op] - (when (indexing/entity-indexed? es entity-type entity-id) + [irods es entity-id op] + (when (indexing/entity-indexed? es (str entity-id)) (op))) ; This function is recursive and could blow the stack if a collection tree is deep, like 500 or more @@ -176,7 +176,7 @@ reindex (fn [] (if-let [entity (entity/lookup-entity irods :collection id)] (when (indexable? entity) (indexing/update-metadata es entity))))] - (apply-if-indexed irods es :collection id reindex))) + (apply-if-indexed irods es id reindex))) (defn- reindex-coll-dest-metadata-handler [irods es msg] @@ -203,7 +203,7 @@ reindex (fn [] (if-let [entity (entity/lookup-entity irods :data-object id)] (indexing/update-metadata es entity)))] - (apply-if-indexed irods es :data-object id reindex))) + (apply-if-indexed irods es id reindex))) (defn- reindex-obj-dest-metadata-handler @@ -227,13 +227,13 @@ (defn- rm-collection-handler [irods es msg] - (indexing/remove-entity es :collection (extract-entity-id msg)) + (indexing/remove-entity es (extract-entity-id msg)) (update-parent-modify-time irods es (:path msg))) (defn- rm-data-object-handler [irods es msg] - (indexing/remove-entity es :data-object (extract-entity-id msg)) + (indexing/remove-entity es (extract-entity-id msg)) (update-parent-modify-time irods es (:path msg))) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index fcf40b4..6a546ca 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -25,24 +25,22 @@ :headers {"Content-Type" "application/json"} :body {"script" {"source" script "lang" "painless" "params" params}}})) -(defn entity-type +(defn- entity-type [entity] (cond (string? entity) :string (map? entity) :map)) -(defn index-error - [e] - (let [resp (ex-data e)] - (println (:status resp)) - (cond - (= 404 (:status resp)) - (do (log/info (format "Entity %s not found in index %s" entity-id (cfg/es-index))) - false) - :else - (do (log/info (format "Elasticsearch is not responding as expected.")) - (throw e) - nil)))) +(defn- index-error + [e entity-id] + (let [resp (ex-data e)] (cond + (= 404 (:status resp)) + (do (log/info (format "Entity %s not found in index %s" entity-id (cfg/es-index))) + false) + :else + (do (log/info (format "Elasticsearch is not responding as expected.")) + (throw e))))) + (defmulti ^{:doc "Determines whether or not an iRODS entity has been indexed. @@ -60,26 +58,21 @@ [es entity-id] (try+ (s/request es {:url [(cfg/es-index) :_doc entity-id] - :method :head}) true + :method :head}) + true (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo - (index-error e)) - - (catch Exception e - (log/info "Elasticsearch is not responding.") - (throw e)))) + (index-error e entity-id)))) (defmethod entity-indexed? :map [es entity] - (try - (s/request es {:url [(cfg/es-index) :_doc (str (entity/id entity))] - :method :head}) true - (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo - (index-error e)) - - (catch Exception e - (log/info "Elasticsearch is not responding.") - (throw e)))) + (let [entity-id (str (entity/id entity))] + (try+ + (s/request es {:url [(cfg/es-index) :_doc entity-id] + :method :head}) + true + (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo + (index-error e entity-id))))) (defn index-collection "Indexes a collection. @@ -157,7 +150,6 @@ This function can throw an exception if it can't connect to elasticsearch." [es path-glob] (s/request es {:url [(cfg/es-index) :_delete_by_query] - :query-string "analyze_wildcard=true" :method :post :body {:query {:wildcard {:path path-glob}}}})) From ca99ad2177123107c10c3d49ad2a943046f278fa Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 7 Sep 2023 18:08:12 -0700 Subject: [PATCH 08/17] Update metadata location in document --- src/dewey/doc_prep.clj | 4 ++-- src/dewey/indexing.clj | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dewey/doc_prep.clj b/src/dewey/doc_prep.clj index c96ba8c..c1588c4 100644 --- a/src/dewey/doc_prep.clj +++ b/src/dewey/doc_prep.clj @@ -80,7 +80,7 @@ :creator (format-user creator) :dateCreated (format-time date-created) :dateModified (format-time date-modified) - :metadata (format-metadata metadata) + :metadata {:irods (format-metadata metadata)} :fileSize file-size :fileType file-type}) @@ -104,4 +104,4 @@ :creator (format-user creator) :dateCreated (format-time date-created) :dateModified (format-time date-modified) - :metadata (format-metadata metadata)}) + :metadata {:irods (format-metadata metadata)}}) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index 6a546ca..e6e50ca 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -216,8 +216,8 @@ [es entity] (update-doc es entity - "ctx._source.metadata = params.metadata" - {:metadata (prep/format-metadata (entity/metadata entity))})) + "ctx._source.metadata.irods = params.metadata.irods" + {:metadata {:irods (prep/format-metadata (entity/metadata entity))}})) (defn update-collection-modify-time From 31f9ad8ed633744774be0f82e82586a7e41244a7 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Fri, 8 Sep 2023 16:38:08 -0700 Subject: [PATCH 09/17] Add headers to post request in indexing --- src/dewey/indexing.clj | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index e6e50ca..01e9d88 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -9,7 +9,7 @@ [clojure.tools.logging :as log])) -(defn- index-doc +(defn index-doc [es doc] (s/request es {:url [(cfg/es-index) :_doc (str (:id doc))] @@ -35,8 +35,8 @@ [e entity-id] (let [resp (ex-data e)] (cond (= 404 (:status resp)) - (do (log/info (format "Entity %s not found in index %s" entity-id (cfg/es-index))) - false) + + false :else (do (log/info (format "Elasticsearch is not responding as expected.")) (throw e))))) @@ -59,6 +59,7 @@ (try+ (s/request es {:url [(cfg/es-index) :_doc entity-id] :method :head}) + (log/info (format "Entity %s found!!!!!" entity-id)) true (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo (index-error e entity-id)))) @@ -92,6 +93,7 @@ (entity/creation-time coll) (entity/modification-time coll) (entity/metadata coll))] + (log/info "INDEX-COLLECTION CALLED") (index-doc es folder))) @@ -151,6 +153,7 @@ [es path-glob] (s/request es {:url [(cfg/es-index) :_delete_by_query] :method :post + :headers {"Content-Type" "application/json"} :body {:query {:wildcard {:path path-glob}}}})) @@ -174,6 +177,7 @@ ctx._source.label = params.label;" {:path path :label (file/basename path)})) + ([es entity path mod-time] (update-doc es From 83c698abdf8a1a19ffb2c26e9305003e81d2e1b8 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Tue, 12 Sep 2023 16:34:45 -0700 Subject: [PATCH 10/17] Fix comparison by using entity-type instead of type --- src/dewey/curation.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dewey/curation.clj b/src/dewey/curation.clj index 4ee4269..6c3e115 100644 --- a/src/dewey/curation.clj +++ b/src/dewey/curation.clj @@ -57,7 +57,7 @@ [irods es entity-type entity-id op] (log/trace "apply-or-remove " entity-type entity-id " called") (if-let [entity (entity/lookup-entity irods entity-type entity-id)] - (when (or (= type :data-object) (indexable? entity)) (op entity)) + (when (or (= entity-type :data-object) (indexable? entity)) (op entity)) (indexing/remove-entity es entity-id))) (defn- apply-if-indexed From 5c48a65de0a30213f73e7ec1663d8d3062f59770 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Tue, 12 Sep 2023 16:38:51 -0700 Subject: [PATCH 11/17] Clean code by removing unused arguments and unneeded log/info --- src/dewey/indexing.clj | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index 01e9d88..4e4617c 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -32,10 +32,9 @@ (map? entity) :map)) (defn- index-error - [e entity-id] + [e] (let [resp (ex-data e)] (cond (= 404 (:status resp)) - false :else (do (log/info (format "Elasticsearch is not responding as expected.")) @@ -59,10 +58,9 @@ (try+ (s/request es {:url [(cfg/es-index) :_doc entity-id] :method :head}) - (log/info (format "Entity %s found!!!!!" entity-id)) true (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo - (index-error e entity-id)))) + (index-error e)))) (defmethod entity-indexed? :map @@ -73,7 +71,7 @@ :method :head}) true (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo - (index-error e entity-id))))) + (index-error e))))) (defn index-collection "Indexes a collection. @@ -93,7 +91,6 @@ (entity/creation-time coll) (entity/modification-time coll) (entity/metadata coll))] - (log/info "INDEX-COLLECTION CALLED") (index-doc es folder))) From ecc5294e4dad442fd1f5139512df0c29c21ab824 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 14 Sep 2023 12:05:18 -0700 Subject: [PATCH 12/17] Fix missing DELETE method from remove-entity --- src/dewey/indexing.clj | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index 4e4617c..dd997f9 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -35,7 +35,7 @@ [e] (let [resp (ex-data e)] (cond (= 404 (:status resp)) - false + false :else (do (log/info (format "Elasticsearch is not responding as expected.")) (throw e))))) @@ -132,7 +132,8 @@ This function can throw an exception if it can't connect to elasticsearch." [es entity-id] (when (entity-indexed? es (str entity-id)) - (s/request es {:url [(cfg/es-index) :_doc (str entity-id)]}))) + (s/request es {:url [(cfg/es-index) :_doc (str entity-id)] + :method :delete}))) (defn remove-entities-like "Removes iRODS entities from the search index that have a path matching the provided glob. The glob @@ -174,7 +175,7 @@ ctx._source.label = params.label;" {:path path :label (file/basename path)})) - + ([es entity path mod-time] (update-doc es From 0b304a61fdbc76e969c2185a47c563d203d19293 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 14 Sep 2023 12:21:02 -0700 Subject: [PATCH 13/17] Clean up index-error logic --- src/dewey/indexing.clj | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index dd997f9..9ea33d1 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -33,11 +33,10 @@ (defn- index-error [e] - (let [resp (ex-data e)] (cond - (= 404 (:status resp)) + (let [resp (ex-data e)] (if + (= 404 (:status resp)) false - :else - (do (log/info (format "Elasticsearch is not responding as expected.")) + (do (log/info "Elasticsearch is not responding as expected.") (throw e))))) From 7bbefa888ef2919e6d7bc9ad62f75beefdaef5f0 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 14 Sep 2023 12:47:35 -0700 Subject: [PATCH 14/17] Update index-doc to never overwrite an already indexed document --- src/dewey/indexing.clj | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index 9ea33d1..8a16e22 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -10,9 +10,18 @@ (defn index-doc + "Indexes a document + + Parameters: + es - the elasticsearch connection + doc - the document to index + + Throws: + This function can throw an exception if it can't connect to elasticsearch or iRODS. The + function can also throw one if the document is already indexed." [es doc] (s/request es {:url - [(cfg/es-index) :_doc (str (:id doc))] + [(cfg/es-index) :_create (str (:id doc))] :method :put :headers {"Content-Type" "application/json"} :body doc})) From b50fc0a2e0a720b80881e333e63b00b2fec83a54 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 14 Sep 2023 12:49:48 -0700 Subject: [PATCH 15/17] Reconfigure elastic uri and index name --- src/dewey/config.clj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dewey/config.clj b/src/dewey/config.clj index 69b1d48..00abdd8 100644 --- a/src/dewey/config.clj +++ b/src/dewey/config.clj @@ -70,7 +70,7 @@ (cc/defprop-optstr es-uri "The hostname for the Elasticsearch server" [props config-valid configs] - "dewey.es.uri" "https://localhost:9200") + "dewey.es.uri" "http://elasticsearch:9200") (cc/defprop-optstr es-user "The username for the Elasticsearch server" @@ -85,7 +85,7 @@ (cc/defprop-optstr es-index "The Elasticsearch index" [props config-valid configs] - "dewey.es.index" "data_test1") + "dewey.es.index" "data") (cc/defprop-optstr irods-host "The hostname for the iRODS server" From 011d812048914876acbff48c4e1bfd84449c182f Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 14 Sep 2023 12:51:05 -0700 Subject: [PATCH 16/17] Update src/dewey/curation.clj Co-authored-by: Ian McEwen --- src/dewey/curation.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dewey/curation.clj b/src/dewey/curation.clj index 6c3e115..6f03671 100644 --- a/src/dewey/curation.clj +++ b/src/dewey/curation.clj @@ -57,7 +57,7 @@ [irods es entity-type entity-id op] (log/trace "apply-or-remove " entity-type entity-id " called") (if-let [entity (entity/lookup-entity irods entity-type entity-id)] - (when (or (= entity-type :data-object) (indexable? entity)) (op entity)) + (when (indexable? entity) (op entity)) (indexing/remove-entity es entity-id))) (defn- apply-if-indexed From 306f5477851bc754fab804ac084409263f4fb987 Mon Sep 17 00:00:00 2001 From: sboleyn Date: Thu, 14 Sep 2023 12:59:46 -0700 Subject: [PATCH 17/17] Move string vs. map logic into anonymous function --- src/dewey/indexing.clj | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index 8a16e22..96086e0 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -34,12 +34,6 @@ :headers {"Content-Type" "application/json"} :body {"script" {"source" script "lang" "painless" "params" params}}})) -(defn- entity-type - [entity] - (cond - (string? entity) :string - (map? entity) :map)) - (defn- index-error [e] (let [resp (ex-data e)] (if @@ -58,7 +52,9 @@ Throws: This function can throw an exception if it can't connect to elasticsearch."} - entity-indexed? (fn [_es entity] (entity-type entity))) + entity-indexed? (fn [_es entity] (cond + (string? entity) :string + (map? entity) :map))) (defmethod entity-indexed? :string