diff --git a/.gitignore b/.gitignore index eac0b22..ae7c08a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ build.xml .nrepl-port *.jar !opentelemetry-javaagent.jar +/.clj-kondo +/.lsp/.cache diff --git a/project.clj b/project.clj index 379a5a9..9f10d98 100644 --- a/project.clj +++ b/project.clj @@ -25,7 +25,6 @@ [com.fasterxml.jackson.core/jackson-annotations] [com.fasterxml.jackson.core/jackson-databind] [com.fasterxml.jackson.core/jackson-core]]] - [clojurewerkz/elastisch "2.2.1"] [com.novemberain/langohr "3.5.1"] [liberator "0.15.3"] [compojure "1.1.8"] @@ -39,7 +38,9 @@ [org.cyverse/service-logging "2.8.2"] [net.logstash.logback/logstash-logback-encoder "4.11"] [org.cyverse/event-messages "0.0.1"] - [me.raynes/fs "1.4.6"]] + [me.raynes/fs "1.4.6"] + [cc.qbits/spandex "0.7.11"] + [org.apache.httpcomponents/httpcore "4.4.11"] ] :eastwood {:exclude-namespaces [:test-paths] :linters [:wrong-arity :wrong-ns-form :wrong-pre-post :wrong-tag :misplaced-docstrings]} :plugins [[test2junit "1.1.3"] diff --git a/src/dewey/config.clj b/src/dewey/config.clj index 5dc0c9b..00abdd8 100644 --- a/src/dewey/config.clj +++ b/src/dewey/config.clj @@ -75,12 +75,12 @@ (cc/defprop-optstr es-user "The username for the Elasticsearch server" [props config-valid configs] - "dewey.es.username" nil) + "dewey.es.username" "admin") (cc/defprop-optstr es-password "The password for the Elasticsearch server" [props config-valid configs] - "dewey.es.password" nil) + "dewey.es.password" "admin") (cc/defprop-optstr es-index "The Elasticsearch index" diff --git a/src/dewey/core.clj b/src/dewey/core.clj index a78bbb9..7b94fe9 100644 --- a/src/dewey/core.clj +++ b/src/dewey/core.clj @@ -3,7 +3,7 @@ (:use [slingshot.slingshot :only [try+ throw+]]) (:require [clojure.tools.cli :as cli] [clojure.tools.logging :as log] - [clojurewerkz.elastisch.rest :as es] + [qbits.spandex :as s] [clj-jargon.init :as irods] [clojure-commons.config :as config] [dewey.amq :as amq] @@ -21,16 +21,16 @@ (defn- init-es "Establishes a connection to elasticsearch" [] - (let [url (URL. (cfg/es-uri)) - http-opts (if (or (empty? (cfg/es-user)) (empty? (cfg/es-password))) - {} - {:basic-auth [(cfg/es-user) (cfg/es-password)] - :content-type :application/json}) - conn (try - (es/connect (str url) http-opts) - (catch Exception e - (log/debug e) - nil))] + (let [url (URL. (cfg/es-uri)) + host-map {:hosts [(str url)]} + opts (if (or (empty? (cfg/es-user)) (empty? (cfg/es-password))) + host-map + (merge host-map {:http-client {:basic-auth + {:user (cfg/es-user) + :password (cfg/es-password)}}})) + conn (try + (s/client opts) + (catch Exception e (log/debug e) nil))] (if conn (do (log/info (format "Successfully connected to Elasticsearch: %s" url)) diff --git a/src/dewey/curation.clj b/src/dewey/curation.clj index 06cf182..6f03671 100644 --- a/src/dewey/curation.clj +++ b/src/dewey/curation.clj @@ -57,12 +57,12 @@ [irods es entity-type entity-id op] (log/trace "apply-or-remove " entity-type entity-id " called") (if-let [entity (entity/lookup-entity irods entity-type entity-id)] - (when (or (= type :data-object) (indexable? entity)) (op entity)) - (indexing/remove-entity es entity-type entity-id))) + (when (indexable? entity) (op entity)) + (indexing/remove-entity es entity-id))) (defn- apply-if-indexed - [irods es entity-type entity-id op] - (when (indexing/entity-indexed? es entity-type entity-id) + [irods es entity-id op] + (when (indexing/entity-indexed? es (str entity-id)) (op))) ; This function is recursive and could blow the stack if a collection tree is deep, like 500 or more @@ -176,7 +176,7 @@ reindex (fn [] (if-let [entity (entity/lookup-entity irods :collection id)] (when (indexable? entity) (indexing/update-metadata es entity))))] - (apply-if-indexed irods es :collection id reindex))) + (apply-if-indexed irods es id reindex))) (defn- reindex-coll-dest-metadata-handler [irods es msg] @@ -203,7 +203,7 @@ reindex (fn [] (if-let [entity (entity/lookup-entity irods :data-object id)] (indexing/update-metadata es entity)))] - (apply-if-indexed irods es :data-object id reindex))) + (apply-if-indexed irods es id reindex))) (defn- reindex-obj-dest-metadata-handler @@ -227,13 +227,13 @@ (defn- rm-collection-handler [irods es msg] - (indexing/remove-entity es :collection (extract-entity-id msg)) + (indexing/remove-entity es (extract-entity-id msg)) (update-parent-modify-time irods es (:path msg))) (defn- rm-data-object-handler [irods es msg] - (indexing/remove-entity es :data-object (extract-entity-id msg)) + (indexing/remove-entity es (extract-entity-id msg)) (update-parent-modify-time irods es (:path msg))) diff --git a/src/dewey/doc_prep.clj b/src/dewey/doc_prep.clj index c96ba8c..c1588c4 100644 --- a/src/dewey/doc_prep.clj +++ b/src/dewey/doc_prep.clj @@ -80,7 +80,7 @@ :creator (format-user creator) :dateCreated (format-time date-created) :dateModified (format-time date-modified) - :metadata (format-metadata metadata) + :metadata {:irods (format-metadata metadata)} :fileSize file-size :fileType file-type}) @@ -104,4 +104,4 @@ :creator (format-user creator) :dateCreated (format-time date-created) :dateModified (format-time date-modified) - :metadata (format-metadata metadata)}) + :metadata {:irods (format-metadata metadata)}}) diff --git a/src/dewey/indexing.clj b/src/dewey/indexing.clj index a5a6f07..96086e0 100644 --- a/src/dewey/indexing.clj +++ b/src/dewey/indexing.clj @@ -1,51 +1,50 @@ (ns dewey.indexing "This is the logic for making changes to search index." - (:require [clojurewerkz.elastisch.query :as es-query] - [clojurewerkz.elastisch.rest.document :as es-doc] - [clojurewerkz.elastisch.rest :as rest] + (:use [slingshot.slingshot :only [try+]]) + (:require [qbits.spandex :as s] [clojure-commons.file-utils :as file] [dewey.doc-prep :as prep] [dewey.config :as cfg] - [dewey.entity :as entity]) - (:import [java.util Map] - [clojure.lang Keyword])) + [dewey.entity :as entity] + [clojure.tools.logging :as log])) -(def ^{:private true} collection-type "folder") -(def ^{:private true} data-object-type "file") +(defn index-doc + "Indexes a document + Parameters: + es - the elasticsearch connection + doc - the document to index -(defmulti ^{:private true} mapping-type-of type) - -(defmethod mapping-type-of Map - [entity] - (mapping-type-of (entity/entity-type entity))) - -(defmethod mapping-type-of Keyword - [entity-type] - (case entity-type - :collection collection-type - :data-object data-object-type)) - - -(defn- index-doc - [es mapping-type doc] - (es-doc/create es (cfg/es-index) mapping-type doc :id (str (:id doc)))) - + Throws: + This function can throw an exception if it can't connect to elasticsearch or iRODS. The + function can also throw one if the document is already indexed." + [es doc] + (s/request es {:url + [(cfg/es-index) :_create (str (:id doc))] + :method :put + :headers {"Content-Type" "application/json"} + :body doc})) (defn- update-doc "Scripted updates which are only compatible with Elasticsearch 5.x and greater." [es entity script params] - (rest/post es - (rest/record-update-url es - (cfg/es-index) - (mapping-type-of entity) - (str (entity/id entity))) - {:body {:script {:inline script :lang "painless" :params params}}})) + (s/request es {:url [(cfg/es-index) :_update (str (entity/id entity))] + :method :post + :headers {"Content-Type" "application/json"} + :body {"script" {"source" script "lang" "painless" "params" params}}})) + +(defn- index-error + [e] + (let [resp (ex-data e)] (if + (= 404 (:status resp)) + false + (do (log/info "Elasticsearch is not responding as expected.") + (throw e))))) -(defn entity-indexed? - ([es entity] - ^{:doc "Determines whether or not an iRODS entity has been indexed. + +(defmulti + ^{:doc "Determines whether or not an iRODS entity has been indexed. Parameters: es - the elasticsearch connection @@ -53,20 +52,30 @@ Throws: This function can throw an exception if it can't connect to elasticsearch."} - (es-doc/present? es (cfg/es-index) (mapping-type-of entity) (str (entity/id entity)))) + entity-indexed? (fn [_es entity] (cond + (string? entity) :string + (map? entity) :map))) - ([es entity-type entity-id] - ^{:doc "Determines whether or not an iRODS entity has been indexed. - Parameters: - es - the elasticsearch connection - entity-type - :collection|:data-object - entity-id - the UUID of the entity being checked +(defmethod entity-indexed? :string + [es entity-id] + (try+ + (s/request es {:url [(cfg/es-index) :_doc entity-id] + :method :head}) + true + (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo + (index-error e)))) - Throws: - This function can throw an exception if it can't connect to elasticsearch."} - (es-doc/present? es (cfg/es-index) (mapping-type-of entity-type) (str entity-id)))) +(defmethod entity-indexed? :map + [es entity] + (let [entity-id (str (entity/id entity))] + (try+ + (s/request es {:url [(cfg/es-index) :_doc entity-id] + :method :head}) + true + (catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo + (index-error e))))) (defn index-collection "Indexes a collection. @@ -86,7 +95,7 @@ (entity/creation-time coll) (entity/modification-time coll) (entity/metadata coll))] - (index-doc es collection-type folder))) + (index-doc es folder))) (defn index-data-object @@ -112,7 +121,7 @@ (entity/metadata obj) (or file-size (entity/size obj)) (or file-type (entity/media-type obj)))] - (index-doc es data-object-type file))) + (index-doc es file))) (defn remove-entity @@ -125,13 +134,13 @@ Throws: This function can throw an exception if it can't connect to elasticsearch." - [es entity-type entity-id] - (when (entity-indexed? es entity-type entity-id) - (es-doc/delete es (cfg/es-index) (mapping-type-of entity-type) (str entity-id)))) - + [es entity-id] + (when (entity-indexed? es (str entity-id)) + (s/request es {:url [(cfg/es-index) :_doc (str entity-id)] + :method :delete}))) (defn remove-entities-like - "Removes iRODS entities from the search index that have a path matching the provide glob. The glob + "Removes iRODS entities from the search index that have a path matching the provided glob. The glob supports * and ? wildcards with their typical meanings. This method uses the Elasticsearch 5.x Delete By Query API, and is not backward compatible with @@ -144,9 +153,10 @@ Throws: This function can throw an exception if it can't connect to elasticsearch." [es path-glob] - (rest/post es - (rest/url-with-path es (cfg/es-index) "_delete_by_query") - {:body {:query (es-query/wildcard :path path-glob)}})) + (s/request es {:url [(cfg/es-index) :_delete_by_query] + :method :post + :headers {"Content-Type" "application/json"} + :body {:query {:wildcard {:path path-glob}}}})) ; XXX - I wish I could think of a way to cleanly and simply separate out the document update logic @@ -170,15 +180,16 @@ {:path path :label (file/basename path)})) + ([es entity path mod-time] - (update-doc es - entity - "ctx._source.path = params.path; + (update-doc es + entity + "ctx._source.path = params.path; ctx._source.label = params.label; - if (params.dateModified > ctx._source.dateModified) { ctx._source.dateModified = params.dateModified };" - {:path path - :label (file/basename path) - :dateModified (prep/format-time mod-time)}))) + if (params.dateModified > ctx._source.dateModified) { ctx._source.dateModified = params.dateModified }" + {:path path + :label (file/basename path) + :dateModified (prep/format-time mod-time)}))) (defn update-acl @@ -211,8 +222,8 @@ [es entity] (update-doc es entity - "ctx._source.metadata = params.metadata" - {:metadata (prep/format-metadata (entity/metadata entity))})) + "ctx._source.metadata.irods = params.metadata.irods" + {:metadata {:irods (prep/format-metadata (entity/metadata entity))}})) (defn update-collection-modify-time