Skip to content

Commit

Permalink
Merge pull request #14 from sboleyn/os-migration
Browse files Browse the repository at this point in the history
Opensearch migration
  • Loading branch information
ianmcorvidae authored Sep 18, 2023
2 parents 3fe7575 + 306f547 commit 163f9c3
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 88 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ build.xml
.nrepl-port
*.jar
!opentelemetry-javaagent.jar
/.clj-kondo
/.lsp/.cache
5 changes: 3 additions & 2 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
[com.fasterxml.jackson.core/jackson-annotations]
[com.fasterxml.jackson.core/jackson-databind]
[com.fasterxml.jackson.core/jackson-core]]]
[clojurewerkz/elastisch "2.2.1"]
[com.novemberain/langohr "3.5.1"]
[liberator "0.15.3"]
[compojure "1.1.8"]
Expand All @@ -39,7 +38,9 @@
[org.cyverse/service-logging "2.8.2"]
[net.logstash.logback/logstash-logback-encoder "4.11"]
[org.cyverse/event-messages "0.0.1"]
[me.raynes/fs "1.4.6"]]
[me.raynes/fs "1.4.6"]
[cc.qbits/spandex "0.7.11"]
[org.apache.httpcomponents/httpcore "4.4.11"] ]
:eastwood {:exclude-namespaces [:test-paths]
:linters [:wrong-arity :wrong-ns-form :wrong-pre-post :wrong-tag :misplaced-docstrings]}
:plugins [[test2junit "1.1.3"]
Expand Down
4 changes: 2 additions & 2 deletions src/dewey/config.clj
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@
(cc/defprop-optstr es-user
"The username for the Elasticsearch server"
[props config-valid configs]
"dewey.es.username" nil)
"dewey.es.username" "admin")

(cc/defprop-optstr es-password
"The password for the Elasticsearch server"
[props config-valid configs]
"dewey.es.password" nil)
"dewey.es.password" "admin")

(cc/defprop-optstr es-index
"The Elasticsearch index"
Expand Down
22 changes: 11 additions & 11 deletions src/dewey/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
(:use [slingshot.slingshot :only [try+ throw+]])
(:require [clojure.tools.cli :as cli]
[clojure.tools.logging :as log]
[clojurewerkz.elastisch.rest :as es]
[qbits.spandex :as s]
[clj-jargon.init :as irods]
[clojure-commons.config :as config]
[dewey.amq :as amq]
Expand All @@ -21,16 +21,16 @@
(defn- init-es
"Establishes a connection to elasticsearch"
[]
(let [url (URL. (cfg/es-uri))
http-opts (if (or (empty? (cfg/es-user)) (empty? (cfg/es-password)))
{}
{:basic-auth [(cfg/es-user) (cfg/es-password)]
:content-type :application/json})
conn (try
(es/connect (str url) http-opts)
(catch Exception e
(log/debug e)
nil))]
(let [url (URL. (cfg/es-uri))
host-map {:hosts [(str url)]}
opts (if (or (empty? (cfg/es-user)) (empty? (cfg/es-password)))
host-map
(merge host-map {:http-client {:basic-auth
{:user (cfg/es-user)
:password (cfg/es-password)}}}))
conn (try
(s/client opts)
(catch Exception e (log/debug e) nil))]
(if conn
(do
(log/info (format "Successfully connected to Elasticsearch: %s" url))
Expand Down
16 changes: 8 additions & 8 deletions src/dewey/curation.clj
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@
[irods es entity-type entity-id op]
(log/trace "apply-or-remove <irods> <es>" entity-type entity-id "<op> called")
(if-let [entity (entity/lookup-entity irods entity-type entity-id)]
(when (or (= type :data-object) (indexable? entity)) (op entity))
(indexing/remove-entity es entity-type entity-id)))
(when (indexable? entity) (op entity))
(indexing/remove-entity es entity-id)))

(defn- apply-if-indexed
[irods es entity-type entity-id op]
(when (indexing/entity-indexed? es entity-type entity-id)
[irods es entity-id op]
(when (indexing/entity-indexed? es (str entity-id))
(op)))

; This function is recursive and could blow the stack if a collection tree is deep, like 500 or more
Expand Down Expand Up @@ -176,7 +176,7 @@
reindex (fn []
(if-let [entity (entity/lookup-entity irods :collection id)]
(when (indexable? entity) (indexing/update-metadata es entity))))]
(apply-if-indexed irods es :collection id reindex)))
(apply-if-indexed irods es id reindex)))

(defn- reindex-coll-dest-metadata-handler
[irods es msg]
Expand All @@ -203,7 +203,7 @@
reindex (fn []
(if-let [entity (entity/lookup-entity irods :data-object id)]
(indexing/update-metadata es entity)))]
(apply-if-indexed irods es :data-object id reindex)))
(apply-if-indexed irods es id reindex)))


(defn- reindex-obj-dest-metadata-handler
Expand All @@ -227,13 +227,13 @@

(defn- rm-collection-handler
[irods es msg]
(indexing/remove-entity es :collection (extract-entity-id msg))
(indexing/remove-entity es (extract-entity-id msg))
(update-parent-modify-time irods es (:path msg)))


(defn- rm-data-object-handler
[irods es msg]
(indexing/remove-entity es :data-object (extract-entity-id msg))
(indexing/remove-entity es (extract-entity-id msg))
(update-parent-modify-time irods es (:path msg)))


Expand Down
4 changes: 2 additions & 2 deletions src/dewey/doc_prep.clj
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
:creator (format-user creator)
:dateCreated (format-time date-created)
:dateModified (format-time date-modified)
:metadata (format-metadata metadata)
:metadata {:irods (format-metadata metadata)}
:fileSize file-size
:fileType file-type})

Expand All @@ -104,4 +104,4 @@
:creator (format-user creator)
:dateCreated (format-time date-created)
:dateModified (format-time date-modified)
:metadata (format-metadata metadata)})
:metadata {:irods (format-metadata metadata)}})
137 changes: 74 additions & 63 deletions src/dewey/indexing.clj
Original file line number Diff line number Diff line change
@@ -1,72 +1,81 @@
(ns dewey.indexing
"This is the logic for making changes to search index."
(:require [clojurewerkz.elastisch.query :as es-query]
[clojurewerkz.elastisch.rest.document :as es-doc]
[clojurewerkz.elastisch.rest :as rest]
(:use [slingshot.slingshot :only [try+]])
(:require [qbits.spandex :as s]
[clojure-commons.file-utils :as file]
[dewey.doc-prep :as prep]
[dewey.config :as cfg]
[dewey.entity :as entity])
(:import [java.util Map]
[clojure.lang Keyword]))
[dewey.entity :as entity]
[clojure.tools.logging :as log]))


(def ^{:private true} collection-type "folder")
(def ^{:private true} data-object-type "file")
(defn index-doc
"Indexes a document
Parameters:
es - the elasticsearch connection
doc - the document to index
(defmulti ^{:private true} mapping-type-of type)

(defmethod mapping-type-of Map
[entity]
(mapping-type-of (entity/entity-type entity)))

(defmethod mapping-type-of Keyword
[entity-type]
(case entity-type
:collection collection-type
:data-object data-object-type))


(defn- index-doc
[es mapping-type doc]
(es-doc/create es (cfg/es-index) mapping-type doc :id (str (:id doc))))

Throws:
This function can throw an exception if it can't connect to elasticsearch or iRODS. The
function can also throw one if the document is already indexed."
[es doc]
(s/request es {:url
[(cfg/es-index) :_create (str (:id doc))]
:method :put
:headers {"Content-Type" "application/json"}
:body doc}))

(defn- update-doc
"Scripted updates which are only compatible with Elasticsearch 5.x and greater."
[es entity script params]
(rest/post es
(rest/record-update-url es
(cfg/es-index)
(mapping-type-of entity)
(str (entity/id entity)))
{:body {:script {:inline script :lang "painless" :params params}}}))
(s/request es {:url [(cfg/es-index) :_update (str (entity/id entity))]
:method :post
:headers {"Content-Type" "application/json"}
:body {"script" {"source" script "lang" "painless" "params" params}}}))

(defn- index-error
[e]
(let [resp (ex-data e)] (if
(= 404 (:status resp))
false
(do (log/info "Elasticsearch is not responding as expected.")
(throw e)))))

(defn entity-indexed?
([es entity]
^{:doc "Determines whether or not an iRODS entity has been indexed.

(defmulti
^{:doc "Determines whether or not an iRODS entity has been indexed.
Parameters:
es - the elasticsearch connection
entity - the entity being checked
Throws:
This function can throw an exception if it can't connect to elasticsearch."}
(es-doc/present? es (cfg/es-index) (mapping-type-of entity) (str (entity/id entity))))
entity-indexed? (fn [_es entity] (cond
(string? entity) :string
(map? entity) :map)))

([es entity-type entity-id]
^{:doc "Determines whether or not an iRODS entity has been indexed.

Parameters:
es - the elasticsearch connection
entity-type - :collection|:data-object
entity-id - the UUID of the entity being checked
(defmethod entity-indexed? :string
[es entity-id]
(try+
(s/request es {:url [(cfg/es-index) :_doc entity-id]
:method :head})
true
(catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo
(index-error e))))

Throws:
This function can throw an exception if it can't connect to elasticsearch."}
(es-doc/present? es (cfg/es-index) (mapping-type-of entity-type) (str entity-id))))

(defmethod entity-indexed? :map
[es entity]
(let [entity-id (str (entity/id entity))]
(try+
(s/request es {:url [(cfg/es-index) :_doc entity-id]
:method :head})
true
(catch clojure.lang.ExceptionInfo e ;;qbits.spandex.ResponseException is wrapped in clojure.lang.ExceptionInfo
(index-error e)))))

(defn index-collection
"Indexes a collection.
Expand All @@ -86,7 +95,7 @@
(entity/creation-time coll)
(entity/modification-time coll)
(entity/metadata coll))]
(index-doc es collection-type folder)))
(index-doc es folder)))


(defn index-data-object
Expand All @@ -112,7 +121,7 @@
(entity/metadata obj)
(or file-size (entity/size obj))
(or file-type (entity/media-type obj)))]
(index-doc es data-object-type file)))
(index-doc es file)))


(defn remove-entity
Expand All @@ -125,13 +134,13 @@
Throws:
This function can throw an exception if it can't connect to elasticsearch."
[es entity-type entity-id]
(when (entity-indexed? es entity-type entity-id)
(es-doc/delete es (cfg/es-index) (mapping-type-of entity-type) (str entity-id))))

[es entity-id]
(when (entity-indexed? es (str entity-id))
(s/request es {:url [(cfg/es-index) :_doc (str entity-id)]
:method :delete})))

(defn remove-entities-like
"Removes iRODS entities from the search index that have a path matching the provide glob. The glob
"Removes iRODS entities from the search index that have a path matching the provided glob. The glob
supports * and ? wildcards with their typical meanings.
This method uses the Elasticsearch 5.x Delete By Query API, and is not backward compatible with
Expand All @@ -144,9 +153,10 @@
Throws:
This function can throw an exception if it can't connect to elasticsearch."
[es path-glob]
(rest/post es
(rest/url-with-path es (cfg/es-index) "_delete_by_query")
{:body {:query (es-query/wildcard :path path-glob)}}))
(s/request es {:url [(cfg/es-index) :_delete_by_query]
:method :post
:headers {"Content-Type" "application/json"}
:body {:query {:wildcard {:path path-glob}}}}))


; XXX - I wish I could think of a way to cleanly and simply separate out the document update logic
Expand All @@ -170,15 +180,16 @@
{:path path
:label (file/basename path)}))


([es entity path mod-time]
(update-doc es
entity
"ctx._source.path = params.path;
(update-doc es
entity
"ctx._source.path = params.path;
ctx._source.label = params.label;
if (params.dateModified > ctx._source.dateModified) { ctx._source.dateModified = params.dateModified };"
{:path path
:label (file/basename path)
:dateModified (prep/format-time mod-time)})))
if (params.dateModified > ctx._source.dateModified) { ctx._source.dateModified = params.dateModified }"
{:path path
:label (file/basename path)
:dateModified (prep/format-time mod-time)})))


(defn update-acl
Expand Down Expand Up @@ -211,8 +222,8 @@
[es entity]
(update-doc es
entity
"ctx._source.metadata = params.metadata"
{:metadata (prep/format-metadata (entity/metadata entity))}))
"ctx._source.metadata.irods = params.metadata.irods"
{:metadata {:irods (prep/format-metadata (entity/metadata entity))}}))


(defn update-collection-modify-time
Expand Down

0 comments on commit 163f9c3

Please sign in to comment.