Skip to content

Commit

Permalink
impl querying, configgable timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
lgessler committed Mar 25, 2022
1 parent 8dc30d7 commit 26d7b1a
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 11 deletions.
6 changes: 5 additions & 1 deletion docs/book.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ java -jar conllu-rest.jar token revoke --secret "gold;secret=84EO60tU6lhcBhplbuE
```

== NLP Services
qwe
See https://github.com/lgessler/conllu-rest/blob/master/services/sample_xpos.py[`services/sample_xpos.py`] for a sample service.
Only XPOS, UPOS, and HEAD are currently supported.

== Configuration
By default, the uberjar will use its copy of the config located at https://github.com/lgessler/conllu-rest/blob/master/env/dev/resources/config.edn[`env/prod/resources/config.edn`].
Expand All @@ -108,6 +109,9 @@ Config keys:
|`:nlp-services`
| A vector of three-key maps. Each map should have a `:type` (currently always `:http`), an `:anno-type` (must be `:xpos`, `:upos`, or `:head`), and a URL (must point at a running <<NLP Services>>).

|`:nlp-retry-wait-period-ms`
| Time, in milliseconds, to wait after a failure before attempting to contact an HTTP NLP service again. Defaults to `10000` (10 seconds).

|`:port`
| Port used for the main web server. Must be a number.

Expand Down
88 changes: 79 additions & 9 deletions src/conllu_rest/routes/conllu/document.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,96 @@
[conllu-rest.xtdb.queries.diff :as diff]
[conllu-rest.xtdb.serialization :as serial]
[spec-tools.data-spec :as ds]
[xtdb.api :as xt]))
[xtdb.api :as xt]
[xtdb.query]))

;; Lookup table from each accepted `order-by` query-parameter value to the
;; [find-expression direction] pair that `document-query` places in the
;; query's :order-by vector. The whole literal is quoted once so that the
;; logic variables and aggregate forms stay unevaluated symbols and lists.
(def ^:private sort-map
  '{"name-inc"      [?dn :asc]
    "name-dec"      [?dn :desc]
    "xpos-gold-inc" [(count-contents ?xpos-gold) :asc]
    "xpos-gold-dec" [(count-contents ?xpos-gold) :desc]
    "upos-gold-inc" [(count-contents ?upos-gold) :asc]
    "upos-gold-dec" [(count-contents ?upos-gold) :desc]
    "head-gold-inc" [(count-contents ?head-gold) :asc]
    "head-gold-dec" [(count-contents ?head-gold) :desc]})

;; Custom query aggregate `count-contents`: keeps a running total of how many
;; items the aggregated values contain.
;;   - zero-arity: initial total (0)
;;   - one-arity:  completion step, returns the total unchanged
;;   - two-arity:  nil adds nothing, a collection adds its element count,
;;                 and any other value adds 1
(defmethod xtdb.query/aggregate 'count-contents [_]
  (fn
    ([] 0)
    ([total] total)
    ([total item]
     (if (nil? item)
       total
       (+ total (if (coll? item) (count item) 1))))))

;; Handler for the paged document listing.
;; Reads `limit`, `offset` and `order-by` from the request's
;; [:parameters :query] map and `node` from the request itself.
;; Responds with `bad-request` when a parameter fails validation; otherwise
;; runs two queries against (xt/db node) and responds with `ok` carrying
;;   :docs  - one map per result row (id, name, sentence/token counts and
;;            xpos/upos/head gold rates)
;;   :total - the count of entities that have a :document/id
(defn document-query [{:keys [node] :as req}]
(let [{:keys [limit offset]} (-> req :parameters :query)]
(let [offset (or (and (int? offset) (>= offset 0) offset) 0)]
(let [{:keys [limit offset order-by]} (-> req :parameters :query)]
;; A missing, non-integer or negative offset silently becomes 0 here.
(let [offset (or (and (int? offset) (>= offset 0) offset) 0)
sort-set (-> sort-map keys set)]
(cond (not (and (some? limit) (int? limit) (<= limit 100) (> limit 0)))
(bad-request (str "Limit must be an int between 1 and 100, but got " limit))

;; NOTE(review): offset was already defaulted to a non-negative int above,
;; so this branch looks unreachable -- confirm whether defaulting or
;; rejecting is the intended behavior.
(not (and (int? offset) (>= offset 0)))
(bad-request (str "Offset must be a non-negative integer: " offset))

;; A nil order-by is not a member of sort-set, so an absent parameter is
;; rejected here as well.
(not (sort-set order-by))
(bad-request (str "order-by parameter must be one of the following: " sort-set))

:else
(let [query {:find '[?d ?dn]
;; Result row shape: id, name, sentence count, token count, then the
;; gold xpos / upos / head counts (all via the count-contents aggregate).
(let [query {:find '[?d ?dn (count ?s) (count-contents ?t)
(count-contents ?xpos-gold)
(count-contents ?upos-gold)
(count-contents ?head-gold)]
:where '[[?d :document/id]
[?d :document/name ?dn]]
:order-by '[[?dn :desc]]
[?d :document/name ?dn]
[?d :document/sentences ?s]
;; subquery: find tokens
[(q {:find [?t],
:where [[?s :sentence/tokens ?t]]
:in [?s]} ?s)
?t]

;; subquery: find amount of gold xpos
;; (scoped to the current sentence: takes ?s as input)
[(q {:find [?xpos]
:where [[?s :sentence/tokens ?t]
[?t :token/xpos ?xpos]
[?xpos :xpos/quality "gold"]]
:in [?s]}
?s)
?xpos-gold]

;; subquery: find amount of gold upos
;; NOTE(review): unlike the xpos subquery, this one takes ?d and re-binds
;; ?s internally, so its result covers the whole document while the outer
;; query still has one row per sentence -- confirm the aggregated upos
;; count is not inflated by the number of sentences.
[(q {:find [?upos]
:where [[?d :document/sentences ?s]
[?s :sentence/tokens ?t]
[?t :token/upos ?upos]
[?upos :upos/quality "gold"]]
:in [?d]}
?d)
?upos-gold]

;; subquery: find amount of gold head
;; NOTE(review): document-scoped like the upos subquery above; the same
;; per-sentence vs per-document question applies.
[(q {:find [?head]
:where [[?d :document/sentences ?s]
[?s :sentence/tokens ?t]
[?t :token/head ?head]
[?head :head/quality "gold"]]
:in [?d]}
?d)
?head-gold]]
;; sort-map translates the validated order-by string into an
;; [expression direction] pair.
:order-by [(sort-map order-by)]
:limit limit
:offset offset}
;; Separate query so :total is not affected by :limit / :offset.
count-query {:find '[(count ?d)]
:where '[[?d :document/id]]}]
(ok {:docs (mapv (fn [[id name]] {:id id :name name})
;; NOTE(review): `/` on two integers yields a Clojure ratio when the
;; division is inexact and throws on a zero divisor -- confirm tcount can
;; never be 0 and that the response encoder handles ratios.
(ok {:docs (mapv (fn [[id name scount tcount xpos-gold upos-gold head-gold :as vals]]
{:id id
:name name
:sentence_count scount
:token_count tcount
:xpos_gold_rate (/ xpos-gold tcount)
:upos_gold_rate (/ upos-gold tcount)
:head_gold_rate (/ head-gold tcount)})
(xt/q (xt/db node) query))
:total (ffirst (xt/q (xt/db node) count-query))}))))))

Expand Down Expand Up @@ -69,8 +138,9 @@
["/document"
[""
{:get {:summary "Fetch a page's worth of docs (at \"docs\") and a total count of docs (at \"total\")"
:parameters {:query {:offset int?
:limit int?}}
:parameters {:query {:offset int?
:limit int?
:order-by (ds/maybe (s/spec (-> sort-map keys set)))}}
:handler document-query}}]
["/id/:id"
{:get {:summary (str "Produce representation of a document. Use \"format\" query param to get "
Expand Down
3 changes: 2 additions & 1 deletion src/conllu_rest/server/nlp/http.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
[cheshire.core :as json]
[conllu-rest.xtdb.serialization :as serialization]
[conllu-rest.xtdb.easy :as cxe]
[conllu-rest.server.config :refer [env]]
[conllu-rest.server.nlp.common :refer [SentenceLevelProbDistProvider complete-job get-sentence-ids-to-process]]
[conllu-rest.xtdb.queries :as cxq]
[xtdb.api :as xt]
Expand Down Expand Up @@ -61,7 +62,7 @@
(throw (ex-info "Invalid probas key:" {:key key})))
(-write-probas node key token-probas-pairs))

(def ^:dynamic *retry-wait-period* 10000)
;; Wait period in milliseconds, read from the :nlp-retry-wait-period-ms key of
;; `env`; falls back to 10000 when that key is absent or falsey.
(def ^:dynamic *retry-wait-period*
  (let [configured (:nlp-retry-wait-period-ms env)]
    (or configured 10000)))
(defn get-probas
"Attempt to contact an NLP service, defensively dealing with request failures and bad data"
[node url anno-type sentence-id]
Expand Down

0 comments on commit 26d7b1a

Please sign in to comment.