From 4eccc6cc7ab57f697847497f232e409ce541ad42 Mon Sep 17 00:00:00 2001 From: Kaiwen He Date: Mon, 10 Jun 2024 10:42:36 -0500 Subject: [PATCH] added auto-grow example; move auto-grow to neo-utils/neo-helpers-without-db.rkt; fixed two-hop query procedures --- .../neo/Kaiwen/auto-grow-example.rkt | 77 +++++++++++++++++++ .../query-low-level-multi-db.rkt | 24 +++--- .../neo/neo-server/neo-server-utils.rkt | 19 ----- medikanren2/neo/neo-server/neo-server.rkt | 2 +- .../neo/neo-utils/neo-helpers-multi-db.rkt | 2 + .../neo/neo-utils/neo-helpers-without-db.rkt | 23 +++++- 6 files changed, 115 insertions(+), 32 deletions(-) create mode 100644 contrib/medikanren2/neo/Kaiwen/auto-grow-example.rkt diff --git a/contrib/medikanren2/neo/Kaiwen/auto-grow-example.rkt b/contrib/medikanren2/neo/Kaiwen/auto-grow-example.rkt new file mode 100644 index 00000000..cee1b781 --- /dev/null +++ b/contrib/medikanren2/neo/Kaiwen/auto-grow-example.rkt @@ -0,0 +1,77 @@ +#lang racket/base + +(require + "../../../../medikanren2/neo/neo-low-level/query-low-level-multi-db.rkt" + "../../../../medikanren2/neo/neo-utils/neo-helpers-multi-db.rkt" + racket/set) + +;; Numbers of the top bucket of the RoboKop KG, Text Mining KG, and RTX-KG2 KG. +(define TOP_BUCKET_NUMBERS (list (list (get-highest-bucket-number-robokop)) + (list (get-highest-bucket-number-text-mining)) + (list (get-highest-bucket-number-rtx-kg2)))) + +;; A comparison between a one-hop query with and without the bucket setting +(define regulates-EGFR + (time (query:X->Known + #f + '("biolink:regulates") + (curies->synonyms-in-db (list "HGNC:3236"))))) +(length regulates-EGFR) +; 44 + +(define regulates-EGFR-faster + (time (query:X->Known-scored + #f + '("biolink:regulates") + (curies->synonyms-in-db (list "HGNC:3236")) + TOP_BUCKET_NUMBERS))) +(length regulates-EGFR-faster) +; 0 + +;; A problem with the bucketing approach is that we might +;; receive zero answers from the buckets that have a higher score (amount of support). 
+;; Hence, you can either manually decrease the bucket number and redo the +;; query, or you may write a procedure that auto-grows until it reaches +;; the number of answers you want, like the procedure auto-grow shown below. + + +;; The procedure 1-hop-proc takes a list of bucket numbers. +(define (1-hop-proc bucket*) + (query:X->Known-scored + #f + '("biolink:regulates") + (curies->synonyms-in-db (list "HGNC:3236")) + bucket*)) + +;; The procedure auto-grow takes a query procedure (how do you want to +;; query against the mediKanren neo server?), a list of bucket numbers to +;; start with, and a number representing the minimum number of answers +;; you expect to receive. It is possible that you do not get 'enough' +;; answers; that happens when those are all the answers it can return +;; from the KGs. You may try to modify the 1-hop-proc to get more: +;; more input curies, taking the subclasses of it/them, etc. +(define example (time (auto-grow 1-hop-proc TOP_BUCKET_NUMBERS 100))) +(length example) +; 44 + + +;; The implementation of auto-grow is customized to the needs of the +;; mediKanren neo-server. So it starts querying with the given +;; list of bucket numbers and decreases the level of confidence (score of +;; the edge) each round. It does not have to behave this way. You may +;; write your own procedure to achieve a different goal. + +;; A trick to skip querying a specific KG is to manually +;; set its bucket number to #f, like (#f '(10) '(10)) which means +;; 'I would not like answers from the robokop KG, and please query the +;; text-mining KG and rtx-kg2 KG with the bucket 10 for each.'. As you +;; may have noticed from the definition of TOP_BUCKET_NUMBERS, the bucket list +;; represents the KGs in the order of robokop, text-mining, and rtx-kg2. 
+(define regulates-EGFR-ramdom + (time (query:X->Known-scored + #f + '("biolink:regulates") + (curies->synonyms-in-db (list "HGNC:3236")) + (list #f '(10) '(10))))) +(length regulates-EGFR-ramdom) +; 0 diff --git a/medikanren2/neo/neo-low-level/query-low-level-multi-db.rkt b/medikanren2/neo/neo-low-level/query-low-level-multi-db.rkt index 6c93f3fb..7acf1f32 100644 --- a/medikanren2/neo/neo-low-level/query-low-level-multi-db.rkt +++ b/medikanren2/neo/neo-low-level/query-low-level-multi-db.rkt @@ -313,12 +313,14 @@ (define (query:X->Y->Known category*.X predicate*.X->Y category*.Y predicate*.Y->K curie*.K) (query:X->Y->Known-helper (and category*.X - (filter curie-in-db? category*.X)) - (filter curie-in-db? predicate*.X->Y) + (curies-in-db category*.X)) + (and predicate*.X->Y + (curies-in-db predicate*.X->Y)) (and category*.Y - (filter curie-in-db? category*.Y)) - (filter curie-in-db? predicate*.Y->K) - (filter curie-in-db? curie*.K) + (curies-in-db category*.Y)) + (and predicate*.Y->K + (curies-in-db predicate*.Y->K)) + (curies-in-db curie*.K) #f)) (define (query:X->Y->Known-scored category*.X predicate*.X->Y category*.Y predicate*.Y->K curie*.K score*) @@ -359,13 +361,15 @@ (define (query:Known->Y->X curie*.K predicate*.K->Y category*.Y predicate*.Y->X category*.X) (query:Known->Y->X-helper - (filter curie-in-db? curie*.K) - (filter curie-in-db? predicate*.K->Y) + (curies-in-db curie*.K) + (and predicate*.K->Y + (curies-in-db predicate*.K->Y)) (and category*.Y - (filter curie-in-db? category*.Y)) - (filter curie-in-db? predicate*.Y->X) + (curies-in-db category*.Y)) + (and predicate*.Y->X + (curies-in-db predicate*.Y->X)) (and category*.X - (filter curie-in-db? 
category*.X)) + (curies-in-db category*.X)) #f)) (define (query:Known->Y->X-scored curie*.K predicate*.K->Y category*.Y predicate*.Y->X category*.X score*) diff --git a/medikanren2/neo/neo-server/neo-server-utils.rkt b/medikanren2/neo/neo-server/neo-server-utils.rkt index 89bdd578..6a0d6f28 100644 --- a/medikanren2/neo/neo-server/neo-server-utils.rkt +++ b/medikanren2/neo/neo-server/neo-server-utils.rkt @@ -5,7 +5,6 @@ get-publications mvp2-1hop-filter mvp2-2hop-filter - auto-grow merge-list merge-hash minus-one-before-zero @@ -115,24 +114,6 @@ A decreases B increases C = A decreases C (mvp2-filter eprop direction))) q))) - -(define (auto-grow hop-proc score* result_amount) - (let ((half-result (exact-round (/ result_amount 2.0)))) - (let loop ((r '()) (sl score*)) - (cond - [(> (length r) half-result) - (printf "return ~a answers\n" (length r)) - r] - [(andmap not sl) - (printf "return ~a answers\n" (length r)) - r] - [else - #;(printf "number of answers: ~a, take next round\n" (length r)) - (loop (append r (hop-proc sl)) - (list (minus-one-before-zero (list-ref sl 0)) - (minus-one-before-zero (list-ref sl 1)) - (minus-one-before-zero (list-ref sl 2))))])))) - (define find-max-number (lambda (num*) (let loop ((n* (cdr num*)) (greatest (car num*))) diff --git a/medikanren2/neo/neo-server/neo-server.rkt b/medikanren2/neo/neo-server/neo-server.rkt index 5fc4bb6c..bb79c35d 100644 --- a/medikanren2/neo/neo-server/neo-server.rkt +++ b/medikanren2/neo/neo-server/neo-server.rkt @@ -29,7 +29,7 @@ (define DEFAULT_PORT 8384) -(define NEO_SERVER_VERSION "1.46") +(define NEO_SERVER_VERSION "1.47") ;; Maximum number of results to be returned from *each individual* KP, ;; or from mediKanren itself. 
diff --git a/medikanren2/neo/neo-utils/neo-helpers-multi-db.rkt b/medikanren2/neo/neo-utils/neo-helpers-multi-db.rkt index 7c9427b2..d85e4f2d 100644 --- a/medikanren2/neo/neo-utils/neo-helpers-multi-db.rkt +++ b/medikanren2/neo/neo-utils/neo-helpers-multi-db.rkt @@ -20,6 +20,8 @@ iota pretty-print-json-string take-at-most + ;; + auto-grow ) (require "../neo-low-level/query-low-level-multi-db.rkt" diff --git a/medikanren2/neo/neo-utils/neo-helpers-without-db.rkt b/medikanren2/neo/neo-utils/neo-helpers-without-db.rkt index b9759858..9c7739f3 100644 --- a/medikanren2/neo/neo-utils/neo-helpers-without-db.rkt +++ b/medikanren2/neo/neo-utils/neo-helpers-without-db.rkt @@ -2,7 +2,8 @@ (require racket/set - racket/unsafe/ops) + racket/unsafe/ops + racket/math) (provide maybe-time @@ -15,7 +16,8 @@ unsafe-bytes-split-tab bytes-base10->fxnat ;; - minus-one-before-zero) + minus-one-before-zero + auto-grow) ;; Use the second definition of 'maybe-time' to see the time use for ;; low-level query calls. @@ -142,3 +144,20 @@ (if (eq? (car n*) 0) #f (list (- (car n*) 1)))))) + +(define (auto-grow hop-proc score* result_amount) + (let ((half-result (exact-round (/ result_amount 2.0)))) + (let loop ((r '()) (sl score*)) + (cond + [(> (length r) half-result) + (printf "return ~a answers\n" (length r)) + r] + [(andmap not sl) + (printf "return ~a answers\n" (length r)) + r] + [else + #;(printf "number of answers: ~a, take next round\n" (length r)) + (loop (append r (hop-proc sl)) + (list (minus-one-before-zero (list-ref sl 0)) + (minus-one-before-zero (list-ref sl 1)) + (minus-one-before-zero (list-ref sl 2))))]))))