-
Notifications
You must be signed in to change notification settings - Fork 2
/
kgtk_browser_app.py
4367 lines (3595 loc) · 184 KB
/
kgtk_browser_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Kypher backend support for the KGTK browser.
"""
import multiprocessing
from pathlib import Path
import shutil
import datetime
import hashlib
from http import HTTPStatus
import math
import os
import os.path
import json
import ast
import pandas as pd
import random
import sys
import traceback
from typing import Tuple, Set, List, MutableMapping, Optional, Mapping, Dict
import re
import time
import flask
from dateutil import parser
from dateutil.relativedelta import relativedelta
from operator import itemgetter
import browser.backend.kypher as kybe
import tempfile
from kgtk.kgtkformat import KgtkFormat
from kgtk.value.kgtkvalue import KgtkValue, KgtkValueFields
from kgtk.visualize.visualize_api import KgtkVisualize
from browser.backend.kypher_queries import KypherAPIObject
from venice import peer
import logging
# Set the log level of the werkzeug (development server) logger.
# logging.CRITICAL is the numeric level 50, so only critical records pass.
log = logging.getLogger('werkzeug')
log.setLevel(logging.CRITICAL)
# Map emotion Qnode ids to labels.
# Every key below is its label prefixed with "Q00_".
emotions_mapping = {
    'Q00_anticipation': 'anticipation',
    'Q00_love': 'love',
    'Q00_joy': 'joy',
    'Q00_pessimism': 'pessimism',
    'Q00_optimism': 'optimism',
    'Q00_sadness': 'sadness',
    'Q00_disgust': 'disgust',
    'Q00_anger': 'anger',
    'Q00_surprise': 'surprise',
    'Q00_fear': 'fear',
    'Q00_trust': 'trust',
}
# Map moral foundation Qnode ids to labels.
# Every key below is its label prefixed with "Q00_".
# NOTE(review): 'concreteness' is listed here as well; confirm that it is
# meant to be grouped with the moral foundation scores.
scores_mapping = {
    'Q00_subversion': 'subversion',
    'Q00_authority': 'authority',
    'Q00_cheating': 'cheating',
    'Q00_fairness': 'fairness',
    'Q00_harm': 'harm',
    'Q00_care': 'care',
    'Q00_betrayal': 'betrayal',
    'Q00_loyalty': 'loyalty',
    'Q00_degradation': 'degradation',
    'Q00_sanctity': 'sanctity',
    'Q00_concreteness': 'concreteness',
}
# How to run for local-system access:
# > export FLASK_APP=kgtk_browser_app.py
# > export FLASK_ENV=development
# > export KGTK_BROWSER_CONFIG=$PWD/kgtk_browser_config.py
# > flask run
# Example URLs for local server access:
# http://127.0.0.1:5000/kgtk/browser/backend/get_all_node_data?node=Q5
# http://127.0.0.1:5000/kgtk/kb
# http://127.0.0.1:5000/kgtk/kb/Q42
# How to run as a more general server (but please use nginx for
# deployment):
# > export FLASK_APP=kgtk_browser_app.py
# > export FLASK_ENV=development
# > export KGTK_BROWSER_CONFIG=$PWD/kgtk_browser_config.py
# > flask run --host 0.0.0.0 --port 1234
# Example URL for named server access:
# http://ckg07.isi.edu:1234/kgtk/browser/backend/get_all_node_data?node=Q5
# http://ckg07.isi.edu:1234/kb
# http://ckg07.isi.edu:1234/kb/Q42
# Flask application.
# The URL prefix for static files defaults to '/browser' and can be
# overridden through the KGTK_BROWSER_STATIC_URL environment variable.
STATIC_URL_PATH = os.environ.get('KGTK_BROWSER_STATIC_URL', '/browser')

app = flask.Flask(
    __name__,
    static_url_path=STATIC_URL_PATH,
    static_folder='app/build',
    template_folder='web/templates',
)

# Use the bundled configuration file unless KGTK_BROWSER_CONFIG is already set.
os.environ.setdefault('KGTK_BROWSER_CONFIG', 'browser/backend/kgtk_browser_config.py')
app.config.from_envvar('KGTK_BROWSER_CONFIG')

# Allow urls with trailing slashes
app.url_map.strict_slashes = False

# Do not sort keys when returning Python dictionaries as JSON
app.config['JSON_SORT_KEYS'] = False
# Built-in fallback values.  Most of them are copied into app.config further
# down when the loaded configuration file does not define the setting.
DEFAULT_SERVICE_PREFIX = '/kgtk/browser/backend/'
DEFAULT_LANGUAGE = 'en'

ID_SEARCH_THRESHOLD: int = -1 # force the code to not use IN queries for fetching qualifiers
ID_SEARCH_USING_IN: bool = True

# Item (node name) matching defaults.
DEFAULT_MATCH_ITEM_EXACTLY: bool = True
DEFAULT_MATCH_ITEM_PREFIXES: bool = True
DEFAULT_MATCH_ITEM_PREFIXES_LIMIT: int = 20
DEFAULT_MATCH_ITEM_IGNORE_CASE: bool = True

# Label matching defaults.
DEFAULT_MATCH_LABEL_EXACTLY: bool = True
DEFAULT_MATCH_LABEL_PREFIXES: bool = True
DEFAULT_MATCH_LABEL_PREFIXES_LIMIT: int = 20
DEFAULT_MATCH_LABEL_IGNORE_CASE: bool = True
DEFAULT_MATCH_LABEL_TEXT_LIKE: bool = False

# List-length and query-size limits.
DEFAULT_PROPLIST_MAX_LEN: int = 2000
DEFAULT_VALUELIST_MAX_LEN: int = 20
DEFAULT_QUAL_PROPLIST_MAX_LEN: int = 50
DEFAULT_QUAL_VALUELIST_MAX_LEN: int = 20
DEFAULT_QUERY_LIMIT: int = 300000
DEFAULT_QUAL_QUERY_LIMIT: int = 300000

DEFAULT_VERBOSE: bool = False

# Number of backend objects created for the kgtk_backends pool.
DEFAULT_KYPHER_OBJECTS_NUM: int = 5

DEFAULT_PROPERTY_VALUES_COUNT_LIMIT: int = 25
DEFAULT_PROPERTY_SKIP_NUM: int = 0
DEFAULT_PROPERTY_LIMIT_NUM: int = 50
# Allow urls with trailing slashes
app.url_map.strict_slashes = False

# MATCH_ITEM_IGNORE_CASE was previously read back under the misspelt key
# 'MATCH_ITEM_IGNORE_CSE', so a value set in the configuration file was always
# replaced by the default.  Honour the correctly spelt key first and keep
# accepting the misspelt one for configurations that relied on it.
app.config.setdefault(
    'MATCH_ITEM_IGNORE_CASE',
    app.config.get('MATCH_ITEM_IGNORE_CSE', DEFAULT_MATCH_ITEM_IGNORE_CASE),
)

# Fill in every setting the configuration file left undefined.  A fallback of
# None only guarantees that the key exists.
_CONFIG_FALLBACKS = (
    ('SERVICE_PREFIX', DEFAULT_SERVICE_PREFIX),
    ('DEFAULT_LANGUAGE', DEFAULT_LANGUAGE),
    ('MATCH_ITEM_EXACTLY', DEFAULT_MATCH_ITEM_EXACTLY),
    ('MATCH_ITEM_PREFIXES', DEFAULT_MATCH_ITEM_PREFIXES),
    ('MATCH_ITEM_PREFIXES_LIMIT', DEFAULT_MATCH_ITEM_PREFIXES_LIMIT),
    ('MATCH_LABEL_EXACTLY', DEFAULT_MATCH_LABEL_EXACTLY),
    ('MATCH_LABEL_PREFIXES', DEFAULT_MATCH_LABEL_PREFIXES),
    ('MATCH_LABEL_PREFIXES_LIMIT', DEFAULT_MATCH_LABEL_PREFIXES_LIMIT),
    ('MATCH_LABEL_IGNORE_CASE', DEFAULT_MATCH_LABEL_IGNORE_CASE),
    ('MATCH_LABEL_TEXT_LIKE', DEFAULT_MATCH_LABEL_TEXT_LIKE),
    ('MATCH_LABEL_IS_CLASS', None),
    ('MATCH_LABEL_INSTANCE_OF', None),
    ('PROPLIST_MAX_LEN', DEFAULT_PROPLIST_MAX_LEN),
    ('VALUELIST_MAX_LEN', DEFAULT_VALUELIST_MAX_LEN),
    ('QUAL_PROPLIST_MAX_LEN', DEFAULT_QUAL_PROPLIST_MAX_LEN),
    ('QUAL_VALUELIST_MAX_LEN', DEFAULT_QUAL_VALUELIST_MAX_LEN),
    ('QUERY_LIMIT', DEFAULT_QUERY_LIMIT),
    ('QUAL_QUERY_LIMIT', DEFAULT_QUAL_QUERY_LIMIT),
    ('VERBOSE', DEFAULT_VERBOSE),
    ('KYPHER_OBJECTS_NUM', DEFAULT_KYPHER_OBJECTS_NUM),
    ('PROPERTY_VALUES_COUNT_LIMIT', DEFAULT_PROPERTY_VALUES_COUNT_LIMIT),
)
for _setting, _fallback in _CONFIG_FALLBACKS:
    app.config.setdefault(_setting, _fallback)

# These two are always set to the built-in values, whatever the configuration
# file says.
# NOTE(review): confirm that ignoring configured values here is intentional.
app.config['PROPERTY_SKIP_NUM'] = DEFAULT_PROPERTY_SKIP_NUM
app.config['PROPERTY_LIMIT_NUM'] = DEFAULT_PROPERTY_LIMIT_NUM
# Pool of browser backends keyed by 0 .. KYPHER_OBJECTS_NUM - 1.  Each entry
# wraps its own KypherAPIObject and is given the Flask app's configuration.
kgtk_backends = {}
logging.info('loading kgtk api..')
for i in range(app.config['KYPHER_OBJECTS_NUM']):
    k_api = KypherAPIObject()
    _api = kybe.BrowserBackend(api=k_api)
    _api.set_app_config(app)
    kgtk_backends[i] = _api
# Matches a whole item identifier: one of Q, q, P or p followed by digits.
# A raw string keeps "\d" from being treated as a string escape, and the
# character class no longer contains "|", which used to match a literal pipe.
item_regex = re.compile(r"^[qQpP]\d+$")
def get_backend():
    """Return one backend from the ``kgtk_backends`` pool.

    The backend is chosen from the current wall-clock second modulo the
    number of pooled backends, so every pooled backend is reachable and no
    key outside the pool is ever requested.

    Raises:
        RuntimeError: if the pool is empty.
    """
    pool_size = len(kgtk_backends)
    if pool_size == 0:
        raise RuntimeError("no KGTK backends have been created")
    epoch = int(time.time())
    # The pool is keyed by 0 .. pool_size - 1, so use its real size instead
    # of a hard-coded 5.
    return kgtk_backends[epoch % pool_size]
# Settings read directly from the loaded configuration.  These are plain
# subscript lookups with no fallback, so a missing key fails at import time.
sync_properties_sort_metadata = app.config['SYNC_PROPERTIES_SORT_METADATA']
ajax_properties_sort_metadata = app.config['AJAX_PROPERTIES_SORT_METADATA']
profiled_property_metadata = app.config['PROFILED_PROPERTY_METADATA']
# NOTE(review): the variable is named WIKIDATA_* but the key that is read is
# 'KG_WIKIPEDIA_URL_LABEL' -- confirm which name is intended.
WIKIDATA_URL_LABEL = app.config['KG_WIKIPEDIA_URL_LABEL']
wikidata_languages = app.config['WIKIDATA_LANGUAGES']
url_formatter_templates = app.config['KGTK_URL_FORMATTER_TEMPLATES']
# List the properties in the order that you want them to appear.  All unlisted
# properties will appear after these.
# NOTE(review): some entries end in "*"; the code that interprets the suffix
# is not in this part of the file -- confirm its meaning there.
rb_property_priority_list: List[str] = [
    "P31",  # instance of
    "P279",  # subclass of
    "P21",  # sex or gender
    "P2561*",  # name
    "P138",  # named after
    "P580*",  # start time
    "P582*",  # end time
    "P509",  # cause of death
    "P1196",  # manner of death
    "P20",  # place of death
    "P1038*",  # relative
    "P3342*",  # significant person
]

# Qualifier properties, in priority order.
rb_qualifier_priority_list: List[str] = [
    "P585",  # point in time
    "P580",  # start time
    "P582",  # end time
]

# Maps each qualifier property to its position in the list above.
rb_qualifier_priority_map: Mapping[str, int] = {val: idx for idx, val in enumerate(rb_qualifier_priority_list)}
# Matches a whole item identifier: one of Q, q, P or p followed by digits.
# The character class no longer contains "|", which used to make strings
# such as "|42" look like item identifiers.
item_regex = re.compile(r"^[qQpP]\d+$")

# Captures the language subdomain (group 1) and the page title (group 2) of a
# Wikipedia article URL.
wikipedia_url_regex = re.compile(r'https:\/\/(.*)\.wikipedia\.org\/wiki\/(.*)')
# Module-level backend instance, created in addition to the kgtk_backends
# pool.  It is the object that get_class_graph_data() and query_helper() call.
k_api = KypherAPIObject()
backend = kybe.BrowserBackend(api=k_api)
backend.set_app_config(app)
@app.route('/kb/info', methods=['GET'])
def get_info():
    """Return project configuration information as JSON.

    The graph id, graph cache and version are taken from the application
    configuration (``None`` when a setting is absent); the three ``has*``
    feature flags are fixed values.
    """
    # Key order is kept as-is because JSON keys are not sorted on output.
    payload = {
        response_key: app.config.get(config_key)
        for response_key, config_key in (
            ('graph_id', 'GRAPH_ID'),
            ('graph_cache', 'GRAPH_CACHE'),
            ('version', 'VERSION'),
        )
    }
    payload.update({
        'hasClassGraphVisualization': False,
        'hasIdentifiers': True,
        'hasGallery': False,
    })
    return flask.jsonify(payload), 200
@app.route('/browser', methods=['GET'])
@app.route('/browser/<string:node>', methods=['GET'])
def rb_get_kb(node=None):
    """Basic entry point for starting the KGTK browser.

    Sends the front end's ``index.html`` from the ``app/build`` directory.
    ``node`` is accepted from the URL but is not used by this handler.
    """
    build_directory = 'app/build'
    entry_page = 'index.html'
    return flask.send_from_directory(build_directory, entry_page)
def _read_class_graph_json(path):
    """Load the JSON document stored at ``path``, closing the file afterwards."""
    with open(path) as handle:
        return json.load(handle)


def _write_class_graph_json(path, payload):
    """Write ``payload`` as JSON to ``path``, closing the file afterwards."""
    with open(path, 'w') as handle:
        handle.write(json.dumps(payload))


def _flag_class_graph_labels(visualization_graph, qnode):
    """Set ``incoming_edges`` and ``showLabel`` on every node of the graph.

    A label is hidden only for a node that is not ``qnode``, has no incoming
    edges, and has exactly one neighbor (link target) whose own incoming
    edge count is 5 or more.
    """
    nodes = visualization_graph['nodes']
    links = visualization_graph['links']

    # One pass over the links gives both the incoming-edge count per target
    # and the list of targets per source.
    incoming_counts = {}
    targets_by_source = {}
    for link in links:
        incoming_counts[link['target']] = incoming_counts.get(link['target'], 0) + 1
        targets_by_source.setdefault(link['source'], []).append(link['target'])

    nodes_by_id = {}
    for node in nodes:
        node['incoming_edges'] = incoming_counts.get(node['id'], 0)
        nodes_by_id.setdefault(node['id'], []).append(node)

    for node in nodes:
        node['showLabel'] = True

        # always show the label for the original node, and for any node
        # that has incoming edges
        if node['id'] == qnode or node['incoming_edges']:
            continue

        # gather all neighboring nodes
        neighboring_nodes = [
            other_node
            for target in targets_by_source.get(node['id'], ())
            for other_node in nodes_by_id.get(target, ())
        ]

        # don't show the label when there's only one neighbor and that
        # neighbor has a cluster with 5 or more incoming edges
        if len(neighboring_nodes) == 1 and neighboring_nodes[0]['incoming_edges'] >= 5:
            node['showLabel'] = False


@app.route('/kb/get_class_graph_data/<string:qnode>', methods=['GET'])
def get_class_graph_data(qnode=None):
    """
    Get the data for your class graph visualization here!

    This endpoint takes in a qnode id to look up the class
    and returns a json object representing a graph, like so:

    {
        "nodes": [{
            "id": <str: qnode>,
            "label": <str: label>,
            "showLabel": <bool: show label?>,
            "tooltip": <str: description>,
            "color": <int: color>,
            "size": <float: value>
        }, {
            ...
        }],
        "links": [{
            "source": <str: source qnode>,
            "target": <str: target qnode>,
            "label": <str: edge label>,
            "color": <int: color>,
            "width_orig": <int: width>
        }, {
            ...
        }]
    }

    Results are cached as JSON files in the configured CLASS_VIZ_DIR; an
    empty result is cached in a separate "<qnode>.graph.empty.json" file.
    Passing ``refresh=true`` in the query string skips the cached files and
    recomputes the graph.  Any error while computing the graph is printed
    and answered with HTTP 500.
    """
    args = flask.request.args
    refresh: bool = args.get("refresh", type=rb_is_true,
                             default=False)

    class_viz_dir = app.config['CLASS_VIZ_DIR']
    if not Path(class_viz_dir).exists():
        Path(class_viz_dir).mkdir(parents=True, exist_ok=True)

    output_file_name = f"{class_viz_dir}/{qnode}.graph.json"
    empty_output_file_name = f"{class_viz_dir}/{qnode}.graph.empty.json"

    # Serve a cached answer unless the caller asked for a refresh.
    if not refresh:
        for cached_file_name in (output_file_name, empty_output_file_name):
            if Path(cached_file_name).exists():
                return flask.jsonify(_read_class_graph_json(cached_file_name))

    try:
        edge_results = backend.get_classviz_edge_results(qnode).to_records_dict()
        if len(edge_results) == 0:
            _write_class_graph_json(empty_output_file_name, {})
            return flask.jsonify({}), 200

        node_results = backend.get_classviz_node_results(qnode).to_records_dict()
        if len(node_results) == 0:
            _write_class_graph_json(empty_output_file_name, {})
            return flask.jsonify({}), 200

        for edge_result in edge_results:
            if edge_result['edge_type'] == 'subclass':
                edge_result['color'] = app.config['RED_EDGE_HEX']
            elif edge_result['edge_type'] == 'superclass':
                edge_result['color'] = app.config['BLUE_EDGE_HEX']

        for node_result in node_results:
            if node_result['node_type'] == 'few_subclasses':
                node_result['color'] = app.config['BLUE_NODE_HEX']
            elif node_result['node_type'] == 'many_subclasses':
                node_result['color'] = app.config['ORANGE_NODE_HEX']

        # The scratch directory is created only when a graph is actually
        # computed, and is removed on every exit path, including errors.
        with tempfile.TemporaryDirectory() as temp_dir:
            edge_file_name = f"{temp_dir}/{qnode}.edge.tsv"
            node_file_name = f"{temp_dir}/{qnode}.node.tsv"
            html_file_name = f"{temp_dir}/{qnode}.html"

            pd.DataFrame(edge_results).to_csv(edge_file_name, sep='\t', index=False)
            pd.DataFrame(node_results).to_csv(node_file_name, sep='\t', index=False)

            kv = KgtkVisualize(input_file=edge_file_name,
                               output_file=html_file_name,
                               node_file=node_file_name,
                               direction='arrow',
                               edge_color_column='color',
                               edge_color_hex=True,
                               node_color_column='color',
                               node_color_hex=True,
                               node_size_column='instance_count',
                               node_size_default=5.0,
                               node_size_minimum=2.0,
                               node_size_maximum=8.0,
                               node_size_scale='log',
                               tooltip_column='tooltip',
                               show_text='above',
                               node_file_id='node1')
            visualization_graph = kv.compute_visualization_graph()

            # count incoming edges per node and set the showLabel prop
            _flag_class_graph_labels(visualization_graph, qnode)

            # write visualization graph to the output file
            _write_class_graph_json(output_file_name, visualization_graph)

        return flask.jsonify(visualization_graph), 200
    except Exception as e:
        print('ERROR: ' + str(e))
        flask.abort(HTTPStatus.INTERNAL_SERVER_ERROR.value)
def rb_is_true(value: str) -> bool:
    """String to bool converter for use as ``type=`` in ``args.get(...)``.

    Only the word "true", in any letter case, converts to True.
    """
    lowered = value.lower()
    return lowered == "true"
def rb_sort_query_results(results: List[List[str]]) -> List[List[str]]:
    r"""Sort query results by item name, numerically within each prefix letter.

    If the database holds a large number of candidate matches and we want to
    limit the number of returned matches, there may be a performance problem
    because the database will first collect all candidate matches, then sort,
    then limit.

    Instead, we ask the database for unordered results.  We'll sort them
    ourselves.

    Since we're sorting the results ourselves, let's assume that item names
    take the form "[PQ]\d+".  We'd like to sort Q42 before Q102.  This also
    has the beneficial side-effect of putting most-wanted search results
    first, assuming that most-wanted results have a lower Q or P number.

    Each row's first element is the item name.  The sort key is the first
    character followed by the rest of the name zero-filled to the longest
    name in ``results``.  Rows whose keys are equal keep their input order;
    no row is dropped.

    An input of zero or one rows is returned as-is; otherwise a new list is
    returned.

    TODO: Generalize this to allow any alpha+ digit+ sequence, and fallback to
    an alpha sort when the pattern fails.

    TODO: Add a parameter to the query that controls whether or not the
    results are sorted in this fancy way.
    """
    # Optimize the common cases of empty or singleton results:
    if len(results) <= 1:
        return results

    # Determine the maximum number of digits per item name in the results:
    maxdigits: int = max(len(result[0][1:]) for result in results)

    def padded_name(result: List[str]) -> str:
        item: str = result[0]
        return item[0] + item[1:].zfill(maxdigits)

    return sorted(results, key=padded_name)
@app.route('/kb/query', methods=['GET'])
def rb_get_kb_query():
    """Return the items (Qnodes or Pnodes) that match a query string.

    Depending upon the parameter settings, the search string may be matched
    against an item name (P#### or Q####) or an item label
    (e.g., "Douglas Adams").

    Query-string parameters read by this handler:

    q
        The search string, e.g. "Q42" or "Douglas Adams".  Required; the
        request is answered with HTTP 400 when it is missing.
    verbose
        Debugging parameter that controls debugging output on the server.
        Defaults to the VERBOSE setting.
    lang
        The language code of matching labels.  Defaults to the
        DEFAULT_LANGUAGE setting.
    match_item_exactly
        Whether to perform an exact-length item match.
        Example: http://kgtk.isi.edu/kb/query?q=Q42&match_item_exactly=True
    match_label_exactly
        Whether to perform an exact-length label match.
        Example: http://kgtk.isi.edu/kb/query?q=Douglas Adams&match_label_exactly=True
    match_label_prefixes
        Whether to return label prefix matches.
    match_label_prefixes_limit
        Limit on the number of label match results.  Defaults to the
        KGTK_BROWSER_MATCH_LABEL_PREFIXES_LIMIT environment variable when it
        is set, otherwise to the MATCH_LABEL_PREFIXES_LIMIT setting.
    match_label_ignore_case
        Passed on to the search helper, which only uses it in its debugging
        output.
    match_label_text_like
        Whether to fall back to a text-like label search.
    is_class, instance_of
        Optional filters passed on to the backend searches.

    Boolean parameters are true only for the word "true" (any letter case).

    NOTE(review): earlier documentation also listed match_item_prefixes,
    match_item_prefixes_limit and match_item_ignore_case; this handler does
    not read them.

    The result returned is:

        {
            "matches": [
                {
                    "ref": "QNODE",
                    "text": "QNODE",
                    "description": "LABEL",
                    "ref_description": "DESCRIPTION"
                },
                ...
            ]
        }

    where QNODE is the Q### or P### item identifier, LABEL is the label
    value corresponding to that identifier and DESCRIPTION is the item's
    description, or "" when it has none.

    "ref"          The identifier used to retrieve the full details of an item.
    "text"         The identifier that is displayed to the user.
    "description"  The descriptive text for the item; the server sends the
                   item's label here.  It should be short as it will probably
                   be used to generate a pop-up/pull-down menu.

    Any error raised while searching is printed and answered with HTTP 500.
    """
    args = flask.request.args
    q = args.get('q')
    if q is None:
        # Without a search string every later step would fail with a
        # TypeError and surface as a 500; report the client error instead.
        flask.abort(HTTPStatus.BAD_REQUEST.value)

    verbose: bool = args.get("verbose", default=app.config['VERBOSE'], type=rb_is_true)
    if verbose:
        print("rb_get_kb_query: " + q)

    lang: str = args.get("lang", app.config['DEFAULT_LANGUAGE'])
    match_item_exactly: bool = args.get("match_item_exactly", type=rb_is_true,
                                        default=app.config['MATCH_ITEM_EXACTLY'])
    match_label_exactly: bool = args.get("match_label_exactly", type=rb_is_true,
                                         default=app.config['MATCH_LABEL_EXACTLY'])
    match_label_prefixes: bool = args.get("match_label_prefixes", type=rb_is_true,
                                          default=app.config['MATCH_LABEL_PREFIXES'])

    # The environment variable still wins; the configured limit replaces the
    # former hard-coded "20" as the fallback.
    default_prefixes_limit = int(os.environ.get("KGTK_BROWSER_MATCH_LABEL_PREFIXES_LIMIT",
                                                app.config['MATCH_LABEL_PREFIXES_LIMIT']))
    match_label_prefixes_limit: int = args.get("match_label_prefixes_limit", type=int,
                                               default=default_prefixes_limit)

    match_label_ignore_case: bool = args.get("match_label_ignore_case", type=rb_is_true,
                                             default=app.config['MATCH_LABEL_IGNORE_CASE'])
    match_label_text_like: bool = args.get("match_label_text_like", type=rb_is_true,
                                           default=app.config["MATCH_LABEL_TEXT_LIKE"])
    is_class: bool = args.get("is_class", type=rb_is_true, default=app.config['MATCH_LABEL_IS_CLASS'])
    instance_of: str = args.get("instance_of", type=str, default=app.config['MATCH_LABEL_INSTANCE_OF'])

    try:
        # NOTE(review): `p` is not defined in this part of the file;
        # presumably it is a worker pool created elsewhere -- confirm.
        response_data = p.apply(query_helper, args=(q,
                                                    lang,
                                                    match_item_exactly,
                                                    match_label_exactly,
                                                    match_label_ignore_case,
                                                    match_label_prefixes,
                                                    match_label_prefixes_limit,
                                                    match_label_text_like,
                                                    is_class,
                                                    instance_of,
                                                    verbose,))
        return flask.jsonify(response_data), 200
    except Exception as e:
        print('ERROR: ' + str(e))
        flask.abort(HTTPStatus.INTERNAL_SERVER_ERROR.value)
def _append_new_matches(results, description_index: int, matches, items_seen) -> None:
    """Append one match entry for every result row whose item is not yet seen.

    ``results`` rows hold the item id at index 0, the label at index 1 and
    the description at ``description_index``.  ``matches`` and ``items_seen``
    are updated in place.
    """
    for result in results:
        item = result[0]
        if item in items_seen:
            continue
        items_seen.add(item)

        label = KgtkFormat.unstringify(result[1])
        raw_description = result[description_index]
        # A missing or blank description becomes the empty string.
        if raw_description is not None and raw_description.strip() != "":
            description = KgtkFormat.unstringify(raw_description)
        else:
            description = ""
        matches.append(
            {
                "ref": item,
                "text": item,
                "description": label,
                "ref_description": description
            }
        )


def query_helper(q: str,
                 lang: str,
                 match_item_exactly: bool,
                 match_label_exactly: bool,
                 match_label_ignore_case: bool,
                 match_label_prefixes: bool,
                 match_label_prefixes_limit: int,
                 match_label_text_like: bool,
                 is_class: bool,
                 instance_of: str,
                 verbose: bool):
    """Run the item and label searches for ``q`` and collect the matches.

    Searches run in the following order; each may be disabled by a parameter
    and each item is reported only once, by the first search that finds it:

      1) exact match on the node name, when ``q`` looks like an item id and
         ``match_item_exactly`` is set
      2) label search, when ``match_label_prefixes`` is set and ``q`` has at
         least 3 characters
      3) text-like label search, when ``match_label_text_like`` is set, ``q``
         has at least 3 characters and search 2 found nothing or did not run
      4) exact label match, when ``match_label_exactly`` is set

    ``match_label_ignore_case`` is only reported in the verbose output here;
    it is not passed to the backend.

    Returns:
        ``{"matches": [...]}`` where every entry has the keys "ref", "text",
        "description" (the label) and "ref_description".
    """
    matches = []

    # We keep track of the matches we've seen and produce only one match per node.
    items_seen: Set[str] = set()

    if item_regex.match(q) and match_item_exactly:
        # We don't explicitly limit the number of results from this
        # query.  Should we?  The underlying code imposes a default
        # limit, currently 1000.
        if verbose:
            print("Searching for item %s" % repr(q), file=sys.stderr, flush=True)

        # Look for an exact match for the node name:
        results = backend.rb_get_node_labels(q, is_class=is_class, instance_of=instance_of)
        if verbose:
            print("Got %d matches" % len(results), file=sys.stderr, flush=True)

        # These rows carry the description at index 2.
        _append_new_matches(results, 2, matches, items_seen)

    query_text_like = True
    if match_label_prefixes and len(q) >= 3:
        # Query the labels, looking for a prefix match.
        if verbose:
            print("Searching for label prefix, textmatch %s (ignore_case=%s)" % (
                repr(q), repr(match_label_ignore_case)), file=sys.stderr, flush=True)

        results = backend.search_labels(q,
                                        lang=lang,
                                        limit=match_label_prefixes_limit,
                                        is_class=is_class,
                                        instance_of=instance_of)
        if verbose:
            print("Got %d matches" % len(results), file=sys.stderr, flush=True)

        # The text-like search below is only a fallback for an empty result.
        if len(results) > 0:
            query_text_like = False

        # Label search rows carry the description at index 4.
        _append_new_matches(results, 4, matches, items_seen)

    if match_label_text_like and query_text_like and len(q) >= 3:
        # Query the labels, using the %like% match in sqlite FTS5.
        # split the input string at space and insert % between every token
        search_label = f"%{'%'.join(q.split(' '))}%"
        if verbose:
            print("Searching for label, textlike %s " % (repr(q)), file=sys.stderr, flush=True)

        results = backend.search_labels_textlike(search_label,
                                                 lang=lang,
                                                 limit=match_label_prefixes_limit,
                                                 is_class=is_class,
                                                 instance_of=instance_of)
        if verbose:
            print("Got %d matches" % len(results), file=sys.stderr, flush=True)

        _append_new_matches(results, 4, matches, items_seen)

    if match_label_exactly:
        # Query the labels, looking for an exact length match.  The number
        # of results is capped with the same limit as the prefix search.
        if verbose:
            print("Searching for label, exact match %s (ignore_case=%s)" %
                  (repr(q), repr(match_label_ignore_case)),
                  file=sys.stderr, flush=True)

        results = backend.search_labels_exactly(q,
                                                lang=lang,
                                                limit=match_label_prefixes_limit,
                                                is_class=is_class,
                                                instance_of=instance_of)
        if verbose:
            print("Got %d matches" % len(results), file=sys.stderr, flush=True)

        _append_new_matches(results, 4, matches, items_seen)

    if verbose:
        print("Got %d matches total" % len(matches), file=sys.stderr, flush=True)

    # Build the final response:
    response_data = {
        "matches": matches
    }
    return response_data
def rb_link_to_url(text_value, current_value, lang: str = "en", prop: Optional[str] = None) -> bool:
    """Store a link for ``text_value`` in ``current_value["url"]`` if it has one.

    A value starting with "https://" or "http://" is its own link.  A value
    ending in ".jpg" or ".svg" is linked to its Wikimedia Commons file page.
    ``lang`` and ``prop`` are accepted but not used.

    Returns:
        True when a "url" entry was stored, False otherwise (including when
        ``text_value`` is None).
    """
    if text_value is None:
        return False

    if text_value.startswith("https://") or text_value.startswith("http://"):
        # The text is already a URL.
        link = text_value
    elif text_value.endswith(".jpg") or text_value.endswith(".svg"):
        # The text names an image file.
        link = "https://commons.wikimedia.org/wiki/File:" + text_value
    else:
        return False

    current_value["url"] = link
    return True
def rb_unstringify(item: str, default: str = "") -> str:
    """Unstringify ``item`` with KgtkFormat, or return ``default`` when
    ``item`` is None or empty.
    """
    if item is None or len(item) == 0:
        return default
    return KgtkFormat.unstringify(item)
# Cache once used by get_image_formatter() when it queried the backend for
# formatter URLs; the function below no longer reads or writes it.
rb_image_formatter_cache: MutableMapping[str, Optional[str]] = dict()


def get_image_formatter(backend, relationship: str) -> Optional[str]:
    """Return the URL formatter template for ``relationship``, if any.

    The template comes from the configured ``url_formatter_templates``
    mapping: the first element of the entry is unstringified and returned.
    None is returned when the mapping has no entry (or a None entry) for
    ``relationship``.  ``backend`` is accepted but not used.
    """
    template = url_formatter_templates.get(relationship)
    if template is None:
        return None
    return rb_unstringify(template[0])
# Cache of unit node -> unit label; None records that no label was found.
# NOTE(review): entries are keyed by the unit node only, so a label fetched
# for one language is reused for requests in another -- confirm acceptable.
rb_units_node_cache: MutableMapping[str, Optional[str]] = dict()


def _strip_leading_plus(text: str) -> str:
    """Return ``text`` without its leading "+", if it has one."""
    return text[1:] if text.startswith("+") else text


def rb_format_number_or_quantity(
        backend,
        target_node: str,
        value: KgtkValue,
        datatype: str,
        lang: str,
) -> Tuple[str, Optional[str], Optional[str]]:
    """Format a number or quantity value for display.

    Args:
        backend: used to look up the label of a units node not yet cached.
        target_node: the raw value text.
        value: the parsed-value wrapper for ``target_node``.
        datatype: the value's data type; numbers are formatted from
            ``target_node`` alone.
        lang: language passed to the units label lookup.

    Returns:
        ``(number_value, number_units, number_ref)``:
        the number text without a leading "+", followed by "[low,high]" when
        either tolerance is present; the SI units or the units node's label
        (the node itself when no label is found), or None; and the units
        node, or None.  When the value's fields cannot be parsed,
        ``target_node`` is returned unchanged with no units or reference.
    """
    if datatype == KgtkFormat.DataType.NUMBER:
        return _strip_leading_plus(target_node), None, None

    if not value.do_parse_fields():
        # Validation failed.
        #
        # TODO: Add a validation failure indicator?
        return target_node, None, None

    fields = value.fields
    number_value: str = _strip_leading_plus(fields.numberstr)

    low_tolerance = fields.low_tolerancestr
    high_tolerance = fields.high_tolerancestr
    if low_tolerance is not None or high_tolerance is not None:
        number_value += "["
        if low_tolerance is not None:
            number_value += _strip_leading_plus(low_tolerance)
        number_value += ","
        if high_tolerance is not None:
            number_value += _strip_leading_plus(high_tolerance)
        number_value += "]"

    number_units: Optional[str] = None
    number_ref: Optional[str] = None
    if fields.si_units is not None:
        # TODO: supply a node reference for each SI unit.
        number_units = fields.si_units
    elif fields.units_node is not None:
        units_node: str = fields.units_node
        if units_node not in rb_units_node_cache:
            units_node_labels: List[List[str]] = backend.get_node_labels(units_node, lang=lang)
            if len(units_node_labels) > 0:
                rb_units_node_cache[units_node] = rb_unstringify(units_node_labels[0][1])
            else:
                rb_units_node_cache[units_node] = None  # Remember the failure.

        cached_label = rb_units_node_cache[units_node]
        # Fall back to the node itself when no label was found for it.
        number_units = cached_label if cached_label is not None else units_node
        number_ref = units_node

    return number_value, number_units, number_ref
def rb_iso_format_time(
        target_node: str,
        value: KgtkValue,
) -> str:
    """Format a time value as ISO-style text cut off at its precision.

    Precision 9 or lower gives the year, 10 gives year-month, 11 gives
    year-month-day, and 12 to 14 give "year-month-day hour:minutes".  When
    the value cannot be parsed, has no precision, has another precision, or
    lacks one of the needed parts, ``target_node`` without its first
    character is returned.
    """
    if not (value.do_parse_fields() and value.fields.precision is not None):
        # Validation failed.
        #
        # TODO: Add a validation failure indicator?
        return target_node[1:]

    f = value.fields
    precision: int = f.precision

    # Pick the parts that are shown at this precision.
    if precision <= 9:
        date_parts = [f.yearstr]
        time_parts = []
    elif precision == 10:
        date_parts = [f.yearstr, f.monthstr]
        time_parts = []
    elif precision == 11:
        date_parts = [f.yearstr, f.monthstr, f.daystr]
        time_parts = []
    elif precision in (12, 13, 14):
        date_parts = [f.yearstr, f.monthstr, f.daystr]
        time_parts = [f.hourstr, f.minutesstr]
    else:
        return target_node[1:]

    if any(part is None for part in date_parts + time_parts):
        return target_node[1:]

    formatted = "-".join(date_parts)
    if time_parts:
        formatted = formatted + " " + ":".join(time_parts)
    return formatted
def rb_human_format_time(
        target_node: str,
        value: KgtkValue,
) -> str:
    """Format a time value as human-readable text cut off at its precision.

    Precision 9 or lower gives the year text; 10 gives "Month Year"; 11 adds
    the day; 12 adds the hour; 13 adds the minutes; anything else shows the
    full time with seconds.

    Only the components that are displayed at the given precision are used
    to build the date, so a year- or month-precision value whose hidden
    month or day is 0 still formats.  When the value cannot be parsed, has
    no precision, or its displayed components do not form a valid date,
    ``target_node`` without its first character is returned.
    """
    if not (value.do_parse_fields() and value.fields.precision is not None):
        # Validation failed.
        #
        # TODO: Add a validation failure indicator?
        return target_node[1:]

    f = value.fields
    precision: int = f.precision
    if precision <= 9 and f.yearstr is not None:
        # Only the year text is needed; no date object has to be built.
        return f.yearstr

    # Output layout and number of leading date/time components it displays.
    if precision == 10:
        layout, shown = "%B %Y", 2
    elif precision == 11:
        layout, shown = "%B %d, %Y", 3
    elif precision == 12:
        layout, shown = "%I %p, %B %d, %Y", 4
    elif precision == 13:
        layout, shown = "%I:%M %p, %B %d, %Y", 5
    else:
        layout, shown = "%I:%M:%S %p, %B %d, %Y", 6

    components = [f.year, f.month, f.day, f.hour, f.minutes, f.seconds]
    neutral = [1, 1, 1, 0, 0, 0]
    try:
        # Components that are not displayed are replaced by neutral values so
        # that they cannot invalidate the date.
        d = datetime.datetime(*(components[:shown] + neutral[shown:]))
    except (TypeError, ValueError):
        return target_node[1:]
    return d.strftime(layout)
def rb_format_time(
        target_node: str,
        value: KgtkValue,
        use_iso_format: bool = False,
) -> str:
    """Format a time value with the ISO formatter when ``use_iso_format`` is
    true, and with the human-readable formatter otherwise.
    """
    formatter = rb_iso_format_time if use_iso_format else rb_human_format_time
    return formatter(target_node, value)
def rb_dd_to_dms(degs: float) -> Tuple[bool, int, int, float]:
    """Split decimal degrees into sign, whole degrees, whole minutes, seconds.

    Returns:
        ``(neg, degrees, minutes, seconds)`` where ``neg`` is True for a
        negative input and the other three describe its absolute value.
        Degrees and minutes are ints; seconds is a float.
    """
    # Taken from:
    # https://stackoverflow.com/questions/2579535/convert-dd-decimal-degrees-to-dms-degrees-minutes-seconds-in-python
    neg: bool = degs < 0
    if neg:
        degs = -degs
    # math.modf returns (fractional part, integral part), both as floats;
    # the integral parts are converted to the ints the signature promises.
    fraction, whole_degrees = math.modf(degs)
    minute_fraction, whole_minutes = math.modf(60 * fraction)
    secs: float = 60 * minute_fraction
    return neg, int(whole_degrees), int(whole_minutes), secs


def rm_format_dms(degs: float,
                  is_lat: bool) -> str:
    """Format decimal degrees as degrees, minutes and seconds with a
    hemisphere letter.

    A latitude (``is_lat`` true) ends in "N" or "S" and has a 2-wide degree
    field; a longitude ends in "E" or "W" and has a 3-wide degree field.
    Minutes are followed by ' and seconds, with 5 decimals, by ".
    """
    neg: bool
    d_int: int
    m_int: int
    secs: float
    neg, d_int, m_int, secs = rb_dd_to_dms(degs)
    degree_sign = u"\N{DEGREE SIGN}"
    letter: str
    if is_lat:
        letter = "S" if neg else "N"
        return "%2d%s%2d'%2.5f\"%s" % (d_int, degree_sign, m_int, secs, letter)
    else:
        letter = "W" if neg else "E"
        return "%3d%s%2d'%2.5f\"%s" % (d_int, degree_sign, m_int, secs, letter)
def rb_format_geo(latlon: str,
use_decimal_format: bool = False,
) -> str: