From ca4698bd866edd8a82a2bbc0a421cb1df77ed782 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Thu, 10 Oct 2024 23:29:32 +0200 Subject: [PATCH] #212 #238 Hindi/Urdu under Hindustani and combining queries --- .../Hindi/Adjectives/query_adjectives.sparql | 56 ------ .../Hindi/emoji_keywords/___init__.py | 1 - .../Hindi/adjectives/query_adjectives.sparql | 162 ++++++++++++++++++ .../Hindi/adverbs/query_adverbs.sparql | 3 +- .../Hindi/emoji_keywords/___init__.py | 0 .../emoji_keywords/generate_emoji_keywords.py | 4 +- .../Hindi/nouns/query_nouns.sparql | 3 +- .../postpositions}/query_postpositions.sparql | 3 +- .../Hindi/verbs/query_verbs.sparql | 3 +- .../Urdu/adjectives/query_adjectives.sparql | 30 +++- .../Urdu/adverbs/query_adverbs.sparql | 5 +- .../Urdu/nouns/query_nouns.sparql | 3 +- .../postpositions/query_postpositions.sparql | 17 ++ .../Urdu/verbs/query_verbs.sparql | 3 +- .../Punjabi/nouns/query_nouns.sparql | 3 +- 15 files changed, 223 insertions(+), 73 deletions(-) delete mode 100644 src/scribe_data/language_data_extraction/Hindi/Adjectives/query_adjectives.sparql delete mode 100644 src/scribe_data/language_data_extraction/Hindi/emoji_keywords/___init__.py create mode 100644 src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql rename src/scribe_data/language_data_extraction/{ => Hindustani}/Hindi/adverbs/query_adverbs.sparql (84%) create mode 100644 src/scribe_data/language_data_extraction/Hindustani/Hindi/emoji_keywords/___init__.py rename src/scribe_data/language_data_extraction/{ => Hindustani}/Hindi/emoji_keywords/generate_emoji_keywords.py (89%) rename src/scribe_data/language_data_extraction/{ => Hindustani}/Hindi/nouns/query_nouns.sparql (93%) rename src/scribe_data/language_data_extraction/{Hindi/Postpositions => Hindustani/Hindi/postpositions}/query_postpositions.sparql (85%) rename src/scribe_data/language_data_extraction/{ => Hindustani}/Hindi/verbs/query_verbs.sparql (97%) rename 
src/scribe_data/language_data_extraction/{ => Hindustani}/Urdu/adjectives/query_adjectives.sparql (84%) rename src/scribe_data/language_data_extraction/{ => Hindustani}/Urdu/adverbs/query_adverbs.sparql (70%) rename src/scribe_data/language_data_extraction/{ => Hindustani}/Urdu/nouns/query_nouns.sparql (93%) create mode 100644 src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql rename src/scribe_data/language_data_extraction/{ => Hindustani}/Urdu/verbs/query_verbs.sparql (95%) diff --git a/src/scribe_data/language_data_extraction/Hindi/Adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Hindi/Adjectives/query_adjectives.sparql deleted file mode 100644 index 8870fde0..00000000 --- a/src/scribe_data/language_data_extraction/Hindi/Adjectives/query_adjectives.sparql +++ /dev/null @@ -1,56 +0,0 @@ -# tool: scribe-data -# All Hindi (from Hindustani Q11051) adjectives with the included grammatical forms. -# Each form is filtered to include only those where the language is set to "hi" (Hindi). -# Tested this query at https://query.wikidata.org/. - -SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) - ?adjective - ?directCase - ?singulativeNumeral - ?collectiveNumeral - ?obliqueCase - -WHERE { - # Adjective Lexeme - ?lexeme dct:language wd:Q11051 ; - wikibase:lexicalCategory wd:Q34698 ; - wikibase:lemma ?adjective . - FILTER(lang(?adjective) = "hi") - - # MARK: Direct Case - - OPTIONAL { - ?lexeme ontolex:lexicalForm ?directCaseForm . - ?directCaseForm ontolex:representation ?directCase ; - wikibase:grammaticalFeature wd:Q1751855 . - FILTER(LANG(?directCase) = "hi") - } - - # MARK: Singulative Numeral - - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singulativeNumeralForm . - ?singulativeNumeralForm ontolex:representation ?singulativeNumeral ; - wikibase:grammaticalFeature wd:Q110786 . 
- FILTER(LANG(?singulativeNumeral) = "hi") - } - - # MARK: Collective Numeral - - OPTIONAL { - ?lexeme ontolex:lexicalForm ?collectiveNumeralForm . - ?collectiveNumeralForm ontolex:representation ?collectiveNumeral ; - wikibase:grammaticalFeature wd:Q146786 . - FILTER(LANG(?collectiveNumeral) = "hi") - } - - # MARK: Oblique Case - - OPTIONAL { - ?lexeme ontolex:lexicalForm ?obliqueCaseForm . - ?obliqueCaseForm ontolex:representation ?obliqueCase ; - wikibase:grammaticalFeature wd:Q1233197 . - FILTER(LANG(?obliqueCase) = "hi") - } -} diff --git a/src/scribe_data/language_data_extraction/Hindi/emoji_keywords/___init__.py b/src/scribe_data/language_data_extraction/Hindi/emoji_keywords/___init__.py deleted file mode 100644 index 8b137891..00000000 --- a/src/scribe_data/language_data_extraction/Hindi/emoji_keywords/___init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql new file mode 100644 index 00000000..166de38d --- /dev/null +++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adjectives/query_adjectives.sparql @@ -0,0 +1,162 @@ +# tool: scribe-data +# All Hindi (from Hindustani Q11051) adjectives with the included grammatical forms. +# Enter this query at https://query.wikidata.org/. + +# Note: We need to filter for "hi" to remove Urdu (ur) words. + +SELECT DISTINCT + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?adjective + ?singulativeNumeral + ?collectiveNumeral + ?femSingularDirect + ?masSingularDirect + ?femPluralDirect + ?masPluralDirect + ?femSingularOblique + ?masSingularOblique + ?femPluralOblique + ?masPluralOblique + ?femSingularVocative + ?masSingularVocative + ?femPluralVocative + ?masPluralVocative + +WHERE { + ?lexeme dct:language wd:Q11051 ; + wikibase:lexicalCategory wd:Q34698 ; + wikibase:lemma ?adjective . 
+ FILTER(lang(?adjective) = "hi") + + # MARK: Singulative Numeral + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?singulativeNumeralForm . + ?singulativeNumeralForm ontolex:representation ?singulativeNumeral ; + wikibase:grammaticalFeature wd:Q110786 . + FILTER(LANG(?singulativeNumeral) = "hi") + } + + # MARK: Collective Numeral + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?collectiveNumeralForm . + ?collectiveNumeralForm ontolex:representation ?collectiveNumeral ; + wikibase:grammaticalFeature wd:Q146786 . + FILTER(LANG(?collectiveNumeral) = "hi") + } + + # MARK: Direct + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?femSingularDirectForm . + ?femSingularDirectForm ontolex:representation ?femSingularDirect ; + wikibase:grammaticalFeature wd:Q1775415 ; + wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q1751855 ; + FILTER(LANG(?femSingularDirect) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?masSingularDirectForm . + ?masSingularDirectForm ontolex:representation ?masSingularDirect ; + wikibase:grammaticalFeature wd:Q499327 ; + wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q1751855 ; + FILTER(LANG(?masSingularDirect) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?femPluralDirectForm . + ?femPluralDirectForm ontolex:representation ?femPluralDirect ; + wikibase:grammaticalFeature wd:Q1775415 ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q1751855 ; + FILTER(LANG(?femPluralDirect) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?masPluralDirectForm . + ?masPluralDirectForm ontolex:representation ?masPluralDirect ; + wikibase:grammaticalFeature wd:Q499327 ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q1751855 ; + FILTER(LANG(?masPluralDirect) = "hi") + } . + + # MARK: Oblique + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?femSingularObliqueForm . 
+ ?femSingularObliqueForm ontolex:representation ?femSingularOblique ; + wikibase:grammaticalFeature wd:Q1775415 ; + wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q1233197 ; + FILTER(LANG(?femSingularOblique) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?masSingularObliqueForm . + ?masSingularObliqueForm ontolex:representation ?masSingularOblique ; + wikibase:grammaticalFeature wd:Q499327 ; + wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q1233197 ; + FILTER(LANG(?masSingularOblique) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?femPluralObliqueForm . + ?femPluralObliqueForm ontolex:representation ?femPluralOblique ; + wikibase:grammaticalFeature wd:Q1775415 ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q1233197 ; + FILTER(LANG(?femPluralOblique) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?masPluralObliqueForm . + ?masPluralObliqueForm ontolex:representation ?masPluralOblique ; + wikibase:grammaticalFeature wd:Q499327 ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q1233197 ; + FILTER(LANG(?masPluralOblique) = "hi") + } . + + # MARK: Vocative + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?femSingularVocativeForm . + ?femSingularVocativeForm ontolex:representation ?femSingularVocative ; + wikibase:grammaticalFeature wd:Q1775415 ; + wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q185077 ; + FILTER(LANG(?femSingularVocative) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?masSingularVocativeForm . + ?masSingularVocativeForm ontolex:representation ?masSingularVocative ; + wikibase:grammaticalFeature wd:Q499327 ; + wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q185077 ; + FILTER(LANG(?masSingularVocative) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?femPluralVocativeForm . 
+ ?femPluralVocativeForm ontolex:representation ?femPluralVocative ; + wikibase:grammaticalFeature wd:Q1775415 ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q185077 ; + FILTER(LANG(?femPluralVocative) = "hi") + } . + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?masPluralVocativeForm . + ?masPluralVocativeForm ontolex:representation ?masPluralVocative ; + wikibase:grammaticalFeature wd:Q499327 ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q185077 ; + FILTER(LANG(?masPluralVocative) = "hi") + } . +} diff --git a/src/scribe_data/language_data_extraction/Hindi/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql similarity index 84% rename from src/scribe_data/language_data_extraction/Hindi/adverbs/query_adverbs.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql index 41277883..68fc5563 100644 --- a/src/scribe_data/language_data_extraction/Hindi/adverbs/query_adverbs.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/adverbs/query_adverbs.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Hindi (from Hindustani Q11051) adverbs. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "hi" to remove Urdu (ur) words. + +# Note: We need to filter for "hi" to remove Urdu (ur) words. 
SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) diff --git a/src/scribe_data/language_data_extraction/Hindustani/Hindi/emoji_keywords/___init__.py b/src/scribe_data/language_data_extraction/Hindustani/Hindi/emoji_keywords/___init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/scribe_data/language_data_extraction/Hindi/emoji_keywords/generate_emoji_keywords.py b/src/scribe_data/language_data_extraction/Hindustani/Hindi/emoji_keywords/generate_emoji_keywords.py similarity index 89% rename from src/scribe_data/language_data_extraction/Hindi/emoji_keywords/generate_emoji_keywords.py rename to src/scribe_data/language_data_extraction/Hindustani/Hindi/emoji_keywords/generate_emoji_keywords.py index 0550fba1..3003fbdd 100644 --- a/src/scribe_data/language_data_extraction/Hindi/emoji_keywords/generate_emoji_keywords.py +++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/emoji_keywords/generate_emoji_keywords.py @@ -25,8 +25,7 @@ from scribe_data.unicode.process_unicode import gen_emoji_lexicon from scribe_data.utils import export_formatted_data -LANGUAGE = "Hindustani" # Broad language category -LANGUAGE_CODE = "hi" # Specific filter for Hindi +LANGUAGE = "Hindi" DATA_TYPE = "emoji-keywords" emojis_per_keyword = 3 @@ -38,7 +37,6 @@ if emoji_keywords_dict := gen_emoji_lexicon( language=LANGUAGE, emojis_per_keyword=emojis_per_keyword, - filter_language_code=LANGUAGE_CODE, # Adding filter for Hindi language code "hi" ): export_formatted_data( file_path=args.file_path, diff --git a/src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql similarity index 93% rename from src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql index c21279fc..5c653c4b 100644 --- 
a/src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/nouns/query_nouns.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Hindi (from Hindustani Q11051) nouns and their gender. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "hi" to remove Urdu (ur) words. + +# Note: We need to filter for "hi" to remove Urdu (ur) words. SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) diff --git a/src/scribe_data/language_data_extraction/Hindi/Postpositions/query_postpositions.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql similarity index 85% rename from src/scribe_data/language_data_extraction/Hindi/Postpositions/query_postpositions.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql index 835327b8..dde9fb0a 100644 --- a/src/scribe_data/language_data_extraction/Hindi/Postpositions/query_postpositions.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/postpositions/query_postpositions.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Hindi (from Hindustani Q11051) postpositions. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "hi" to remove Urdu (ur) words. + +# Note: We need to filter for "hi" to remove Urdu (ur) words. 
SELECT DISTINCT ?lexeme diff --git a/src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql similarity index 97% rename from src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql index 40414886..984121e9 100644 --- a/src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Hindi/verbs/query_verbs.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Hindi (from Hindustani Q11051) verbs and the currently implemented forms for each. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "hi" to remove Urdu (ur) words. + +# Note: We need to filter for "hi" to remove Urdu (ur) words. SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) diff --git a/src/scribe_data/language_data_extraction/Urdu/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql similarity index 84% rename from src/scribe_data/language_data_extraction/Urdu/adjectives/query_adjectives.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql index bde7d13a..01aa22aa 100644 --- a/src/scribe_data/language_data_extraction/Urdu/adjectives/query_adjectives.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adjectives/query_adjectives.sparql @@ -1,10 +1,14 @@ # tool: scribe-data -# All Urduo (from Hindustani Q11051) adjectives. +# All Urdu (from Hindustani Q11051) adjectives with the included grammatical forms. # Enter this query at https://query.wikidata.org/. +# Note: We need to filter for "ur" to remove Hindi (hi) words. 
+ SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) - ?lemma + ?adjective + ?singulativeNumeral + ?collectiveNumeral ?femSingularDirect ?masSingularDirect ?femPluralDirect @@ -21,8 +25,26 @@ SELECT DISTINCT WHERE { ?lexeme dct:language wd:Q11051 ; wikibase:lexicalCategory wd:Q34698 ; - wikibase:lemma ?lemma . - FILTER(lang(?lemma) = "ur") + wikibase:lemma ?adjective . + FILTER(lang(?adjective) = "ur") + + # MARK: Singulative Numeral + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?singulativeNumeralForm . + ?singulativeNumeralForm ontolex:representation ?singulativeNumeral ; + wikibase:grammaticalFeature wd:Q110786 . + FILTER(LANG(?singulativeNumeral) = "ur") + } + + # MARK: Collective Numeral + + OPTIONAL { + ?lexeme ontolex:lexicalForm ?collectiveNumeralForm . + ?collectiveNumeralForm ontolex:representation ?collectiveNumeral ; + wikibase:grammaticalFeature wd:Q146786 . + FILTER(LANG(?collectiveNumeral) = "ur") + } # MARK: Direct diff --git a/src/scribe_data/language_data_extraction/Urdu/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql similarity index 70% rename from src/scribe_data/language_data_extraction/Urdu/adverbs/query_adverbs.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql index 49a1448a..09a8d7ca 100644 --- a/src/scribe_data/language_data_extraction/Urdu/adverbs/query_adverbs.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/adverbs/query_adverbs.sparql @@ -1,14 +1,15 @@ # tool: scribe-data # All Urdu (from Hindustani Q11051) adverbs. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "ur" to remove Hindi (hi) words. + +# Note: We need to filter for "ur" to remove Hindi (hi) words. 
SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?adverb WHERE { - ?lexeme dct:language wd:Q11051 ; # Urdu language (from Hindustani) + ?lexeme dct:language wd:Q11051 ; wikibase:lexicalCategory wd:Q380057 ; wikibase:lemma ?adverb . FILTER(lang(?adverb) = "ur") diff --git a/src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql similarity index 93% rename from src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql index 0d88c4b4..86119a0c 100644 --- a/src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/nouns/query_nouns.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Urdu (from Hindustani Q11051) nouns and their gender. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "ur" to remove Hindi (hi) words. + +# Note: We need to filter for "ur" to remove Hindi (hi) words. SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) diff --git a/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql new file mode 100644 index 00000000..d6449014 --- /dev/null +++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/postpositions/query_postpositions.sparql @@ -0,0 +1,17 @@ +# tool: scribe-data +# All Urdu (from Hindustani Q11051) postpositions. +# Enter this query at https://query.wikidata.org/. + +# Note: We need to filter for "ur" to remove Hindi (hi) words. 
+ +SELECT DISTINCT + ?lexeme + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?postposition + +WHERE { + ?lexeme dct:language wd:Q11051 ; + wikibase:lexicalCategory wd:Q161873 ; + wikibase:lemma ?postposition . + FILTER(lang(?postposition) = "ur") +} diff --git a/src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql similarity index 95% rename from src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql rename to src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql index 4c87e2d8..bf1d8b1f 100644 --- a/src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Hindustani/Urdu/verbs/query_verbs.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Urdu (from Hindustani Q11051) verbs and the currently implemented conjugations for each. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "ur" to remove Hindustani (hi) words. + +# Note: We need to filter for "ur" to remove Hindi (hi) words. SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) diff --git a/src/scribe_data/language_data_extraction/Punjabi/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Punjabi/nouns/query_nouns.sparql index 64cd0bb1..bc57926a 100644 --- a/src/scribe_data/language_data_extraction/Punjabi/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Punjabi/nouns/query_nouns.sparql @@ -1,7 +1,8 @@ # tool: scribe-data # All Gurmukhi (from Punjabi Q58635) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "pa" to select Gurmukhi words. + +# Note: We need to filter for "pa" to select Gurmukhi words. 
SELECT DISTINCT (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)