From 584d4fb6a41e3eba6c2233f9f6f3273468ecbab9 Mon Sep 17 00:00:00 2001 From: Tanmai Khanna Date: Mon, 10 May 2021 12:37:06 +0530 Subject: [PATCH] update readme | all tests pass | add sentence tokenizer | rearrange --- README.md | 15 +++- src/list_postag.txt => data/spacy_postag.txt | 0 src/SvayamMT_AccessToken.txt | 13 ---- src/nmt_api.py | 76 ------------------- src/output_test.txt | 19 ----- src/preprocess.py | 12 +-- src/preprocessing_testing.txt | 3 - src/requirements.txt | 17 +---- src/sentence_tokenizer.py | 13 ++++ src/swayam_api_python.py | 17 ----- src/testing.py | 17 ----- src/testing.txt | 70 ----------------- .../rulesets/eng-hin.ppr | 0 tests/rulesets/rule-set.ppr | 27 ------- tests/test.sh | 40 +++++----- 15 files changed, 52 insertions(+), 287 deletions(-) rename src/list_postag.txt => data/spacy_postag.txt (100%) delete mode 100644 src/SvayamMT_AccessToken.txt delete mode 100644 src/nmt_api.py delete mode 100644 src/output_test.txt delete mode 100644 src/preprocessing_testing.txt create mode 100644 src/sentence_tokenizer.py delete mode 100644 src/swayam_api_python.py delete mode 100644 src/testing.py delete mode 100644 src/testing.txt rename src/rule-set.ppr => tests/rulesets/eng-hin.ppr (100%) delete mode 100644 tests/rulesets/rule-set.ppr diff --git a/README.md b/README.md index b0bc170..0a198cd 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,16 @@ ## How to Use - Install dependencies using `pip install -r requirements.txt` +- Download spacy model using `python -m spacy download en_core_web_sm` - `python3 src/preprocess.py [rule_file.ppr] [input_file.txt]` - Test using `./tests/test.sh` -## External tools used -- spacy POS tagger -- Download model using `python -m spacy download en_core_web_sm` +Note: This assumes your input is already sentence tokenised. If it's not, you can use the `spacy` sentence tokeniser first. ## Rule formalism (File extension .ppr) +## **Sample rule file: `tests/rulesets/eng-hin.ppr`** + ### Source side rules - `[...]` : POS Tags - `[..@1]` : Variables named `0-9,a-z`,etc. to be used in the target side @@ -32,3 +33,11 @@ For example, if you want a rule that matches "the" followed by an Adjective, whi - Anything not in `[...]` is matched directly - Rules are put in a list and applied on the input sentence one after the other. - Only lines with `->` in the rule-set are counted as rules. + +## Testing +- Run tests using `tests/test.sh` + +## Miscellaneous Information +This project is part of my Master's thesis in Computational Linguistics titled: **Rule-based pre-processing of idioms and non-compositional constructions to simplify them and improve black-box machine translation** + +You can open an issue on this repo to report any bugs or just to ask a doubt. diff --git a/src/list_postag.txt b/data/spacy_postag.txt similarity index 100% rename from src/list_postag.txt rename to data/spacy_postag.txt diff --git a/src/SvayamMT_AccessToken.txt b/src/SvayamMT_AccessToken.txt deleted file mode 100644 index fbb835d..0000000 --- a/src/SvayamMT_AccessToken.txt +++ /dev/null @@ -1,13 +0,0 @@ -curl -k -X POST https://apicallhttps.iiithcanvas.com/token -d "grant_type=client_credentials" -H"Authorization: Basic Rkhxazg5MG9Edko2dFFXYWIzbldFOVhwNEE0YTpXSjVmTVloV2JUWjF1RzloVzRrYnA1OEptZllh" - -Python Code - -headers = { - 'Authorization': 'Basic Rkhxazg5MG9Edko2dFFXYWIzbldFOVhwNEE0YTpXSjVmTVloV2JUWjF1RzloVzRrYnA1OEptZllh', -} - -data = { - 'grant_type': 'client_credentials' -} - -response = requests.post('https://apicallhttps.iiithcanvas.com/token', headers=headers, data=data, verify=False) \ No newline at end of file diff --git a/src/nmt_api.py b/src/nmt_api.py deleted file mode 100644 index 815cbf8..0000000 --- a/src/nmt_api.py +++ /dev/null @@ -1,76 +0,0 @@ -import googletrans -import sys -import time -import requests -import json -from googletrans import Translator - -if(len(sys.argv) < 4): - print("Not enough arguments.") - sys.exit() - -source_input = sys.argv[1] -replace_source = sys.argv[2] -replace_target = sys.argv[3] - -print("*** Pre-processing: " + replace_source + " -> " + replace_target + " ***\n") - -# GOOGLE TRANSLATE API - -translator = Translator() -source_language = 'en' -destination_language = 'hi' - -print("Google Translate\n") - -print("Original Input: " + source_input) -result_original = translator.translate(source_input, src=source_language, dest=destination_language) -print("Original Translation: " + result_original.text) - -time.sleep(2.5) -preprocessed_input = source_input.replace(replace_source, replace_target) -print("\nPre-processed input: " + preprocessed_input) - -time.sleep(2.5) - -result_final = translator.translate(preprocessed_input, src=source_language, dest=destination_language) -print("Final Translation: " + result_final.text) - -# SWAYAM API -print("\n*****\n\nSwayam Translate\n") - -print("Original Input: " + source_input) - -headers_token = { - 'Authorization': 'Basic Rkhxazg5MG9Edko2dFFXYWIzbldFOVhwNEE0YTpXSjVmTVloV2JUWjF1RzloVzRrYnA1OEptZllh', -} - -data_token = { - 'grant_type': 'client_credentials' -} - -response = requests.post('https://apicallhttps.iiithcanvas.com/token', headers=headers_token, data=data_token) - -current_token = json.loads(response.text)["access_token"] - -headers = { - 'accept': '*/*', - 'Content-Type': 'application/json', - 'Authorization': 'Bearer ' + current_token, -} - -data = '{"text":"' + source_input + '","source_language":"eng","target_language":"hin"}' - -response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data) -response_text_original = json.loads(response.text) - -print("Original Translation: " + response_text_original["data"]) - -print("\nPre-processed input: " + preprocessed_input) - -data = '{"text":"' + preprocessed_input + '","source_language":"eng","target_language":"hin"}' - -response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data) -response_text_final = json.loads(response.text) - -print("Final Translation: " + response_text_final["data"]) diff --git a/src/output_test.txt b/src/output_test.txt deleted file mode 100644 index 996c785..0000000 --- a/src/output_test.txt +++ /dev/null @@ -1,19 +0,0 @@ -*** Pre-processing: I wonder -> I am keen to know *** - -Google Translate - -Original Input: I wonder if they will win tomorrow -Original Translation: मुझे आश्चर्य है कि अगर वे कल जीतेंगे - -Pre-processed input: I am keen to know if they will win tomorrow -Final Translation: मैं यह जानने के लिए उत्सुक हूं कि क्या वे कल जीतेंगे - -***** - -Swayam Translate - -Original Input: I wonder if they will win tomorrow -Original Translation: मुझे आश्चर्य होता है कि वे कल जीतेंगे तो - -Pre-processed input: I am keen to know if they will win tomorrow -Final Translation: मुझे जानने की उत्सुकता है कि वे कल जीतेंगे तो diff --git a/src/preprocess.py b/src/preprocess.py index 67ffe6e..91a5b91 100644 --- a/src/preprocess.py +++ b/src/preprocess.py @@ -83,7 +83,7 @@ def check(x, y): #Comparison with multiple options patterns_and_replacements.append((detection_pattern, rule[1].strip().split(" "))) -nlp = spacy.load("en_core_web_sm", disable=["parser", "ner", "attribute_ruler"]) +nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"]) input_lines = open(input_file_path).readlines() @@ -281,11 +281,11 @@ def check(x, y): #Comparison with multiple options text = "".join(output_parts) construction_detected_in_line = True - if(construction_detected_in_line): - print("Construct Detected\t" + text) - else: - print("Not Detected\t" + text) + #if(construction_detected_in_line): + # print("Construct Detected\t" + text) + #else: + # print("Not Detected\t" + text) #Output after applying all rules - #print(text) + print(text) diff --git a/src/preprocessing_testing.txt b/src/preprocessing_testing.txt deleted file mode 100644 index e26150f..0000000 --- a/src/preprocessing_testing.txt +++ /dev/null @@ -1,3 +0,0 @@ -She dislikes the lazy dog in the park. She dislikes the lazy. -> She dislikes the lazy dog in the park . She dislikes lazy people . - diff --git a/src/requirements.txt b/src/requirements.txt index a655808..8a627fb 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,16 +1 @@ -certifi==2020.12.5 -chardet==3.0.4 -googletrans==3.1.0a0 -h11==0.9.0 -h2==3.2.0 -hpack==3.0.0 -hstspreload==2020.12.22 -httpcore==0.9.1 -httpx==0.13.3 -hyperframe==5.2.0 -idna==2.10 -requests==2.25.1 -rfc3986==1.4.0 -sniffio==1.2.0 -urllib3==1.26.3 -spacy==2.2.4 +spacy==3.0.6 \ No newline at end of file diff --git a/src/sentence_tokenizer.py b/src/sentence_tokenizer.py new file mode 100644 index 0000000..7326657 --- /dev/null +++ b/src/sentence_tokenizer.py @@ -0,0 +1,13 @@ +import spacy +import sys + +nlp = spacy.load('en_core_web_sm') # Load the English Model + +file_name = sys.argv[1] +f = open(file_name).readlines() + +for line in f: + doc = nlp(line) + for sent in doc.sents: + print(str(sent).strip()) + diff --git a/src/swayam_api_python.py b/src/swayam_api_python.py deleted file mode 100644 index c103262..0000000 --- a/src/swayam_api_python.py +++ /dev/null @@ -1,17 +0,0 @@ -import requests -import json - -headers = { - 'accept': '*/*', - 'Content-Type': 'application/json', - 'Authorization': 'Bearer your_access_token_here', -} - -data = '{"text":"This is example text","source_language":"eng","target_language":"hin"}' - -response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data) - -response_text = json.loads(response.text) - -print(response_text["data"]) - diff --git a/src/testing.py b/src/testing.py deleted file mode 100644 index f0cd223..0000000 --- a/src/testing.py +++ /dev/null @@ -1,17 +0,0 @@ -import requests -import json - -headers = { - 'accept': '*/*', - 'Content-Type': 'application/json', - 'Authorization': 'Bearer eyJ4NXQiOiJNell4TW1Ga09HWXdNV0kwWldObU5EY3hOR1l3WW1NNFpUQTNNV0kyTkRBelpHUXpOR00wWkdSbE5qSmtPREZrWkRSaU9URmtNV0ZoTXpVMlpHVmxOZyIsImtpZCI6Ik16WXhNbUZrT0dZd01XSTBaV05tTkRjeE5HWXdZbU00WlRBM01XSTJOREF6WkdRek5HTTBaR1JsTmpKa09ERmtaRFJpT1RGa01XRmhNelUyWkdWbE5nX1JTMjU2IiwiYWxnIjoiUlMyNTYifQ.eyJzdWIiOiJraGFubmF0YW5tYWlAY2FyYm9uLnN1cGVyIiwiYXVkIjoiX2t6V1V3c0M5RExsZ1VOZ1hjdG9qcDQ4YW04YSIsIm5iZiI6MTYxNDg1OTA2MSwiYXpwIjoiX2t6V1V3c0M5RExsZ1VOZ1hjdG9qcDQ4YW04YSIsInNjb3BlIjoiYW1fYXBwbGljYXRpb25fc2NvcGUgZGVmYXVsdCIsImlzcyI6Imh0dHBzOlwvXC9lYzItMTUtMjA3LTI1NC0zNC5hcC1zb3V0aC0xLmNvbXB1dGUuYW1hem9uYXdzLmNvbTo5NDQ1XC9vYXV0aDJcL3Rva2VuIiwiZXhwIjoxNjE0ODYyNjYxLCJpYXQiOjE2MTQ4NTkwNjEsImp0aSI6IjA5MmE2NzI3LTI1YzItNDVlOC05NTQ4LTNjODYxZWM1NzY0MiJ9.pINkOdgVXzIwi5c66Ye5b46WF9aMKFCyA3wUME8l4amKCZmFdaxvmMs03yr8kTdF2SZh57dpuoJd9qa-4D8BbVFadX5LP_RUeSFY1eX7mxrfKaHC_7bBY8gigc4jWYs7Zpxa1DgHTwM5wJ0rmjg0oEx4kK1hjIdxGISZ8hOvQAR_zG7eN4F0NVcJCR81gWbwjwpA-N4btXVlzjiuO2p6m-BGC5tpY-qNf2CrABAhpaxIO_MVUsPf4Za4Bwe4YXAp7v5HVQ6bOejhgjXKNzHw66fsQCm8abmi9PZj3nIyeAM-kOoFrHXGXkOcZn4qtNKqQVt08XCh8YWAN2I3w-UCFg', -} - -data = '{"text":"This is example text","source_language":"eng","target_language":"hin"}' - -response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data) - -response_text = json.loads(response.text) - -print(response_text["data"]) - diff --git a/src/testing.txt b/src/testing.txt deleted file mode 100644 index 108df04..0000000 --- a/src/testing.txt +++ /dev/null @@ -1,70 +0,0 @@ -1. python3 preprocess.py "I wonder if they will win tomorrow" "I wonder" "I am keen to know" - -*** Pre-processing: I wonder -> I am keen to know *** - -Google Translate - -Original Input: I wonder if they will win tomorrow -Original Translation: मुझे आश्चर्य है कि अगर वे कल जीतेंगे - -Pre-processed input: I am keen to know if they will win tomorrow -Final Translation: मैं यह जानने के लिए उत्सुक हूं कि क्या वे कल जीतेंगे - -***** - -Swayam Translate - -Original Input: I wonder if they will win tomorrow -Original Translation: मुझे आश्चर्य होता है कि वे कल जीतेंगे तो - -Pre-processed input: I am keen to know if they will win tomorrow -Final Translation: मुझे जानने की उत्सुकता है कि वे कल जीतेंगे तो - -2. python3 preprocess.py "Most people don't admit they have mental health issues, let alone reach out for help" "let alone" "leave" - -*** Pre-processing: let alone -> leave *** - -Google Translate - -Original Input: Most people don't admit they have mental health issues, let alone reach out for help -Original Translation: ज्यादातर लोग स्वीकार नहीं करते हैं कि उनके पास मानसिक स्वास्थ्य के मुद्दे हैं, मदद के लिए अकेले पहुंचें - -Pre-processed input: Most people don't admit they have mental health issues, leave reach out for help -Final Translation: अधिकांश लोग स्वीकार नहीं करते कि उनके पास मानसिक स्वास्थ्य के मुद्दे हैं, मदद के लिए पहुंच छोड़ दें - -***** - -Swayam Translate - -Original Input: Most people don't admit they have mental health issues, let alone reach out for help -Original Translation: ज्यादातर लोगों को स्वीकार नहीं है वे मानसिक स्वास्थ्य मुद्दे हैं , अकेले मदद के लिए बाहर पहुंच - -Pre-processed input: Most people don't admit they have mental health issues, leave reach out for help -Final Translation: ज्यादातर लोगों को स्वीकार नहीं है वे मानसिक स्वास्थ्य मुद्दे हैं , मदद के लिए बाहर पहुंच छोड़ - - -Maybe even deal with phrasal verbs: https://www.englishclub.com/vocabulary/phrasal-verbs-list.htm - -3. python3 preprocess.py "When I first started out I didn't think I'd win one game, let alone five" "let alone" "leave" - -*** Pre-processing: let alone -> leave *** - -Google Translate - -Original Input: When I first started out I didn't think I'd win one game, let alone five -Original Translation: जब मैंने पहली बार शुरुआत की तो मुझे नहीं लगा कि मैं एक गेम जीत पाऊंगा, पांच को अकेले रहने दूंगा - -Pre-processed input: When I first started out I didn't think I'd win one game, leave five -Final Translation: जब मैंने पहली बार शुरुआत की तो मुझे नहीं लगा कि मैं एक गेम जीत पाऊँगा, पाँच छोड़ दो - -***** - -Swayam Translate - -Original Input: When I first started out I didn't think I'd win one game, let alone five -Original Translation: जब मैं पहले बाहर शुरू किया था मैं नहीं सोचा था कि मैं एक खेल जीत जाएगा , अकेले पांच - -Pre-processed input: When I first started out I didn't think I'd win one game, leave five -Final Translation: जब मैं पहले बाहर शुरू किया था मैं नहीं सोचा था कि मैं एक खेल जीत जाएगा , पांच छोड़ दो - - diff --git a/src/rule-set.ppr b/tests/rulesets/eng-hin.ppr similarity index 100% rename from src/rule-set.ppr rename to tests/rulesets/eng-hin.ppr diff --git a/tests/rulesets/rule-set.ppr b/tests/rulesets/rule-set.ppr deleted file mode 100644 index fa0aa6c..0000000 --- a/tests/rulesets/rule-set.ppr +++ /dev/null @@ -1,27 +0,0 @@ -get [PRP$@1] act|acts together -> sort out [@1] issues -give [NN|NNS|PRP@1] the slip -> evade [@1] -water under the bridge -> an old talk -to do with -> related to -play dumb -> pretend to be stupid -off the mark -> inaccurate -back to square one -> back to the start -all of a sudden -> suddenly -call it a day -> finish the work -, are|is|can|ca|do|does (n't) [PRP] (not) ?|,@1 -> , right [@1] -[NN|NNS@1] galore -> a lot of [@1] -before [PRP$@1] very eyes -> in front of [@1] own eyes -let alone -> leave -the [JJ@1] [!NN|NNS@2] -#> [@1] people [@2] #Removed temporarily due to a lot of false positives -should feel free to jump in -> should not hesitate to get involved -feel free to jump in -> don't hesitate to get involved -the one with the [@1] -> the one which has the [@1] -thorn in ([DT@1]) [PRP$|NN|NNS|NNP@2] ([POS]) side -> persistent problem for [@1] [@2|my:me|his:him|their:them|its:it] -{kick@1} the bucket -> [@1:die|kick:die|kicks:dies|kicked:died|kicking:dying] -try as ([DT|PRP$@1]) [NN|NNS|PRP@2] might -> no matter how much [@1] [@2] try -[NN|NNS|PRP@2] {make@3} ([DT|PRP$@4]) [NN|NNS|PRP@5] [VB@6] -> [@2] [@3:cause|made:caused|makes:causes|making:causing] [@4] [@5] to [@6] -[NN|NNS|PRP@2] {be@3} ([RB@4]) ([JJ@5])[NNS@6] with ([DT|PRP$@7]) [NN|NNS|PRP@8] -> [@2] and [@7] [@8] [@3] [@4] [@5] [@6] -I {wonder@1} -> I [@1:am|wondered:was] thinking -in [PRP$@1] own right -> in [@1:themselves|my:myself|his:himself|her:herself|its:itself] -What 's ([DT|PRP$@1]) [NN|NNS|PRP@2] doing [IN|VBG@3] -> Why is [@1] [@2] [@3] -Many|many@1 a|an [NN|NNS@2] -> [@1] [@2] -May you|she|he|they|it@1 [VB@2] -> I hope [@1] will [@2] \ No newline at end of file diff --git a/tests/test.sh b/tests/test.sh index 9741d5e..3c860a8 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -14,7 +14,7 @@ fi echo "Test 1: Basic with POS Tag" echo "US forces in Iraq need to get their act together there and really dampen the situation and stop inflaming things by confrontational policies." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="US forces in Iraq need to sort out their issues there and really dampen the situation and stop inflaming things by confrontational policies." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -25,7 +25,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 2: Multiple rules" echo "US forces in Iraq need to get their act together there and the vice president should feel free to jump in" > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="US forces in Iraq need to sort out their issues there and the vice president should not hesitate to get involved" python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -36,7 +36,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 3: Optional Token" echo "You are a student here, aren't you?" > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="You are a student here, right ?" python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -45,7 +45,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "We aren't going with him, are we?" > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="We aren't going with him, right ?" python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -54,7 +54,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "He really looks like that actor, does he not?" > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="He really looks like that actor, right ?" python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -65,7 +65,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 4: Multiple variables" echo "She made her students take the test again." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="She caused her students to take the test again." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -76,8 +76,8 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 5: OR Operator" echo "He told me to give police the slip and then I told them to give her the slip." > input_text.txt -rule_file="rule-set.ppr" -expected_output="He told me to evade police and then I told them to evade her." +rule_file="eng-hin.ppr" +expected_output="He told me to escape from police and then I told them to escape from her." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt echo $expected_output > check_output.txt @@ -87,7 +87,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 6: NOT Operator" echo "She dislikes the lazy employees and will fix this department." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="She dislikes the lazy employees and will fix this department." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -98,7 +98,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 7: Match Any Token Operator" echo "It's the one with the actor who went to jail." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="It's the one which has the actor who went to jail." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -109,7 +109,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 8: Mappings in Replacement Rules" echo "This pandemic is a thorn in his side." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="This pandemic is a persistent problem for him." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -118,7 +118,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "This pandemic is a thorn in their side." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="This pandemic is a persistent problem for them." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -126,9 +126,9 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt -echo "This pandemic is a thorn in Pushpa's side." > input_text.txt -rule_file="rule-set.ppr" -expected_output="This pandemic is a persistent problem for Pushpa." +echo "This pandemic is a thorn in John's side." > input_text.txt +rule_file="eng-hin.ppr" +expected_output="This pandemic is a persistent problem for John." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt echo $expected_output > check_output.txt @@ -136,7 +136,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "This pandemic is a thorn in the police's side." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="This pandemic is a persistent problem for the police." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -147,7 +147,7 @@ Compare_Outputs check_output.txt temp_output.txt echo "Test 9: Lemma matching" echo "I will find this poster before I kick the bucket." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="I will find this poster before I die." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -156,7 +156,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "Have you heard? The old man down the street has kicked the bucket." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="Have you heard? The old man down the street has died." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -165,7 +165,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "He knew that he will be able to achieve everything on the list before he kicks the bucket." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="He knew that he will be able to achieve everything on the list before he dies." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt @@ -174,7 +174,7 @@ echo $expected_output > check_output.txt Compare_Outputs check_output.txt temp_output.txt echo "He knew that he will be able to achieve everything on the list before kicking the bucket." > input_text.txt -rule_file="rule-set.ppr" +rule_file="eng-hin.ppr" expected_output="He knew that he will be able to achieve everything on the list before dying." python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt