From 584d4fb6a41e3eba6c2233f9f6f3273468ecbab9 Mon Sep 17 00:00:00 2001
From: Tanmai Khanna <khanna.tanmai@gmail.com>
Date: Mon, 10 May 2021 12:37:06 +0530
Subject: [PATCH] update readme | all tests pass | add sentence tokenizer |
 rearrange

---
 README.md                                     | 15 +++-
 src/list_postag.txt => data/spacy_postag.txt  |  0
 src/SvayamMT_AccessToken.txt                  | 13 ----
 src/nmt_api.py                                | 76 -------------------
 src/output_test.txt                           | 19 -----
 src/preprocess.py                             | 12 +--
 src/preprocessing_testing.txt                 |  3 -
 src/requirements.txt                          | 17 +----
 src/sentence_tokenizer.py                     | 13 ++++
 src/swayam_api_python.py                      | 17 -----
 src/testing.py                                | 17 -----
 src/testing.txt                               | 70 -----------------
 .../rulesets/eng-hin.ppr                      |  0
 tests/rulesets/rule-set.ppr                   | 27 -------
 tests/test.sh                                 | 40 +++++-----
 15 files changed, 52 insertions(+), 287 deletions(-)
 rename src/list_postag.txt => data/spacy_postag.txt (100%)
 delete mode 100644 src/SvayamMT_AccessToken.txt
 delete mode 100644 src/nmt_api.py
 delete mode 100644 src/output_test.txt
 delete mode 100644 src/preprocessing_testing.txt
 create mode 100644 src/sentence_tokenizer.py
 delete mode 100644 src/swayam_api_python.py
 delete mode 100644 src/testing.py
 delete mode 100644 src/testing.txt
 rename src/rule-set.ppr => tests/rulesets/eng-hin.ppr (100%)
 delete mode 100644 tests/rulesets/rule-set.ppr

diff --git a/README.md b/README.md
index b0bc170..0a198cd 100644
--- a/README.md
+++ b/README.md
@@ -2,15 +2,16 @@
 
 ## How to Use
 - Install dependencies using `pip install -r requirements.txt`
+- Download spacy model using `python -m spacy download en_core_web_sm`
 - `python3 src/preprocess.py [rule_file.ppr] [input_file.txt]`
 - Test using `./tests/test.sh`
 
-## External tools used
-- spacy POS tagger
-- Download model using `python -m spacy download en_core_web_sm`
+Note: This assumes your input is already sentence tokenised. If it's not, run the `spacy`-based sentence tokeniser first: `python3 src/sentence_tokenizer.py [input_file.txt]` (prints one sentence per line).
 
 ## Rule formalism (File extension .ppr)
 
+**Sample rule file:** `tests/rulesets/eng-hin.ppr`
+
 ### Source side rules
 - `[...]` : POS Tags
 - `[..@1]` : Variables named `0-9,a-z`,etc. to be used in the target side
@@ -32,3 +33,11 @@ For example, if you want a rule that matches "the" followed by an Adjective, whi
 - Anything not in `[...]` is matched directly
 - Rules are put in a list and applied on the input sentence one after the other.
 - Only lines with `->` in the rule-set are counted as rules.
+
+## Testing
+- Run tests using `tests/test.sh`
+
+## Miscellaneous Information
+This project is part of my Master's thesis in Computational Linguistics titled: **Rule-based pre-processing of idioms and non-compositional constructions to simplify them and improve black-box machine translation**
+
+You can open an issue on this repo to report a bug or to ask a question.
diff --git a/src/list_postag.txt b/data/spacy_postag.txt
similarity index 100%
rename from src/list_postag.txt
rename to data/spacy_postag.txt
diff --git a/src/SvayamMT_AccessToken.txt b/src/SvayamMT_AccessToken.txt
deleted file mode 100644
index fbb835d..0000000
--- a/src/SvayamMT_AccessToken.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-curl -k -X POST https://apicallhttps.iiithcanvas.com/token -d "grant_type=client_credentials" -H"Authorization: Basic Rkhxazg5MG9Edko2dFFXYWIzbldFOVhwNEE0YTpXSjVmTVloV2JUWjF1RzloVzRrYnA1OEptZllh"
-
-Python Code
-
-headers = {
-    'Authorization': 'Basic Rkhxazg5MG9Edko2dFFXYWIzbldFOVhwNEE0YTpXSjVmTVloV2JUWjF1RzloVzRrYnA1OEptZllh',
-}
-
-data = {
-  'grant_type': 'client_credentials'
-}
-
-response = requests.post('https://apicallhttps.iiithcanvas.com/token', headers=headers, data=data, verify=False)
\ No newline at end of file
diff --git a/src/nmt_api.py b/src/nmt_api.py
deleted file mode 100644
index 815cbf8..0000000
--- a/src/nmt_api.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import googletrans
-import sys
-import time
-import requests
-import json
-from googletrans import Translator
-
-if(len(sys.argv) < 4):
-	print("Not enough arguments.")
-	sys.exit()
-
-source_input = sys.argv[1]
-replace_source = sys.argv[2]
-replace_target = sys.argv[3]
-
-print("*** Pre-processing: " + replace_source + " -> " + replace_target + " ***\n")
-
-# GOOGLE TRANSLATE API
-
-translator = Translator()
-source_language = 'en'
-destination_language = 'hi'
-
-print("Google Translate\n")
-
-print("Original Input: " + source_input)
-result_original = translator.translate(source_input, src=source_language, dest=destination_language)
-print("Original Translation: " + result_original.text)
-
-time.sleep(2.5)
-preprocessed_input = source_input.replace(replace_source, replace_target)
-print("\nPre-processed input: " + preprocessed_input)
-
-time.sleep(2.5)
-
-result_final = translator.translate(preprocessed_input, src=source_language, dest=destination_language)
-print("Final Translation: " + result_final.text)
-
-# SWAYAM API
-print("\n*****\n\nSwayam Translate\n")
-
-print("Original Input: " + source_input)
-
-headers_token = {
-    'Authorization': 'Basic Rkhxazg5MG9Edko2dFFXYWIzbldFOVhwNEE0YTpXSjVmTVloV2JUWjF1RzloVzRrYnA1OEptZllh',
-}
-
-data_token = {
-  'grant_type': 'client_credentials'
-}
-
-response = requests.post('https://apicallhttps.iiithcanvas.com/token', headers=headers_token, data=data_token)
-
-current_token = json.loads(response.text)["access_token"]
-
-headers = {
-    'accept': '*/*',
-    'Content-Type': 'application/json',
-    'Authorization': 'Bearer ' + current_token,
-}
-
-data = '{"text":"' + source_input + '","source_language":"eng","target_language":"hin"}'
-
-response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data)
-response_text_original = json.loads(response.text)
-
-print("Original Translation: " + response_text_original["data"])
-
-print("\nPre-processed input: " + preprocessed_input)
-
-data = '{"text":"' + preprocessed_input + '","source_language":"eng","target_language":"hin"}'
-
-response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data)
-response_text_final = json.loads(response.text)
-
-print("Final Translation: " + response_text_final["data"])
diff --git a/src/output_test.txt b/src/output_test.txt
deleted file mode 100644
index 996c785..0000000
--- a/src/output_test.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-*** Pre-processing: I wonder -> I am keen to know ***
-
-Google Translate
-
-Original Input: I wonder if they will win tomorrow
-Original Translation: मुझे आश्चर्य है कि अगर वे कल जीतेंगे
-
-Pre-processed input: I am keen to know if they will win tomorrow
-Final Translation: मैं यह जानने के लिए उत्सुक हूं कि क्या वे कल जीतेंगे
-
-*****
-
-Swayam Translate
-
-Original Input: I wonder if they will win tomorrow
-Original Translation: मुझे आश्चर्य होता है कि वे कल जीतेंगे तो
-
-Pre-processed input: I am keen to know if they will win tomorrow
-Final Translation: मुझे जानने की उत्सुकता है कि वे कल जीतेंगे तो
diff --git a/src/preprocess.py b/src/preprocess.py
index 67ffe6e..91a5b91 100644
--- a/src/preprocess.py
+++ b/src/preprocess.py
@@ -83,7 +83,7 @@ def check(x, y): #Comparison with multiple options
 
 	patterns_and_replacements.append((detection_pattern, rule[1].strip().split(" ")))
 
-nlp = spacy.load("en_core_web_sm", disable=["parser", "ner", "attribute_ruler"])
+nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
 
 input_lines = open(input_file_path).readlines()
 
@@ -281,11 +281,11 @@ def check(x, y): #Comparison with multiple options
 			text = "".join(output_parts)
 			construction_detected_in_line = True
 
-	if(construction_detected_in_line):
-		print("Construct Detected\t" + text)
-	else:
-		print("Not Detected\t" + text)
+	#if(construction_detected_in_line):
+	#	print("Construct Detected\t" + text)
+	#else:
+	#	print("Not Detected\t" + text)
 
 	#Output after applying all rules
-	#print(text)
+	print(text)
 
diff --git a/src/preprocessing_testing.txt b/src/preprocessing_testing.txt
deleted file mode 100644
index e26150f..0000000
--- a/src/preprocessing_testing.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-She dislikes the lazy dog in the park. She dislikes the lazy.
-> She dislikes the lazy dog in the park . She dislikes lazy people .
-
diff --git a/src/requirements.txt b/src/requirements.txt
index a655808..8a627fb 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,16 +1 @@
-certifi==2020.12.5
-chardet==3.0.4
-googletrans==3.1.0a0
-h11==0.9.0
-h2==3.2.0
-hpack==3.0.0
-hstspreload==2020.12.22
-httpcore==0.9.1
-httpx==0.13.3
-hyperframe==5.2.0
-idna==2.10
-requests==2.25.1
-rfc3986==1.4.0
-sniffio==1.2.0
-urllib3==1.26.3
-spacy==2.2.4
+spacy==3.0.6
\ No newline at end of file
diff --git a/src/sentence_tokenizer.py b/src/sentence_tokenizer.py
new file mode 100644
index 0000000..7326657
--- /dev/null
+++ b/src/sentence_tokenizer.py
@@ -0,0 +1,13 @@
+import spacy
+import sys
+
+# Usage: python3 src/sentence_tokenizer.py [input_file.txt] -- prints one sentence per line.
+nlp = spacy.load('en_core_web_sm') # Load the English Model
+file_name = sys.argv[1]
+
+# Iterate the file inside a context manager so the handle is closed when done.
+with open(file_name) as f:
+	for line in f:
+		doc = nlp(line)
+		for sent in doc.sents:
+			print(str(sent).strip())
diff --git a/src/swayam_api_python.py b/src/swayam_api_python.py
deleted file mode 100644
index c103262..0000000
--- a/src/swayam_api_python.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import requests
-import json
-
-headers = {
-    'accept': '*/*',
-    'Content-Type': 'application/json',
-    'Authorization': 'Bearer your_access_token_here',
-}
-
-data = '{"text":"This is example text","source_language":"eng","target_language":"hin"}'
-
-response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data)
-
-response_text = json.loads(response.text)
-
-print(response_text["data"])
-
diff --git a/src/testing.py b/src/testing.py
deleted file mode 100644
index f0cd223..0000000
--- a/src/testing.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import requests
-import json
-
-headers = {
-    'accept': '*/*',
-    'Content-Type': 'application/json',
-    'Authorization': 'Bearer eyJ4NXQiOiJNell4TW1Ga09HWXdNV0kwWldObU5EY3hOR1l3WW1NNFpUQTNNV0kyTkRBelpHUXpOR00wWkdSbE5qSmtPREZrWkRSaU9URmtNV0ZoTXpVMlpHVmxOZyIsImtpZCI6Ik16WXhNbUZrT0dZd01XSTBaV05tTkRjeE5HWXdZbU00WlRBM01XSTJOREF6WkdRek5HTTBaR1JsTmpKa09ERmtaRFJpT1RGa01XRmhNelUyWkdWbE5nX1JTMjU2IiwiYWxnIjoiUlMyNTYifQ.eyJzdWIiOiJraGFubmF0YW5tYWlAY2FyYm9uLnN1cGVyIiwiYXVkIjoiX2t6V1V3c0M5RExsZ1VOZ1hjdG9qcDQ4YW04YSIsIm5iZiI6MTYxNDg1OTA2MSwiYXpwIjoiX2t6V1V3c0M5RExsZ1VOZ1hjdG9qcDQ4YW04YSIsInNjb3BlIjoiYW1fYXBwbGljYXRpb25fc2NvcGUgZGVmYXVsdCIsImlzcyI6Imh0dHBzOlwvXC9lYzItMTUtMjA3LTI1NC0zNC5hcC1zb3V0aC0xLmNvbXB1dGUuYW1hem9uYXdzLmNvbTo5NDQ1XC9vYXV0aDJcL3Rva2VuIiwiZXhwIjoxNjE0ODYyNjYxLCJpYXQiOjE2MTQ4NTkwNjEsImp0aSI6IjA5MmE2NzI3LTI1YzItNDVlOC05NTQ4LTNjODYxZWM1NzY0MiJ9.pINkOdgVXzIwi5c66Ye5b46WF9aMKFCyA3wUME8l4amKCZmFdaxvmMs03yr8kTdF2SZh57dpuoJd9qa-4D8BbVFadX5LP_RUeSFY1eX7mxrfKaHC_7bBY8gigc4jWYs7Zpxa1DgHTwM5wJ0rmjg0oEx4kK1hjIdxGISZ8hOvQAR_zG7eN4F0NVcJCR81gWbwjwpA-N4btXVlzjiuO2p6m-BGC5tpY-qNf2CrABAhpaxIO_MVUsPf4Za4Bwe4YXAp7v5HVQ6bOejhgjXKNzHw66fsQCm8abmi9PZj3nIyeAM-kOoFrHXGXkOcZn4qtNKqQVt08XCh8YWAN2I3w-UCFg',
-}
-
-data = '{"text":"This is example text","source_language":"eng","target_language":"hin"}'
-
-response = requests.post('https://apicallhttps.iiithcanvas.com/apiMt/v.1.0.0/mt_linker', headers=headers, data=data)
-
-response_text = json.loads(response.text)
-
-print(response_text["data"])
-
diff --git a/src/testing.txt b/src/testing.txt
deleted file mode 100644
index 108df04..0000000
--- a/src/testing.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-1. python3 preprocess.py "I wonder if they will win tomorrow" "I wonder" "I am keen to know"
-
-*** Pre-processing: I wonder -> I am keen to know ***
-
-Google Translate
-
-Original Input: I wonder if they will win tomorrow
-Original Translation: मुझे आश्चर्य है कि अगर वे कल जीतेंगे
-
-Pre-processed input: I am keen to know if they will win tomorrow
-Final Translation: मैं यह जानने के लिए उत्सुक हूं कि क्या वे कल जीतेंगे
-
-*****
-
-Swayam Translate
-
-Original Input: I wonder if they will win tomorrow
-Original Translation: मुझे आश्चर्य होता है कि वे कल जीतेंगे तो
-
-Pre-processed input: I am keen to know if they will win tomorrow
-Final Translation: मुझे जानने की उत्सुकता है कि वे कल जीतेंगे तो
-
-2. python3 preprocess.py "Most people don't admit they have mental health issues, let alone reach out for help" "let alone" "leave"
-
-*** Pre-processing: let alone -> leave ***
-
-Google Translate
-
-Original Input: Most people don't admit they have mental health issues, let alone reach out for help
-Original Translation: ज्यादातर लोग स्वीकार नहीं करते हैं कि उनके पास मानसिक स्वास्थ्य के मुद्दे हैं, मदद के लिए अकेले पहुंचें
-
-Pre-processed input: Most people don't admit they have mental health issues, leave reach out for help
-Final Translation: अधिकांश लोग स्वीकार नहीं करते कि उनके पास मानसिक स्वास्थ्य के मुद्दे हैं, मदद के लिए पहुंच छोड़ दें
-
-*****
-
-Swayam Translate
-
-Original Input: Most people don't admit they have mental health issues, let alone reach out for help
-Original Translation: ज्यादातर लोगों को स्वीकार नहीं है वे मानसिक स्वास्थ्य मुद्दे हैं , अकेले मदद के लिए बाहर पहुंच
-
-Pre-processed input: Most people don't admit they have mental health issues, leave reach out for help
-Final Translation: ज्यादातर लोगों को स्वीकार नहीं है वे मानसिक स्वास्थ्य मुद्दे हैं , मदद के लिए बाहर पहुंच छोड़
-
-
-Maybe even deal with phrasal verbs: https://www.englishclub.com/vocabulary/phrasal-verbs-list.htm
-
-3. python3 preprocess.py "When I first started out I didn't think I'd win one game, let alone five" "let alone" "leave"
-
-*** Pre-processing: let alone -> leave ***
-
-Google Translate
-
-Original Input: When I first started out I didn't think I'd win one game, let alone five
-Original Translation: जब मैंने पहली बार शुरुआत की तो मुझे नहीं लगा कि मैं एक गेम जीत पाऊंगा, पांच को अकेले रहने दूंगा
-
-Pre-processed input: When I first started out I didn't think I'd win one game, leave five
-Final Translation: जब मैंने पहली बार शुरुआत की तो मुझे नहीं लगा कि मैं एक गेम जीत पाऊँगा, पाँच छोड़ दो
-
-*****
-
-Swayam Translate
-
-Original Input: When I first started out I didn't think I'd win one game, let alone five
-Original Translation: जब मैं पहले बाहर शुरू किया था मैं नहीं सोचा था कि मैं एक खेल जीत जाएगा , अकेले पांच
-
-Pre-processed input: When I first started out I didn't think I'd win one game, leave five
-Final Translation: जब मैं पहले बाहर शुरू किया था मैं नहीं सोचा था कि मैं एक खेल जीत जाएगा , पांच छोड़ दो
-
-
diff --git a/src/rule-set.ppr b/tests/rulesets/eng-hin.ppr
similarity index 100%
rename from src/rule-set.ppr
rename to tests/rulesets/eng-hin.ppr
diff --git a/tests/rulesets/rule-set.ppr b/tests/rulesets/rule-set.ppr
deleted file mode 100644
index fa0aa6c..0000000
--- a/tests/rulesets/rule-set.ppr
+++ /dev/null
@@ -1,27 +0,0 @@
-get [PRP$@1] act|acts together -> sort out [@1] issues
-give [NN|NNS|PRP@1] the slip -> evade [@1]
-water under the bridge -> an old talk
-to do with -> related to
-play dumb -> pretend to be stupid
-off the mark -> inaccurate
-back to square one -> back to the start
-all of a sudden -> suddenly
-call it a day -> finish the work
-, are|is|can|ca|do|does (n't) [PRP] (not) ?|,@1 -> , right [@1]
-[NN|NNS@1] galore -> a lot of [@1]
-before [PRP$@1] very eyes -> in front of [@1] own eyes
-let alone -> leave
-the [JJ@1] [!NN|NNS@2] -#> [@1] people [@2] #Removed temporarily due to a lot of false positives
-should feel free to jump in -> should not hesitate to get involved
-feel free to jump in -> don't hesitate to get involved
-the one with the [@1] -> the one which has the [@1]
-thorn in ([DT@1]) [PRP$|NN|NNS|NNP@2] ([POS]) side -> persistent problem for [@1] [@2|my:me|his:him|their:them|its:it]
-{kick@1} the bucket -> [@1:die|kick:die|kicks:dies|kicked:died|kicking:dying]
-try as ([DT|PRP$@1]) [NN|NNS|PRP@2] might -> no matter how much [@1] [@2] try
-[NN|NNS|PRP@2] {make@3} ([DT|PRP$@4]) [NN|NNS|PRP@5] [VB@6] -> [@2] [@3:cause|made:caused|makes:causes|making:causing] [@4] [@5] to [@6]
-[NN|NNS|PRP@2] {be@3} ([RB@4]) ([JJ@5])[NNS@6] with ([DT|PRP$@7]) [NN|NNS|PRP@8] -> [@2] and [@7] [@8] [@3] [@4] [@5] [@6]
-I {wonder@1} -> I [@1:am|wondered:was] thinking
-in [PRP$@1] own right  -> in [@1:themselves|my:myself|his:himself|her:herself|its:itself]
-What 's ([DT|PRP$@1]) [NN|NNS|PRP@2] doing [IN|VBG@3] -> Why is [@1] [@2] [@3]
-Many|many@1 a|an [NN|NNS@2] -> [@1] [@2]
-May you|she|he|they|it@1 [VB@2] -> I hope [@1] will [@2]
\ No newline at end of file
diff --git a/tests/test.sh b/tests/test.sh
index 9741d5e..3c860a8 100755
--- a/tests/test.sh
+++ b/tests/test.sh
@@ -14,7 +14,7 @@ fi
 echo "Test 1: Basic with POS Tag"
 
 echo "US forces in Iraq need to get their act together there and really dampen the situation and stop inflaming things by confrontational policies." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="US forces in Iraq need to sort out their issues there and really dampen the situation and stop inflaming things by confrontational policies."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -25,7 +25,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 2: Multiple rules"
 
 echo "US forces in Iraq need to get their act together there and the vice president should feel free to jump in" > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="US forces in Iraq need to sort out their issues there and the vice president should not hesitate to get involved"
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -36,7 +36,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 3: Optional Token"
 
 echo "You are a student here, aren't you?" > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="You are a student here, right ?"
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -45,7 +45,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "We aren't going with him, are we?" > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="We aren't going with him, right ?"
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -54,7 +54,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "He really looks like that actor, does he not?" > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="He really looks like that actor, right ?"
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -65,7 +65,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 4: Multiple variables"
 
 echo "She made her students take the test again." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="She caused her students to take the test again."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -76,8 +76,8 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 5: OR Operator"
 
 echo "He told me to give police the slip and then I told them to give her the slip." > input_text.txt
-rule_file="rule-set.ppr"
-expected_output="He told me to evade police and then I told them to evade her."
+rule_file="eng-hin.ppr"
+expected_output="He told me to escape from police and then I told them to escape from her."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
 echo $expected_output > check_output.txt
@@ -87,7 +87,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 6: NOT Operator"
 
 echo "She dislikes the lazy employees and will fix this department." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="She dislikes the lazy employees and will fix this department."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -98,7 +98,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 7: Match Any Token Operator"
 
 echo "It's the one with the actor who went to jail." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="It's the one which has the actor who went to jail."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -109,7 +109,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 8: Mappings in Replacement Rules"
 
 echo "This pandemic is a thorn in his side." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="This pandemic is a persistent problem for him."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -118,7 +118,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "This pandemic is a thorn in their side." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="This pandemic is a persistent problem for them."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -126,9 +126,9 @@ echo $expected_output > check_output.txt
 
 Compare_Outputs check_output.txt temp_output.txt
 
-echo "This pandemic is a thorn in Pushpa's side." > input_text.txt
-rule_file="rule-set.ppr"
-expected_output="This pandemic is a persistent problem for Pushpa."
+echo "This pandemic is a thorn in John's side." > input_text.txt
+rule_file="eng-hin.ppr"
+expected_output="This pandemic is a persistent problem for John."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
 echo $expected_output > check_output.txt
@@ -136,7 +136,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "This pandemic is a thorn in the police's side." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="This pandemic is a persistent problem for the police."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -147,7 +147,7 @@ Compare_Outputs check_output.txt temp_output.txt
 echo "Test 9: Lemma matching"
 
 echo "I will find this poster before I kick the bucket." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="I will find this poster before I die."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -156,7 +156,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "Have you heard? The old man down the street has kicked the bucket." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="Have you heard? The old man down the street has died."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -165,7 +165,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "He knew that he will be able to achieve everything on the list before he kicks the bucket." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="He knew that he will be able to achieve everything on the list before he dies."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt
@@ -174,7 +174,7 @@ echo $expected_output > check_output.txt
 Compare_Outputs check_output.txt temp_output.txt
 
 echo "He knew that he will be able to achieve everything on the list before kicking the bucket." > input_text.txt
-rule_file="rule-set.ppr"
+rule_file="eng-hin.ppr"
 expected_output="He knew that he will be able to achieve everything on the list before dying."
 
 python3 ../src/preprocess.py "rulesets/$rule_file" input_text.txt > temp_output.txt