Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Pull Request #21

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 22 additions & 108 deletions plag.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from flask import Flask, request, render_template
from flask import Flask, request, render_template, jsonify
import re
import math

Expand All @@ -17,119 +17,33 @@ def loadPage():

def _cosine_match_percentage(query_words, database_words):
    """Return the cosine similarity of two word lists as a percentage.

    Both lists are reduced to term-frequency vectors over their combined
    vocabulary; the result is ``dot(q, d) / (|q| * |d|) * 100``.

    Args:
        query_words: Words extracted from the submitted query.
        database_words: Words extracted from the reference database text.

    Returns:
        The similarity as a float in the range 0-100.

    Raises:
        ValueError: If either list is empty, because a zero-length vector
            makes the similarity undefined (division by zero).
    """
    # Imported locally so this block does not depend on a file-level import.
    from collections import Counter

    query_tf = Counter(query_words)
    database_tf = Counter(database_words)

    # Counter returns 0 for missing words, so iterating the query's words is
    # enough: words absent from the query contribute nothing to the product.
    dot_product = sum(
        count * database_tf[word] for word, count in query_tf.items()
    )
    query_magnitude = math.sqrt(sum(c * c for c in query_tf.values()))
    database_magnitude = math.sqrt(sum(c * c for c in database_tf.values()))

    if query_magnitude == 0 or database_magnitude == 0:
        raise ValueError("query or database contains no words")

    return dot_product / (query_magnitude * database_magnitude) * 100


@app.route("/", methods=['POST'])
def cosineSimilarity():
    """Handle the cosine-similarity form submission.

    Reads the ``query`` form field, compares its words with the contents of
    ``database1.txt`` using cosine similarity of term frequencies, and
    re-renders ``index.html`` with the query and a result message.

    Returns:
        The rendered ``index.html`` template. On any failure (missing form
        field, unreadable database file, empty text) the template is rendered
        with the message "Please Enter Valid Data".
    """
    # Bound before the try so the error handler can always echo it back, even
    # when reading the form field itself is what failed.
    inputQuery = ""
    try:
        logger.info("Received POST request")

        inputQuery = request.form['query']
        # Replace every non-word character with a space, then split on
        # whitespace to obtain the word list.
        queryWordList = re.sub(r"[^\w]", " ", inputQuery.lower()).split()

        # The context manager closes the file even if reading fails.
        with open("database1.txt", "r") as fd:
            database1 = fd.read().lower()
        databaseWordList = re.sub(r"[^\w]", " ", database1).split()

        matchPercentage = _cosine_match_percentage(
            queryWordList, databaseWordList
        )

        output = "Input query text matches %0.02f%% with database." % matchPercentage

        return render_template('index.html', query=inputQuery, output=output)
    except Exception as e:
        # Top-level request boundary: log the cause and show a generic message.
        logger.error("Exception caught: %s", str(e))
        output = "Please Enter Valid Data"
        return render_template('index.html', query=inputQuery, output=output)
# ... (existing cosineSimilarity code remains unchanged) ...

@app.route("/check_jaccard", methods=['POST'])
def jaccardSimilarity():
    """Handle the Jaccard-similarity form submission.

    Reads the ``jaccard_query`` form field, builds the set of distinct words
    in the query and in ``database1.txt``, and reports
    ``|intersection| / |union| * 100``.

    Returns:
        The rendered ``index.html`` template with ``jaccard_query`` and
        ``jaccard_output`` set. On any failure the template is rendered with
        an empty query and the message "Please Enter Valid Data".
    """
    try:
        logger.info("Received POST request for Jaccard similarity")

        # This handler reads the 'jaccard_query' field, not 'query'.
        inputQuery = request.form['jaccard_query']
        # Replace every non-word character with a space, then split on
        # whitespace; only distinct words matter for Jaccard similarity.
        querySet = set(re.sub(r"[^\w]", " ", inputQuery.lower()).split())

        # The context manager closes the file even if reading fails.
        with open("database1.txt", "r") as fd:
            database1 = fd.read().lower()
        databaseSet = set(re.sub(r"[^\w]", " ", database1).split())

        union = querySet | databaseSet
        if not union:
            # Both texts are empty: the ratio would divide by zero.
            raise ValueError("query and database contain no words")

        # Named 'similarity' so the local does not shadow this function.
        similarity = len(querySet & databaseSet) / len(union) * 100

        output = "Input query text has a Jaccard similarity of %0.02f%% with the database." % similarity

        return render_template('index.html', jaccard_query=inputQuery, jaccard_output=output)
    except Exception as e:
        # Top-level request boundary: log the cause and show a generic message.
        logger.error("Exception caught: %s", str(e))
        output = "Please Enter Valid Data"
        return render_template('index.html', jaccard_query="", jaccard_output=output)


@app.route("/new_plagiarism_check", methods=['POST'])
def newPlagiarismCheck():
    """Handle a submission for the new plagiarism-checking technique.

    The algorithm itself is still a placeholder: the handler only requires the
    ``new_plagiarism_query`` form field to be present and always reports 50%.

    Returns:
        A JSON response ``{"new_plagiarism_result": ...}`` on success, or
        ``{"error": ...}`` with HTTP status 500 if anything raises.
    """
    try:
        logger.info("Received POST request for new plagiarism checking technique")

        # Looked up only to validate the request; a missing field raises and
        # is reported through the error response below.
        # TODO: feed this text into the real algorithm once it exists.
        inputQuery = request.form['new_plagiarism_query']

        # Placeholder result until the actual technique is implemented.
        result = 50

        output = {
            "new_plagiarism_result": f"The new plagiarism check result is {result}%"
        }
        return jsonify(output)
    except Exception as e:
        # Top-level request boundary: log the cause and return a JSON error so
        # the response type matches the success path.
        logger.error("Exception caught: %s", str(e))
        output = {
            "error": "An error occurred while processing your request."
        }
        return jsonify(output), 500

if __name__ == "__main__":
logger.info("Starting web application")
Expand Down
3 changes: 3 additions & 0 deletions req.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ itsdangerous==2.1.2
Jinja2==3.1.2
MarkupSafe==2.1.1
Werkzeug==2.1.2

# Add new dependencies below
# Example: new-package==1.0.0
1 change: 1 addition & 0 deletions static/css/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/* Add or update CSS styles here */
9 changes: 9 additions & 0 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,14 @@
<br>
<p><h3>{{jaccard_output}}</h3></p>
</form>

<!-- Form for the new plagiarism-check technique.
     The action must match the Flask route registered in plag.py
     ("/new_plagiarism_check"); the earlier value "/check_plagiarism_new"
     has no matching handler.
     NOTE(review): that handler responds with JSON instead of rendering this
     template, so the output heading below is not filled in by it; confirm
     whether the page is meant to show the result inline. -->
<form action="/new_plagiarism_check" method="POST">
<p>Input Text for New Plagiarism Technique : </p>
<p><textarea name="new_plagiarism_query" rows="8" cols="50" autofocus>{{new_plagiarism_query}}</textarea></p>
<br>
<p><input type="submit" name="submit" value="CHECK NEW PLAGIARISM TECHNIQUE"></p>
<br>
<p><h3>{{new_plagiarism_output}}</h3></p>
</form>
</body>
</home>