
Commit

working code
colinelder committed Mar 1, 2024
1 parent b73c844 commit bc33dfb
Showing 1 changed file with 40 additions and 46 deletions.
86 changes: 40 additions & 46 deletions src/WorkingCode.py
@@ -1,66 +1,60 @@
 # Import necessary spacy and os packages to parse through data and read text
 
 import spacy
 import os
 import re
 # Import streamlit, which will be used as the interface for users
 import streamlit as st
 from fuzzywuzzy import fuzz
 
-# Add tokenizer so that spacy can assign vectors to words
+# Load the NLP model
 nlp = spacy.load("en_core_web_sm")
 
-# define file path to text directories
+# Define file path to text directories.
 data_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'Jsem_Data')
 
-# Creates empty list for text file paths
-text_path = []
-
-# Function to remove any non letter characters in the text
+# Function to clean text data
 def remove_non_letters(data):
-    # This uses regular expression to remove non-letter characters from text files
-    return re.sub(r'[^a-zA-Z]', '', data)
-
-# Program needs to iterate through files
-
-for filename in os.listdir(data_path):
-    file_path = os.path.join(data_path, filename)
-    with open(file_path, 'r') as file:
-        # Process content of the file here
-        content = file.read()
-        # Example: print the filename
-        print(f"File: {filename}")
-
-# fact checking function
-def fact_checker(input,data):
-    # processing text file
-    doc = nlp(data)
-    # Processing user input
-    doc = nlp(input)
+    # Retain spaces and letters for readability
+    return re.sub(r'[^a-zA-Z\s]', '', data)
 
 # Function to read and preprocess text files
 def preprocess_files(directory):
     texts = {}
     for filename in os.listdir(directory):
         file_path = os.path.join(directory, filename)
-        if filename.endswith('.txt'):
-            # Appends the name of the file to the list instead of txt
-            file_path.append(filename)
         if os.path.isfile(file_path):
             with open(file_path, 'r', encoding='utf-8') as file:
                 content = file.read()
                 cleaned_content = remove_non_letters(content)
                 texts[filename] = cleaned_content
     return texts
 
 
+# Fact-checking function
+def fact_checker(input_fact, texts, threshold=75):
+    found_in_files = []
+    for filename, content in texts.items():
+        if input_fact.lower() in content.lower():
+            found_in_files.append(filename)
+    return found_in_files
+
+
+# Load and preprocess the text data
+texts = preprocess_files(data_path)
 
 # Streamlit interface
 
-st.title("Fact Checking Program")
-fact_input = st.text_input("Enter the fact you want to check:")
+st.title("Colin's Science Fact Checking Program")
+fact_input = st.text_input("Please enter the fact you want to check:")
 
 if st.button("Check Fact"):
     if fact_input:
-        found, found_in_files = fact_check(fact_input)
-        if found:
-            st.success("The fact is true.")
+        found_in_files = fact_checker(fact_input, texts)
+        if found_in_files:
+            st.success("The fact is true!")
             st.write(f"Found in file(s): {', '.join(found_in_files)}")
         else:
-            st.error("The fact is false.")
+            st.error("The fact is false :(")
     else:
         st.warning("Please enter a fact to check.")
 
 # Parsing function
 
 #def text_parsing(file_path):
 #    with open(file_path, "r", encoding="utf-8") as file:
 #        text = file.read()
 #        doc = nlp(text)
 #        return doc
 
 # Tensorizer - this gives words parts of speech
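
Note that the commit imports fuzz from fuzzywuzzy and gives fact_checker a threshold parameter, but neither is used: the check is a plain case-insensitive substring match, so a fact phrased even slightly differently from the source text is reported false. A minimal sketch of how the fuzzy comparison might be wired in, assuming fuzz.partial_ratio (fuzzywuzzy's 0-100 partial-match scorer) is the intended measure; fuzzy_fact_checker is a hypothetical name, not part of the commit:

from fuzzywuzzy import fuzz

# Hypothetical fuzzy variant of fact_checker (not in the commit):
# scores the input fact against each file's cleaned text and keeps
# files whose partial-match score meets the 0-100 threshold.
def fuzzy_fact_checker(input_fact, texts, threshold=75):
    found_in_files = []
    for filename, content in texts.items():
        if fuzz.partial_ratio(input_fact.lower(), content.lower()) >= threshold:
            found_in_files.append(filename)
    return found_in_files

partial_ratio is forgiving of surrounding text, so a short fact buried in a long document can still score highly; the default of 75 mirrors the unused parameter in the committed code.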

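For reference, a Streamlit script like this one is launched with the streamlit CLI rather than plain python, e.g. from the repository root:

streamlit run src/WorkingCode.py

Streamlit then serves the fact-checking interface in the browser.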