Skip to content
This repository has been archived by the owner on Oct 16, 2023. It is now read-only.

V1.5.2 #104

Merged
merged 18 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/dependabot-misc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Enable version updates for npm
- package-ecosystem: "npm"
# Look for `package.json` and lock files in the `/project/modules/web-ui` directory
directory: "/project/modules/web-ui" # MUST BE UPDATED UPON TYPESCRIPT PROGRAMMING
# Check the npm registry for updates every day (weekdays)
schedule:
interval: "daily"
# Check for npm updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "master"
versioning-strategy: auto

# Enable version updates for Docker
- package-ecosystem: "docker"
# Look for a `Dockerfile` in the `/project/modules/orchestration` directory
directory: "/project/modules/orchestration"
# Check for updates every day (weekdays)
schedule:
interval: "daily"
# Check for Docker updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "master"
24 changes: 24 additions & 0 deletions .github/dependabot-python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
# Enable version updates for multiple branches
- package-ecosystem: "pip"
directory: "project/modules/orchestration"
schedule:
interval: "daily"
# Check for pip updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "master"
versioning-strategy: auto

- package-ecosystem: "pip"
directory: "project/modules/orchestration"
schedule:
interval: "daily"
# Check for pip updates at 9am Pacific time (see `timezone` below)
time: "09:00"
timezone: "America/Los_Angeles"
target-branch: "v1.5*"
versioning-strategy: auto
60 changes: 0 additions & 60 deletions .github/dependabot.yml

This file was deleted.

4 changes: 0 additions & 4 deletions .github/workflows/.gitignore

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/greetings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ jobs:
- uses: actions/first-interaction@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
issue-message: "Message that will be displayed on users' first issue"
pr-message: "Message that will be displayed on users' first pull request"
issue-message: "Thanks for chippin' in, choom. We'll have this looked at right away."
pr-message: "OK! Big legend! Nice job, can't wait to see your work."
35 changes: 0 additions & 35 deletions documents/todo.txt

This file was deleted.

File renamed without changes.
2 changes: 2 additions & 0 deletions project/documents/supplementary-information/explanation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
These subdirectories contain contextual information for the project.
- The AI may make use of everything inside 'supplementary-information/'
16 changes: 15 additions & 1 deletion project/modules/CyberSentinel/.env.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
TRAINING_DATA_PATH=
# Create a copy of this file and name it '.env'

# Model Training Configuration
TRAINING_DATA_PATH=project/modules/CyberSentinel/training-data/
LEARNING_RATE=0.001
BATCH_SIZE=32
EPOCHS=10
L2_REG=0.01

# Preprocessor Configuration (Use '.' for current working directory)
INPUT_FILE_PATH=
PREPROCESSED_DATA_FILE_PATH=project/modules/CyberSentinel/training-data/Processed-Data/

# Temporary file paths for DataLabeler
TEMP_PDF_FILE_PATH=temp_pdf_data.csv
TEMP_TXT_FILE_PATH=temp_txt_data.csv

# Path to save labeled data
LABELED_DATA_FILE_PATH=project/modules/CyberSentinel/preprocess/
Empty file.
26 changes: 17 additions & 9 deletions project/modules/CyberSentinel/preprocess/data_labeler.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
from dotenv import load_dotenv
import os
import csv
from typing import List, Tuple

# Point to the location of the .env file relative to the script's location
env_path = os.path.join(os.path.dirname(__file__), '../../../.env')

# Load the .env file
load_dotenv(dotenv_path=env_path)

class DataLabeler:
def __init__(self, temp_pdf_file_path: str = "temp_pdf_data.csv", temp_txt_file_path: str = "temp_txt_data.csv"):
self.temp_pdf_file_path = temp_pdf_file_path
self.temp_txt_file_path = temp_txt_file_path
def __init__(self):
default_temp_path = os.path.dirname(__file__)
self.temp_pdf_file_path = os.getenv('TEMP_PDF_FILE_PATH', os.path.join(default_temp_path, 'temp_pdf_data.csv'))
self.temp_txt_file_path = os.getenv('TEMP_TXT_FILE_PATH', os.path.join(default_temp_path, 'temp_txt_data.csv'))
self.output_file_path = os.getenv('LABELED_DATA_FILE_PATH')
self.labeled_pdf_data = self.load_temp_data(self.temp_pdf_file_path)
self.labeled_txt_data = self.load_temp_data(self.temp_txt_file_path)

Expand Down Expand Up @@ -36,13 +45,13 @@ def load_temp_data(self, file_path: str) -> List[Tuple[str, bool]]:
def label_data(self, data: List[str]) -> List[Tuple[str, bool]]:
    """Interactively label each text sample.

    Each sample is printed to stdout, then ``self.get_user_input`` is asked
    whether the text indicates an intention to commit hate-based violence.

    Args:
        data: Text samples to label, in the order they should be presented.

    Returns:
        One ``(text, label)`` tuple per sample, in input order, where
        ``label`` is the value returned by ``self.get_user_input``
        (annotated as ``bool``).
    """
    labeled_data = []
    for text in data:
        # "\n" must be a real newline escape: the doubled backslash used
        # previously printed a literal backslash followed by "n".
        print(f"\nSample:{text}")
        label = self.get_user_input("Does this text indicate the intention to commit acts of hate-based violence? (True/False): ")
        labeled_data.append((text, label))
    return labeled_data

def save_labeled_data_to_csv(self, labeled_data: List[Tuple[str, bool]], file_path: str):
def save_labeled_data_to_csv(self, labeled_data: List[Tuple[str, bool]]):
file_path = self.output_file_path or input("Enter the path to save the labeled data: ")
with open(file_path, 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['text', 'label'])
Expand All @@ -52,12 +61,11 @@ def save_labeled_data_to_csv(self, labeled_data: List[Tuple[str, bool]], file_pa
if __name__ == "__main__":
    # Script entry point: read the preprocessed samples, label them
    # interactively, then write the labeled rows out as CSV.
    data_labeler = DataLabeler()

    # Load the preprocessed data from the file saved by the Preprocessor.
    # The path comes from PREPROCESSED_DATA_FILE_PATH when it is set and
    # non-empty; otherwise the user is prompted for it.
    file_path = os.getenv('PREPROCESSED_DATA_FILE_PATH') or input("Enter the path to the preprocessed data file: ")

    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing embedded newlines are parsed correctly.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header; tolerate a completely empty file
        # Blank lines come back as empty rows; skip them instead of
        # failing on row[0].
        data = [row[0] for row in reader if row]

    labeled_data = data_labeler.label_data(data)
    # The output path is resolved inside save_labeled_data_to_csv
    # (LABELED_DATA_FILE_PATH or an interactive prompt).
    data_labeler.save_labeled_data_to_csv(labeled_data)
Loading