Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pesacheck Meedan Bridge #549

Merged
merged 33 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6044b38
Add pesacheck script
koechkevin Feb 1, 2024
79e75e5
Add a cron job
koechkevin Feb 2, 2024
1d52ea7
Remove unwanted files
koechkevin Feb 2, 2024
a91cafa
Rename variables
koechkevin Feb 2, 2024
64c0da7
Lint and use pants
koechkevin Feb 2, 2024
918bdc0
Refactor and add docker build
koechkevin Feb 7, 2024
6763c0b
Add cron job, add .env
koechkevin Feb 8, 2024
dc73c8a
Bump boto3 from 1.34.31 to 1.34.32 in /3rdparty/py
dependabot[bot] Feb 1, 2024
2e72f8c
Bump boto3 from 1.34.32 to 1.34.33 in /3rdparty/py
dependabot[bot] Feb 2, 2024
68233fe
Bump boto3 from 1.34.33 to 1.34.34 in /3rdparty/py
dependabot[bot] Feb 5, 2024
10004d1
Bump boto3 from 1.34.34 to 1.34.35 in /3rdparty/py
dependabot[bot] Feb 6, 2024
de9879b
Bump boto3 from 1.34.35 to 1.34.36 in /3rdparty/py
dependabot[bot] Feb 7, 2024
8cdf376
Bump sentry-sdk from 1.40.0 to 1.40.2 in /3rdparty/py
dependabot[bot] Feb 7, 2024
09a5d3d
Merge branch 'main' into feature/pesacheck
koechkevin Feb 8, 2024
4d50048
Fix: Lint issues
koechkevin Feb 9, 2024
909fed8
Merge branch 'main' into feature/pesacheck
koechkevin Feb 9, 2024
1bc7dae
Fix docker lint issues
koechkevin Feb 9, 2024
4256526
Update files, fix lints
koechkevin Feb 9, 2024
da690f2
Update env.example
koechkevin Feb 9, 2024
04f3026
Merge branch 'main' into feature/pesacheck
koechkevin Feb 13, 2024
5b97306
Add local database
koechkevin Feb 13, 2024
a346d99
Build and deploy as a docker image
koechkevin Feb 14, 2024
f1e391b
Remove test data
koechkevin Feb 15, 2024
07fd4fb
Deploy app to dokku and test
koechkevin Feb 15, 2024
05face2
Fix lint issues
koechkevin Feb 15, 2024
ecf1c5c
- Use 1 database instance
koechkevin Feb 16, 2024
0637ba7
Pass whole database path as a url
koechkevin Feb 16, 2024
605a356
Update docker file
koechkevin Feb 16, 2024
bf3f2ce
Fix Graphql errors
koechkevin Feb 19, 2024
ecb97e1
Set default value in mutation queries
koechkevin Feb 20, 2024
6fd59db
Update language codes
koechkevin Feb 20, 2024
d9d835a
Remove credentials.json
koechkevin Feb 20, 2024
39359a9
add html2text parser
koechkevin Feb 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/venv
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,4 @@ cython_debug/
/.pants.workdir.file_lock*

# End of custom ignore
credentials.json
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
"python.linting.enabled": true,
"cSpell.words": [
"dotenv"
]
}
22 changes: 22 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
FROM python:3.9
koechkevin marked this conversation as resolved.
Show resolved Hide resolved

WORKDIR /app
COPY . /app

# Create a virtualenv for the bridge and put it first on PATH so that the
# later `pip install` (and the cron job, which calls the venv's python by
# absolute path) use it.
RUN pip install virtualenv
ENV PATH="/app/pesacheck_meedan_bridge/venv/bin:$PATH"
RUN virtualenv /app/pesacheck_meedan_bridge/venv
RUN echo "source /app/pesacheck_meedan_bridge/venv/bin/activate" >> ~/.bashrc
RUN pip install --no-cache-dir -r pesacheck_meedan_bridge/requirements.txt

# Install cron; drop the apt package lists afterwards to keep the layer small.
RUN apt-get update \
    && apt-get -y install --no-install-recommends cron \
    && rm -rf /var/lib/apt/lists/*

RUN chmod +x /app/pesacheck_meedan_bridge/service.py

# The schedule file uses the system (/etc/cron.d) format, which carries a user
# field ("root"). It must NOT also be installed with `crontab <file>`: in a
# per-user crontab there is no user field, so "root" would be run as the
# command and a second, always-failing copy of the job would be scheduled.
COPY crontab /etc/cron.d/crontab
RUN chmod 0644 /etc/cron.d/crontab

# Run cron in the foreground so it stays the container's main process.
CMD ["cron", "-f"]

8 changes: 8 additions & 0 deletions app.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"cron": [
{
"command": "./test_script.sh",
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
"schedule": "* * * * *"
}
]
}
1 change: 1 addition & 0 deletions crontab
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Every day at 00:00, run the PesaCheck -> Check bridge with the virtualenv's
# interpreter. "root" is the user field of the /etc/cron.d file format.
# Because of "&&", the timestamp line is appended to /app/debug.log only when
# the script exits successfully.
0 0 * * * root /app/pesacheck_meedan_bridge/venv/bin/python /app/pesacheck_meedan_bridge/service.py && echo "Cron job ran at $(date)" >> /app/debug.log
5 changes: 4 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ services:
- 8000:80
depends_on:
- twoopstracker_app

pesacheck_meedan_bridge:
build:
context: .
dockerfile: Dockerfile
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
volumes:
app-media:
app-staticfiles:
Expand Down
6 changes: 6 additions & 0 deletions pesacheck_meedan_bridge/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
PESACHECK_URL=https://pesacheck.org/feed
PESACHECK_RSS_2_JSON_API_KEY=
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
PESACHECK_CHECK_URL=https://check-api.checkmedia.org/api/graphql
PESACHECK_TOKEN=
PESACHECK_WORKSPACE_SLUG=pesacheck-tipline-sandbox
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
PESACHECK_SENTRY_DSN=
48 changes: 48 additions & 0 deletions pesacheck_meedan_bridge/mutation_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json
koechkevin marked this conversation as resolved.
Show resolved Hide resolved


def create_mutation_query(media_type=None, channel=None, set_tags=None,
                          set_status=None, set_claim_description=None,
                          title=None, summary=None, url=None,
                          language=None, publish_report=False):
    """Build the GraphQL ``createProjectMedia`` mutation document.

    Every interpolated value is rendered through ``json.dumps`` so that
    quotes, backslashes and newlines inside feed text are escaped instead of
    terminating the string literal and corrupting the query.

    Args:
        media_type: Media type name; ``"Blank"`` is used when falsy.
        channel: Value for ``channel.main``; ``None`` is rendered as ``null``.
        set_tags: List of tag strings; an empty list is used when falsy.
        set_status: Status string; empty string when falsy.
        set_claim_description: Claim description text; empty when falsy.
        title: Fact-check title; empty when falsy.
        summary: Fact-check summary; empty when falsy.
        url: Fact-check URL; empty when falsy.
        language: Language code; empty when falsy.
        publish_report: Rendered as the GraphQL boolean ``true``/``false``.

    Returns:
        The mutation document as a string. It requests the created project
        media's ``id``, ``full_url`` and the nested fact-check ``id``.
    """

    def _literal(value):
        # JSON strings, numbers, null and arrays of strings are written the
        # same way as the corresponding GraphQL literals, so json.dumps
        # doubles as a GraphQL value encoder for the types used here.
        return json.dumps(value, ensure_ascii=False)

    mutation_query = f"""
    mutation create {{
      createProjectMedia(input: {{
        media_type: {_literal(media_type or 'Blank')},
        channel: {{ main: {_literal(channel)} }},
        set_tags: {_literal(set_tags or [])},
        set_status: {_literal(set_status or '')},
        set_claim_description: {_literal(set_claim_description or '')},
        set_fact_check: {{
          title: {_literal(title or '')},
          summary: {_literal(summary or '')},
          url: {_literal(url or '')},
          language: {_literal(language or '')},
          publish_report: {str(publish_report).lower()}
        }}
      }}) {{
        project_media {{
          id
          full_url
          claim_description {{
            fact_check {{
              id
            }}
          }}
        }}
      }}
    }}
    """

    return mutation_query


def delete_mutation_query(id):
    """Build the GraphQL ``destroyFactCheck`` mutation for one fact check.

    Args:
        id: Identifier of the fact check to delete. It is converted to a
            string and escaped before being embedded in the document. (The
            parameter name shadows the ``id`` builtin; it is kept so existing
            keyword callers keep working.)

    Returns:
        The mutation document as a string; it requests ``deletedId``.
    """
    # str() preserves the original "always a quoted string" rendering, while
    # json.dumps escapes quotes/backslashes that would otherwise break the
    # string literal.
    quoted_id = json.dumps(str(id), ensure_ascii=False)
    mutation_query = f'''
    mutation {{
      destroyFactCheck(input: {{
        id: {quoted_id}
      }}) {{ deletedId }}
    }}
    '''
    return mutation_query
152 changes: 152 additions & 0 deletions pesacheck_meedan_bridge/output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
[
koechkevin marked this conversation as resolved.
Show resolved Hide resolved
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyMzg=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637238",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIxNA=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyMzk=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637239",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIxNQ=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDA=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637240",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIxNg=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDE=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637241",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIxNw=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDI=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637242",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIxOA=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDM=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637243",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIxOQ=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDQ=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637244",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIyMA=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDU=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637245",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIyMQ=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDY=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637246",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIyMg=="
}
}
}
}
}
},
{
"data": {
"createProjectMedia": {
"project_media": {
"id": "UHJvamVjdE1lZGlhLzI2MzcyNDc=",
"full_url": "https://checkmedia.org/pesacheck-tipline-sandbox/project/16148/media/2637247",
"claim_description": {
"fact_check": {
"id": "RmFjdENoZWNrLzgyNzIyMw=="
}
}
}
}
}
}
]
7 changes: 7 additions & 0 deletions pesacheck_meedan_bridge/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
certifi==2023.11.17
charset-normalizer==3.3.2
idna==3.6
python-dotenv==1.0.1
requests==2.31.0
sentry-sdk==1.40.0
urllib3==2.2.0
76 changes: 76 additions & 0 deletions pesacheck_meedan_bridge/service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import re
import requests
import os
from dotenv import load_dotenv
from mutation_queries import create_mutation_query, delete_mutation_query
from sentry_sdk import init, capture_exception, capture_message, set_context

# Load settings from a .env file (python-dotenv) so the os.getenv() calls
# below and in the functions of this module can read them.
load_dotenv()


# Initialise the Sentry SDK at import time. The DSN comes from
# PESACHECK_SENTRY_DSN; both sample rates are set to 1.0.
# NOTE(review): 1.0 presumably means every trace/profile is sampled —
# confirm this is intended outside the sandbox.
init(
    dsn=os.getenv("PESACHECK_SENTRY_DSN"),
    traces_sample_rate=1.0,
    profiles_sample_rate=1.0,
)


def log_error(code, message=""):
    """Send a message event to Sentry describing a failed request.

    Args:
        code: Status code to include in the report.
        message: Optional error text (e.g. the response body).
    """
    report = f"Status code: {code}\n Error: {message}"
    capture_message(report)


def remove_html_tags(input_string):
    """Return *input_string* with every ``<...>`` tag removed.

    Only the markup is stripped: the text between tags is kept as is, and
    HTML entities such as ``&amp;`` are not decoded.

    Args:
        input_string: HTML text, or ``None``/empty when the field is absent.

    Returns:
        The text without tags; ``""`` when the input is ``None`` or empty
        (``re.sub`` would otherwise raise ``TypeError`` on ``None``).
    """
    if not input_string:
        return ""
    return re.sub(r'<[^>]*>', '', input_string)


def fetch_from_pesacheck():
    """Fetch the latest feed items through the rss2json API.

    The feed URL is read from ``PESACHECK_URL`` and the API key from
    ``PESACHECK_RSS_2_JSON_API_KEY``. Up to 10 items are requested, ordered
    by ``pubDate``.

    Returns:
        The list under the response's ``items`` key, or ``[]`` when the key
        is missing/empty, the HTTP status is not 200, or the request or JSON
        decoding fails. Failures are reported to Sentry rather than raised,
        matching ``post_to_check``.
    """
    url = "https://api.rss2json.com/v1/api.json"
    params = {
        'rss_url': os.getenv("PESACHECK_URL"),
        'api_key': os.getenv("PESACHECK_RSS_2_JSON_API_KEY"),
        'count': 10,
        'order_by': 'pubDate',
    }
    try:
        response = requests.get(url, params=params, timeout=60)
        if response.status_code == 200:
            return response.json().get('items') or []
        log_error(code=response.status_code, message=response.text)
    except (requests.RequestException, ValueError) as e:
        # Connection problems/timeouts (RequestException) or a body that is
        # not valid JSON (ValueError): report and fall through to [].
        capture_exception(e)
    return []


def post_to_check(query):
    """POST a GraphQL document to the Check API.

    The endpoint is read from ``PESACHECK_CHECK_URL``; the token and team
    slug headers come from ``PESACHECK_TOKEN`` and
    ``PESACHECK_WORKSPACE_SLUG``.

    Args:
        query: GraphQL document to send as the ``query`` field of the body.

    Returns:
        The decoded JSON response on success, otherwise ``None``. A response
        counts as failed when the HTTP status is not 200, when the body
        carries a non-empty GraphQL ``errors`` entry, or when the request
        raises. All failures are reported to Sentry.
    """
    headers = {
        "Content-Type": "application/json",
        "X-Check-Token": os.getenv("PESACHECK_TOKEN"),
        "X-Check-Team": os.getenv("PESACHECK_WORKSPACE_SLUG"),
    }
    try:
        response = requests.post(
            os.getenv("PESACHECK_CHECK_URL"),
            headers=headers,
            json={"query": query},
            timeout=60,
        )
        if response.status_code != 200:
            log_error(code=response.status_code, message=response.text)
            return None
        payload = response.json()
        # GraphQL servers report query/validation failures in an "errors"
        # array, typically alongside HTTP 200, so the status code alone does
        # not prove the mutation was applied.
        if isinstance(payload, dict) and payload.get("errors"):
            log_error(code=response.status_code, message=payload["errors"])
            return None
        return payload
    except Exception as e:
        # Deliberately broad: one failed upload must not abort the batch.
        capture_exception(e)
        return None


def upload_content():
    """Publish the fetched feed items to Check as verified fact checks.

    For each item returned by ``fetch_from_pesacheck`` a
    ``createProjectMedia`` mutation is built and posted. The language is
    ``"fr"`` when the item's categories contain ``"french"``, else ``"en"``.
    The tag-stripped description is used for both the claim description and
    the summary.

    Returns:
        The list of responses from ``post_to_check`` for the items that were
        posted successfully. The same list is attached to the Sentry scope
        under the ``"Successful Uploads"`` context.
    """
    success_posts = []
    for item in fetch_from_pesacheck():
        # Missing or null fields must not abort the whole batch.
        categories = item.get("categories") or []
        description = remove_html_tags(item.get("description") or "")
        input_data = {
            "media_type": "Blank",
            "channel": 1,
            "set_tags": categories,
            "set_status": "verified",
            "set_claim_description": description,
            "title": remove_html_tags(item.get("title") or ""),
            "summary": description,
            "url": item.get("link"),
            "language": "fr" if "french" in categories else "en",
            "publish_report": True,
        }
        res = post_to_check(create_mutation_query(**input_data))
        if res:
            success_posts.append(res)
    # Sentry contexts are key/value mappings, so wrap the list in a dict.
    set_context(
        "Successful Uploads",
        {"count": len(success_posts), "responses": success_posts},
    )
    return success_posts
koechkevin marked this conversation as resolved.
Show resolved Hide resolved

# Script entry point: the crontab runs this file directly, so the upload is
# triggered once per execution.
# NOTE(review): this call also runs whenever the module is imported; consider
# an `if __name__ == "__main__":` guard if the module is ever imported.
upload_content()