Skip to content

Commit

Permalink
Merge pull request #819 from CodeForAfrica/feature/fact-check-title
Browse files Browse the repository at this point in the history
Use set_claim_description as blurb
  • Loading branch information
koechkevin authored Nov 20, 2024
2 parents 6589882 + 4d80cd2 commit 0629c56
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 9 deletions.
1 change: 1 addition & 0 deletions 3rdparty/py/requirements-all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ google-auth-oauthlib==1.2.1
greenlet==3.1.1
gunicorn[gevent, setproctitle]==23.0.0
html2text==2024.2.26
lxml==5.1.0
redis==5.2.0
requests==2.32.3
sentry-sdk==2.18.0
Expand Down
2 changes: 1 addition & 1 deletion pesacheck_meedan_bridge/py/BUILD
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Build target "lib" for the Python sources in this directory; each entry in
# `dependencies` names one requirement from 3rdparty/py:requirements-all.
python_sources(
name="lib",
dependencies=[
"3rdparty/py:requirements-all#lxml",
"3rdparty/py:requirements-all#requests",
"3rdparty/py:requirements-all#sentry-sdk",
"3rdparty/py:requirements-all#environs",
"3rdparty/py:requirements-all#trafilatura",
],
)

Expand Down
2 changes: 1 addition & 1 deletion pesacheck_meedan_bridge/py/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.8
0.1.12
22 changes: 15 additions & 7 deletions pesacheck_meedan_bridge/py/main.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
import json
import sys

import lxml.html
import requests
import sentry_sdk
import settings
from check_api import post_to_check
from database import PesacheckDatabase, PesacheckFeed
from trafilatura import extract


def html_to_text(content):
    """Convert HTML to text via ``extract``, keeping links and images.

    Falls back to returning ``content`` unchanged when ``extract`` yields a
    falsy result (for example ``None`` or an empty string).
    """
    extracted = extract(content, include_links=True, include_images=True)
    if extracted:
        return extracted
    return content
def extract_summary(content):
    """Return the text of the element right before the first ``<figure>``.

    Args:
        content: HTML markup to search (the caller passes a feed description).

    Returns:
        The stripped text content of the previous sibling of the first
        ``<figure>`` found by the ``//figure`` XPath query, or ``None`` when
        the markup is empty or cannot be parsed, contains no ``<figure>``,
        the figure has no preceding sibling, or that sibling holds no text.
    """
    # lxml refuses to parse empty documents, so bail out before parsing and
    # let the caller apply its own fallback.
    if not content or not content.strip():
        return None
    try:
        tree = lxml.html.fromstring(content)
    except lxml.etree.ParserError:
        return None
    figures = tree.xpath("//figure")
    if not figures:
        return None
    # getprevious() yields None when the figure is the first child of its
    # parent (or is the root element itself).
    summary_el = figures[0].getprevious()
    if summary_el is None:
        return None
    summary_text = summary_el.text_content().strip()
    return summary_text or None


language_codes = {
Expand Down Expand Up @@ -57,14 +63,16 @@ def post_to_check_and_update(feed, db):
if language.lower() in language_codes
]
language = "en" if not codes else codes[0]
claim_description = feed.title
summary = extract_summary(feed.description) or "Not Found"
input_data = {
"media_type": "Blank",
"channel": 1,
"set_tags": categories,
"set_status": "verified",
"set_claim_description": f"""{html_to_text(feed.description)}""",
"title": f"""{feed.title}""",
"summary": f"""{html_to_text(feed.description)}""",
"set_claim_description": claim_description,
"title": feed.title,
"summary": summary,
"url": feed.link,
"language": language,
"publish_report": True,
Expand Down Expand Up @@ -119,7 +127,7 @@ def main(db):
check_project_media_id="",
check_full_url="",
claim_description_id="",
)
)
store_in_database(feed, db=db)
posted = post_to_check_and_update(feed, db=db)
success_posts.append(posted)
Expand Down

0 comments on commit 0629c56

Please sign in to comment.