Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
joelsleeba committed Mar 29, 2024
0 parents commit 101c67c
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/zulip-bot-workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Runs main.py on every push to master and on an hourly schedule.
name: ArXiv to Zulip Bot

on:
  push:
    branches: ["master"]
  schedule:
    # Minute 27 of every hour.
    - cron: "27 */1 * * *"

# Zulip credentials are read from repository secrets and exposed to every step.
env:
  ZULIP_API_KEY: ${{ secrets.ZULIP_API_KEY }}
  ZULIP_EMAIL: ${{ secrets.ZULIP_EMAIL }}
  ZULIP_SITE: ${{ secrets.ZULIP_SITE }}

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        # Package names must match the modules main.py imports
        # (requests, zulip, feedparser, markdownify).
        run: pip install requests zulip feedparser markdownify

      - name: Run script
        run: python main.py
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ArXiv-to-Zulip bot

## How to Set Up
1. Create a generic bot in your Zulip Organization
2. Add the new bot to the stream where you want the RSS feed updates
3. Get the bot's API key from Personal Settings >> Bots
4. Fork this repository
5. Add the organization URL, the bot's email address, and the bot's API key to your repository secrets
- To do this, go to your repository Settings >> Secrets and variables >> Actions >> New repository secret
- Add `ZULIP_SITE` with the value `https://yourdomainname.zulipchat.com` (replace `yourdomainname` with your domain name)
- Add `ZULIP_EMAIL` with the bot's email address
- Add `ZULIP_API_KEY`. You can get this from the bot settings in Zulip
6. In `main.py`, change `RSS_FEEDS` to your favorite feeds and set `ZULIP_STREAM_NAME` to the name of the stream from step 2

If you have set it up correctly, you'll get up to 4 of the latest articles from each feed in the stream you've selected within minutes.
This bot relies on GitHub Actions to run `main.py` at scheduled intervals.
89 changes: 89 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import requests
import time
import zulip
import re
import feedparser
import markdownify
from datetime import datetime, timedelta, timezone

# Zulip settings: the bot's email address is read from the ZULIP_EMAIL
# environment variable; the target stream name is fixed here.
ZULIP_EMAIL = os.environ.get('ZULIP_EMAIL')
ZULIP_STREAM_NAME = 'articles'

# Feeds to poll, as {display name: feed URL}.
RSS_FEEDS = {
    "Dan Ma's Topology Blog": "https://dantopology.wordpress.com/feed/",
    "The Higher Geometer": "https://thehighergeometer.wordpress.com/feed/",
    "Terence Tao": "https://terrytao.wordpress.com/feed/",
    "Mathematics, Quanta Magazine": "https://api.quantamagazine.org/mathematics/feed",
    "Physics, Quanta Magazine": "https://api.quantamagazine.org/physics/feed",
    "Biology, Quanta Magazine": "https://api.quantamagazine.org/biology/feed",
    "Computer Science, Quanta Magazine": "https://api.quantamagazine.org/computer-science/feed",
}

def send_zulip_message(content, topic):
    """Post ``content`` to ``ZULIP_STREAM_NAME`` under ``topic``.

    Args:
        content: Body of the message to send.
        topic: Topic within the stream to post under.

    Returns:
        The response dict returned by ``client.send_message``.
    """
    # Hand the API key and site to the client explicitly instead of relying
    # on its implicit configuration lookup. An unset variable yields None,
    # which is the same as not passing the argument at all.
    client = zulip.Client(
        email=ZULIP_EMAIL,
        api_key=os.environ.get('ZULIP_API_KEY'),
        site=os.environ.get('ZULIP_SITE'),
        client='rss-feed-bot/0.1',
    )
    data = {
        "type": "stream",
        "to": ZULIP_STREAM_NAME,
        "topic": topic,
        "content": content,
    }
    response = client.send_message(data)
    # Surface failures instead of dropping them silently.
    if response.get("result") != "success":
        print(f"Failed to send message to topic {topic!r}: {response.get('msg')}")
    return response

# Target of the Markdown link that opens a bot message: "**[title](url)**".
# The lazy match stops at the first ")**", so the URL excludes that suffix.
_POSTED_LINK_RE = re.compile(r'\]\((https?://\S+?)\)\*\*')
# Fallback for messages that do not follow that layout: any bare URL.
_BARE_URL_RE = re.compile(r'https?://[^\s<>]+')


def _extract_article_link(content):
    """Return the first article URL found in ``content``, or None."""
    posted = _POSTED_LINK_RE.search(content)
    if posted:
        return posted.group(1)
    bare = _BARE_URL_RE.search(content)
    if bare:
        # Drop Markdown/punctuation characters that may trail a bare URL.
        return bare.group(0).rstrip(')*],.') or None
    return None


def last_article_update_link(topic):
    """Return the article URL from the bot's newest message in ``topic``.

    Args:
        topic: Topic within ``ZULIP_STREAM_NAME`` to look in.

    Returns:
        The URL as a string, or None when the request fails, the bot has no
        message in the topic, or the newest message contains no URL.
    """
    client = zulip.Client(
        email=ZULIP_EMAIL,
        api_key=os.environ.get('ZULIP_API_KEY'),
        site=os.environ.get('ZULIP_SITE'),
        client='test-github-client/0.1',
    )
    request = {
        "anchor": "newest",
        "num_before": 1,
        "num_after": 0,
        "narrow": [
            {"operator": "sender", "operand": ZULIP_EMAIL},
            {"operator": "stream", "operand": ZULIP_STREAM_NAME},
            {"operator": "topic", "operand": topic},
        ],
        # Ask for the raw Markdown so the link can be matched as it was sent.
        "apply_markdown": False,
    }
    response = client.get_messages(request)
    if response.get('result') != "success":
        print(f"Failed to retrieve messages for topic {topic!r}: {response.get('msg')}")
        return None

    messages = response.get("messages")
    if not messages:
        return None
    # Only one message is requested; take the last element so the newest is
    # used even if more than one comes back.
    return _extract_article_link(messages[-1]["content"])

def _entries_after(entries, last_link):
    """Return the entries that come after the one whose link is ``last_link``."""
    if last_link:
        for index, entry in enumerate(entries):
            if entry.get("link") == last_link:
                return entries[index + 1:]
    # Either nothing was posted before, or the last posted entry is no longer
    # in the window. Treat every entry as new rather than skipping forever.
    return entries


def _format_article(article, link):
    """Build the Markdown message text for one feed entry."""
    raw_title = article.get("title") or ""
    # Rewrite LaTeX-style asterisk superscripts, then double the remaining
    # dollar signs (presumably for Zulip's $$...$$ math syntax - confirm).
    title = markdownify.markdownify(
        raw_title.replace('$^{\\ast}$', '* ')
        .replace('$^*$', '* ')
        .replace("$", "$$")
        .replace("\n", " ")
    )
    published_parsed = article.get("published_parsed")
    published = time.strftime("%d %B %Y", published_parsed) if published_parsed else ""
    author = article.get("author") or ""
    summary = markdownify.markdownify(article.get("description") or "")
    tags = ", ".join(
        tag["term"] for tag in (article.get("tags") or []) if tag.get("term")
    )

    # Same layout as before when every field is present; the byline and tag
    # lines are left out when the feed did not provide them.
    message = f"\n**[{title}]({link})**"
    byline = ", ".join(part for part in (author, published) if part)
    if byline:
        message += f"\n*{byline}*"
    message += f"\n\n{summary}"
    if tags:
        message += f"\n\n*{tags}*"
    return message


def update_zulip_stream(max_articles=4):
    """Print a message for each feed entry newer than the last one posted.

    For every feed in ``RSS_FEEDS``, the first ``max_articles`` entries are
    compared with the link found in the bot's newest message for the feed's
    topic. Entries after that link are formatted and printed in reverse feed
    order; if the link is absent, all of them are.

    Args:
        max_articles: Number of leading feed entries to consider per feed.
    """
    for feed_name, feed_url in RSS_FEEDS.items():
        feed = feedparser.parse(feed_url)
        # Fall back to the configured name when the parsed feed has no title.
        topic = feed.feed.get("title") or feed_name
        last_updated_article_link = last_article_update_link(topic)
        # Reverse so the entries listed first in the feed are handled last.
        recent_entries = feed.entries[:max_articles][::-1]
        for article in _entries_after(recent_entries, last_updated_article_link):
            link = article.get("link")
            if not link:
                # Without a link the entry can be neither linked nor tracked.
                continue
            message = _format_article(article, link)
            # NOTE(review): posting is disabled in this revision; the message
            # is only printed. Re-enable the call below to post to Zulip.
            # send_zulip_message(message, topic)
            print(message)

# Entry point: run a single update pass over all feeds. Nothing here repeats
# the pass; any periodic execution has to come from whatever runs the script.
if __name__ == "__main__":
    update_zulip_stream()

0 comments on commit 101c67c

Please sign in to comment.