-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 101c67c
Showing
3 changed files
with
136 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
name: ArXiv to Zulip Bot

# Run on every push to master and once an hour (at minute 27).
on:
  push:
    branches: ["master"]
  schedule:
    - cron: "27 */1 * * *"

# Bot credentials, supplied as repository secrets.
# main.py reads ZULIP_EMAIL from the environment; the other two are
# presumably picked up by the zulip client itself -- confirm against its docs.
env:
  ZULIP_API_KEY: ${{ secrets.ZULIP_API_KEY }}
  ZULIP_EMAIL: ${{ secrets.ZULIP_EMAIL }}
  ZULIP_SITE: ${{ secrets.ZULIP_SITE }}

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      # Package names must match what main.py imports
      # (requests, zulip, feedparser, markdownify).
      - name: Install dependencies
        run: pip install requests zulip feedparser markdownify

      - name: Run script
        run: python main.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# ArXiv-to-Zulip bot | ||
|
||
## How to Set Up | ||
1. Create a generic bot in your Zulip Organization | ||
2. Add the new bot to the stream where you want the RSS feed updates | ||
3. Get the bot's API key from Personal Settings >> Bots | ||
4. Fork this repository | ||
5. Add the organization URL, the bot's email address, and the bot's API key to your repository secrets | ||
- To do this, go to your repository settings >> Secrets and Variables >> Actions >> New repository secret | ||
- Add `ZULIP_SITE` with the value `https://yourdomainname.zulipchat.com` (replace `yourdomainname` with your domain name) | ||
- Add `ZULIP_EMAIL` with the bot's email address | ||
- Add `ZULIP_API_KEY` with the bot's API key (the one you got from the bot settings in Zulip in step 3) | ||
6. In `main.py`, change `RSS_FEEDS` to your favorite feeds and set `ZULIP_STREAM_NAME` to the stream you chose in step 2 | ||
|
||
If you have set it up correctly, you'll get the latest few articles from each feed in the stream you've selected within minutes of pushing to `master`. | ||
This bot relies on GitHub Actions to run `main.py` at scheduled intervals. | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import os | ||
import requests | ||
import time | ||
import zulip | ||
import re | ||
import feedparser | ||
import markdownify | ||
from datetime import datetime, timedelta, timezone | ||
|
||
# Zulip settings: the bot's email address is read from the environment,
# and the target stream name is fixed here.
ZULIP_EMAIL = os.environ.get('ZULIP_EMAIL')
ZULIP_STREAM_NAME = 'articles'

# Feeds to mirror, as {feed name: RSS feed URL}.
RSS_FEEDS = {
    "Dan Ma's Topology Blog": "https://dantopology.wordpress.com/feed/",
    "The Higher Geometer": "https://thehighergeometer.wordpress.com/feed/",
    "Terence Tao": "https://terrytao.wordpress.com/feed/",
    "Mathematics, Quanta Magazine": "https://api.quantamagazine.org/mathematics/feed",
    "Physics, Quanta Magazine": "https://api.quantamagazine.org/physics/feed",
    "Biology, Quanta Magazine": "https://api.quantamagazine.org/biology/feed",
    "Computer Science, Quanta Magazine": "https://api.quantamagazine.org/computer-science/feed",
}
|
||
# Send a message to the configured Zulip stream
def send_zulip_message(content, topic):
    """Post ``content`` to ``ZULIP_STREAM_NAME`` under ``topic``.

    Args:
        content: Message body to post.
        topic: Topic within the stream to post under.

    Returns:
        True when the API response reports success, False otherwise.
        A failure is printed rather than raised so that one rejected
        message does not abort the rest of the run.
    """
    client = zulip.Client(email=ZULIP_EMAIL, client='rss-feed-bot/0.1')
    data = {
        "type": "stream",
        "to": ZULIP_STREAM_NAME,
        "topic": topic,
        "content": content,
    }
    response = client.send_message(data)
    # The response was previously discarded, which hid failed posts.
    if response.get("result") != "success":
        print(f"Failed to send message to topic {topic!r}: {response.get('msg')}")
        return False
    return True
|
||
# Get the url of the last article update sent to the stream
def last_article_update_link(topic):
    """Return the article URL from the bot's latest message in ``topic``.

    Asks Zulip for the newest message sent by ``ZULIP_EMAIL`` in
    ``ZULIP_STREAM_NAME`` under ``topic`` (raw markdown, not rendered HTML)
    and extracts the article link from it.

    Args:
        topic: Topic name to look in.

    Returns:
        The URL string, or None when the request fails, the topic has no
        message from the bot, or the message contains no URL.
    """
    client = zulip.Client(email=ZULIP_EMAIL, client='test-github-client/0.1')
    request = {
        "anchor": "newest",
        "num_before": 1,
        "num_after": 0,
        "narrow": [
            {"operator": "sender", "operand": ZULIP_EMAIL},
            {"operator": "stream", "operand": ZULIP_STREAM_NAME},
            {"operator": "topic", "operand": topic},
        ],
        "apply_markdown": False,
    }
    response = client.get_messages(request)
    if response.get('result') != "success":
        print("Failed to retrieve message or No previous messages")
        return None

    messages = response.get("messages")
    if not messages:
        return None
    return _extract_article_link(messages[0]["content"])


def _extract_article_link(content):
    """Return the first article URL found in a message body, or None."""
    # update_zulip_stream formats messages as "**[title](link)**...", so the
    # article link is the target of the first markdown link. Stopping at ")"
    # matters: the previous pattern swallowed the trailing ")**", so the
    # result never compared equal to the feed entry's link.
    match = re.search(r'\]\((https?://[^\s)]+)\)', content)
    if match is None:
        # Fallback for messages that are not in the markdown-link format.
        match = re.search(r'(https?://[^\s<>)\]]+)', content)
    return match.group(1) if match else None
|
||
# Post the newest entries of every configured RSS feed to the Zulip stream
def update_zulip_stream(max_articles=4):
    """Post entries from each feed in ``RSS_FEEDS`` that are not yet in Zulip.

    For every feed, the first ``max_articles`` entries are considered,
    walked in reverse feed order. Entries up to and including the one whose
    link matches the bot's last message in the topic are skipped. If that
    link is not among the considered entries (or there is no previous
    message), all of them are posted.

    Args:
        max_articles: How many entries from the top of each feed to
            consider per run. Defaults to 4, the previously hard-coded value.
    """
    for feed_name, feed_url in RSS_FEEDS.items():
        feed = feedparser.parse(feed_url)
        # A feed that failed to load may have no title; fall back to the
        # configured name instead of raising.
        topic = feed.feed.get("title") or feed_name
        last_updated_article_link = last_article_update_link(topic)
        pending = _entries_to_post(feed.entries, last_updated_article_link, max_articles)
        for article in pending:
            message = _format_article(article)
            send_zulip_message(message, topic)
            print(message)  # keep a copy of what was posted in the job log


def _entries_to_post(entries, last_link, limit):
    """Return the first ``limit`` entries, reversed, minus those already posted."""
    # Reversed so that entries are posted in ascending order by time
    # (assumes the feed lists newest entries first -- TODO confirm per feed).
    window = list(entries[:max(limit, 0)])[::-1]
    if last_link is None:
        return window
    links = [entry.get("link") for entry in window]
    if last_link in links:
        return window[links.index(last_link) + 1:]
    # The last posted article is no longer in the window. Previously this
    # case skipped every entry, so the feed was never posted again.
    return window


def _format_article(article):
    """Build the Zulip markdown message for one feed entry."""
    link = article.get("link", "")
    raw_title = article.get("title", "")
    title = markdownify.markdownify(
        raw_title.replace('$^{\\ast}$', '* ')
        .replace('$^*$', '* ')
        .replace("$", "$$")
        .replace("\n", " ")
    )
    # Not every feed supplies author, date, summary or tags; missing fields
    # are left out of the message instead of raising.
    published_parsed = article.get("published_parsed")
    published = time.strftime("%d %B %Y", published_parsed) if published_parsed else ""
    author = article.get("author", "")
    summary = markdownify.markdownify(article.get("description", ""))
    tags = ", ".join(tag["term"] for tag in article.get("tags", []) if tag.get("term"))

    parts = [f"\n**[{title}]({link})**"]
    byline = ", ".join(piece for piece in (author, published) if piece)
    if byline:
        parts.append(f"\n*{byline}*")
    parts.append(f"\n\n{summary}")
    if tags:
        parts.append(f"\n\n*{tags}*")
    return "".join(parts)
|
||
# Script entry point: run a single update pass over all feeds.
# Any repetition comes from whatever invokes this script, not from a loop here.
if __name__ == "__main__":
    update_zulip_stream()