diff --git a/.github/workflows/zulip-bot-workflow.yml b/.github/workflows/zulip-bot-workflow.yml
new file mode 100644
index 0000000..b6bf305
--- /dev/null
+++ b/.github/workflows/zulip-bot-workflow.yml
@@ -0,0 +1,31 @@
+name: ArXiv to Zulip Bot
+
+on:
+  push:
+    branches: ["master"]
+  schedule:
+    - cron: "27 */1 * * *"
+
+
+env:
+  ZULIP_API_KEY: ${{ secrets.ZULIP_API_KEY }}
+  ZULIP_EMAIL: ${{ secrets.ZULIP_EMAIL }}
+  ZULIP_SITE: ${{ secrets.ZULIP_SITE }}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: pip install zulip feedparser markdownify
+
+      - name: Run script
+        run: python main.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..52eefae
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+# ArXiv-to-Zulip bot
+
+## How to Set Up
+1. Create a generic bot in your Zulip organization
+2. Add the new bot to the stream where you want the RSS feed updates
+3. Get the bot's API key from Personal settings >> Bots
+4. Fork this repository
+5. Add the organization URL, the bot's email address, and its API key to your repository secrets
+   - Go to your repository Settings >> Secrets and variables >> Actions >> New repository secret
+   - Add `ZULIP_SITE` as `https://yourdomainname.zulipchat.com` (replace `yourdomainname` with your domain name)
+   - Add `ZULIP_EMAIL` as the bot's email address
+   - Add `ZULIP_API_KEY` with the key from the bot's settings in Zulip
+6. Change `RSS_FEEDS` in `main.py` to your favorite feeds (see the example below)
+
+If you have set it up correctly, the latest articles from each feed will appear in the stream you've selected within minutes.
+This bot relies on GitHub Actions to run `main.py` on a schedule.
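+
+For example, a minimal `RSS_FEEDS` edit might look like the sketch below; the second entry is a placeholder name and URL, not one of the feeds shipped in `main.py`:
+
+```python
+# main.py: display names mapped to RSS feed URLs
+RSS_FEEDS = {
+    "Terence Tao": "https://terrytao.wordpress.com/feed/",
+    "Your Favorite Blog": "https://example.com/feed/",  # placeholder URL
+}
+```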
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d39ffb0
--- /dev/null
+++ b/main.py
@@ -0,0 +1,91 @@
+import os
+import time
+import zulip
+import re
+import feedparser
+import markdownify
+from typing import Any, Dict
+
+# Zulip credentials; ZULIP_API_KEY and ZULIP_SITE are read from the environment
+# by the zulip bindings (all three variables are set by the GitHub workflow)
+ZULIP_EMAIL = os.environ.get('ZULIP_EMAIL')
+ZULIP_STREAM_NAME = 'articles'
+
+# Define the dictionary of feed names and their RSS links
+RSS_FEEDS = {
+    "Dan Ma's Topology Blog": "https://dantopology.wordpress.com/feed/",
+    "The Higher Geometer": "https://thehighergeometer.wordpress.com/feed/",
+    "Terence Tao": "https://terrytao.wordpress.com/feed/",
+    "Mathematics, Quanta Magazine": "https://api.quantamagazine.org/mathematics/feed",
+    "Physics, Quanta Magazine": "https://api.quantamagazine.org/physics/feed",
+    "Biology, Quanta Magazine": "https://api.quantamagazine.org/biology/feed",
+    "Computer Science, Quanta Magazine": "https://api.quantamagazine.org/computer-science/feed",
+}
+
+# Send a message to the configured Zulip stream
+def send_zulip_message(content, topic):
+    client = zulip.Client(email=ZULIP_EMAIL, client='rss-feed-bot/0.1')
+    data = {
+        "type": "stream",
+        "to": ZULIP_STREAM_NAME,
+        "topic": topic,
+        "content": content,
+    }
+    client.send_message(data)
+
+# Get the URL of the last article update sent to the stream for a given topic
+def last_article_update_link(topic):
+    client = zulip.Client(email=ZULIP_EMAIL, client='rss-feed-bot/0.1')
+    request: Dict[str, Any] = {
+        "anchor": "newest",
+        "num_before": 1,
+        "num_after": 0,
+        "narrow": [
+            {"operator": "sender", "operand": ZULIP_EMAIL},
+            {"operator": "stream", "operand": ZULIP_STREAM_NAME},
+            {"operator": "topic", "operand": topic},
+        ],
+        "apply_markdown": False,
+    }
+    response = client.get_messages(request)
+    if response['result'] == "success":
+        messages = response["messages"]
+        if messages:
+            latest_message = messages[0]
+            latest_message_content = latest_message["content"]
+            url_pattern = r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)'
+            links = re.findall(url_pattern, latest_message_content)
+            return links[0] if links else None
+        else:
+            return None
+    else:
+        print("Failed to retrieve messages from Zulip")
+        return None
+
+# Fetch the latest articles from each RSS feed and post the new ones to the stream
+def update_zulip_stream():
+    for feed_name, feed_url in RSS_FEEDS.items():
+        feed = feedparser.parse(feed_url)
+        topic = feed.feed.title
+        last_updated_article_link = last_article_update_link(topic)
+        # Only skip already-posted entries when a previous post exists in this topic
+        skip_until_last_posted = last_updated_article_link is not None
+        # Take the 4 newest entries and reverse them so they are posted oldest-first
+        for article in feed.entries[:4][::-1]:
+            link = article.link
+            if skip_until_last_posted:
+                if link == last_updated_article_link:
+                    skip_until_last_posted = False
+                continue
+            title = markdownify.markdownify(article.title.replace('$^{\\ast}$', '* ').replace('$^*$', '* ').replace("$", "$$").replace("\n", " "))
+            published = time.strftime("%d %B %Y", article.published_parsed)
+            author = article.author
+            summary = markdownify.markdownify(article.description)
+            tags = ", ".join([entry['term'] for entry in article.get('tags', [])])
+            message = f"\n**[{title}]({link})**\n*{author}, {published}*\n\n{summary}\n\n*{tags}*"
+            send_zulip_message(message, topic)
+            print(message)
+
+# GitHub Actions runs this script on a schedule (see the workflow file)
+if __name__ == "__main__":
+    update_zulip_stream()