Skip to content

Commit

Permalink
Support for new username formats
Browse files Browse the repository at this point in the history
  • Loading branch information
rchurchley authored and seefood committed Mar 25, 2024
1 parent d686691 commit bed1af2
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 82 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,15 @@ user! All RSS items will be posted as toots.
- `@[email protected]`
- `@[email protected]`
- Or any other website that has an RSS feed! `@[email protected]`
- Convert slashes to dots: `indiegames.com/blog` -> `@indiegames.com.blog@mastofeeder.com`
- Remove `.rss` and `.xml` extensions: `https://feeds.yle.fi/uutiset/v1/majorHeadlines/YLE_UUTISET.rss` -> `@feeds.yle.fi.uutiset.v1.majorHeadlines.YLE_UUTISET`

You can follow feeds with slashes in the URL by replacing the slashes with `..`:

- `indiegames.com/blog` -> `@indiegames.com..blog@mastofeeder.com`

Some websites have feed URLs that are case-sensitive or contain characters that can't be expressed in a Fediverse username. To follow these feeds, replace the first slash with `._.` and [base32 encode](https://www.rfctools.com/base32-encoder/) the rest of the path (everything after that slash), omitting any trailing `=` padding. For example:

- `en.wikipedia.org/w/api.php?action=featuredfeed&feed=featured&feedformat=rss` -> `en.wikipedia.org._.O4XWC4DJFZYGQ4B7MFRXI2LPNY6WMZLBOR2XEZLEMZSWKZBGMZSWKZB5MZSWC5DVOJSWIJTGMVSWIZTPOJWWC5B5OJZXG@mastofeeder.com`

## Developing

Expand Down
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
"dependencies": {
"@types/express": "^4.17.21",
"@types/jsdom": "^21.1.6",
"@types/morgan": "^1.9.4",
"@types/node-fetch": "^2.6.4",
"@types/uuid": "^9.0.8",
"body-parser": "^1.20.2",
"express": "^4.19.1",
"io-ts": "^2.2.21",
"jsdom": "^24.0.0",
"morgan": "^1.10.0",
"node-fetch": "2",
"rfc4648": "^1.5.2",
"sql-template-strings": "^2.2.2",
"sqlite": "^5.1.1",
"sqlite3": "^5.1.7",
Expand Down
170 changes: 96 additions & 74 deletions src/fetch-url-info.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import * as Option from "fp-ts/lib/Option";
import { JSDOM } from "jsdom";
import path from "path";
import { openDb } from "./db";
import SQL from "sql-template-strings";
import { parseUsernameToDomainWithPath } from "./parse-domain";
import { Element, xml2js } from "xml-js";
import { findOne, text } from "./xml-utils";
import fetch from "node-fetch";
import { base32 } from "rfc4648";

type UrlInfo = {
rssUrl: string;
Expand Down Expand Up @@ -49,83 +49,110 @@ const cacheUrlInfo = async (hostname: string) => {

// Exported entry point for feed lookups: `fetchUrlInfo` is an alias of `cacheUrlInfo`.
export const fetchUrlInfo = cacheUrlInfo;

const _fetchUrlInfo = async (
username: string
): Promise<Option.Option<UrlInfo>> => {
const hostname = parseUsernameToDomainWithPath(username);
try {
let res = await fetch(`https://${hostname}/`);
let additionalExtension = ""; // TODO: Refactor, the logic is getting messy
if (!res.ok) {
additionalExtension = ".rss";
res = await fetch(`https://${hostname}${additionalExtension}`);
}
if (!res.ok) {
additionalExtension = ".xml";
res = await fetch(`https://${hostname}${additionalExtension}`);
}
if (!res.ok) return Option.none;

const isRss = ["application/xml", "application/rss+xml", "text/xml"].some(
(type) => res.headers.get("Content-Type")?.startsWith(type)
);
if (isRss)
return Option.some({
rssUrl: `https://${hostname}${additionalExtension}`,
name: parseNameFromRss(await res.text(), hostname),
icon: await getIconForDomain(hostname),
});

const html = await res.text();
const rssUrl =
ensureFullUrl(getRssValue(html), hostname) ??
(await tryWordpressFeed(hostname));
if (!rssUrl)
return hostname.endsWith("/blog")
? Option.none
: fetchUrlInfo(hostname + "/blog");

return Option.some({
rssUrl,
icon: ensureFullUrl(getPngIcon(html), hostname),
name: parseNameFromRss(
await fetch(rssUrl).then((res) => res.text()),
hostname
),
});
} catch (e) {
console.error(e);
return Option.none;
/**
 * Looks up feed information for a username by probing candidate URLs in order.
 *
 * Candidates come from `possibleUrlsFromUsername`; the first one for which
 * `_tryFetchUrlInfo` yields a feed wins. A candidate that fails (invalid URL,
 * network error, unparseable response) is logged and skipped so the remaining
 * candidates still get a chance.
 *
 * @param username - The username to resolve to a feed.
 * @returns `Option.some` with the first feed found, otherwise `Option.none`.
 */
const _fetchUrlInfo = async (username: string): Promise<Option.Option<UrlInfo>> => {
  console.log(`Fetching feed URL info for username ${username}...`);

  // Deriving candidates parses/decodes the username and may throw on malformed
  // input; treat that as "no feed" rather than rejecting the whole lookup.
  let candidateUrls: string[];
  try {
    candidateUrls = possibleUrlsFromUsername(username);
  } catch (e: unknown) {
    console.error(`Could not derive feed URLs from username ${username}:`, e);
    return Option.none;
  }

  for (const url of candidateUrls) {
    console.log(`Trying ${url}...`);
    try {
      const result = await _tryFetchUrlInfo(new URL(url));
      if (Option.isSome(result)) {
        console.log(`Feed URL found: ${result.value.rssUrl}.`);
        return result;
      }
    } catch (e: unknown) {
      // Best effort: keep probing, but do not hide why this candidate failed.
      console.error(`Failed to fetch feed info from ${url}:`, e);
    }
  }

  console.log(`No feeds found for username ${username}.`);
  return Option.none;
};

const parseNameFromRss = (rss: string, fallback: string): string => {
/**
 * Builds the ordered list of URLs to probe for a username: every candidate
 * path over `https://` first, followed by the same paths over `http://`.
 */
const possibleUrlsFromUsername = (username: string): string[] => {
  const candidatePaths = possiblePathsFromUsername(username);
  return ["https", "http"].flatMap((scheme) =>
    candidatePaths.map((candidate) => `${scheme}://${candidate}`)
  );
};

/**
 * Lists the host-plus-path strings a username might stand for, in probing order
 * and without duplicates:
 * the username as-is, with `..` turned into `/`, and base32-decoded; then the
 * username with `.rss`, `.xml` and `/feed/` appended; then the path inferred by
 * `parseUsernameToDomainWithPath`, bare and with the same three suffixes.
 */
const possiblePathsFromUsername = (username: string): string[] => {
  const inferredPath = parseUsernameToDomainWithPath(username);
  const feedSuffixes = [".rss", ".xml", "/feed/"];

  const candidates = [
    username,
    username.replace(/\.\./g, "/"),
    base32decode(username),
    ...feedSuffixes.map((suffix) => `${username}${suffix}`),
    `${inferredPath}`,
    ...feedSuffixes.map((suffix) => `${inferredPath}${suffix}`),
  ];

  // A Set keeps the first occurrence of each path and preserves insertion order.
  return [...new Set(candidates)];
};

/**
 * Expands a `hostname._.BASE32PATH` username into `hostname/path`.
 *
 * The part after the `._.` marker is base32-decoded (loose mode) and
 * interpreted as UTF-8 text.
 *
 * @param username - The username to expand.
 * @returns `hostname/decodedPath`, or `username` unchanged when it has no
 *   `._.` marker, nothing follows the marker, or the encoded part cannot be
 *   decoded.
 */
const base32decode = (username: string): string => {
  const [hostname, base32encodedPath] = username.split("._.");
  if (!base32encodedPath) return username;

  try {
    const pathBytes = base32.parse(base32encodedPath, { loose: true });
    return `${hostname}/${new TextDecoder().decode(pathBytes)}`;
  } catch {
    // The username only looked like the base32 form (the parser rejected it).
    // Fall back to the raw username so the other candidate paths can still be tried.
    return username;
  }
};

/**
 * Fetches a single candidate URL and tries to turn the response into feed info.
 *
 * A non-OK response yields `Option.none`. A response whose `Content-Type`
 * starts with one of the XML/RSS media types is treated as the feed itself;
 * anything else is handed to `getUrlInfoFromPage` together with its body.
 */
const _tryFetchUrlInfo = async (url: URL): Promise<Option.Option<UrlInfo>> => {
  const response = await fetch(url);
  if (!response.ok) {
    return Option.none;
  }

  const body = await response.text();
  const feedContentTypes = ["application/xml", "application/rss+xml", "text/xml"];
  const looksLikeFeed = feedContentTypes.some((type) =>
    response.headers.get("Content-Type")?.startsWith(type)
  );

  return looksLikeFeed
    ? Option.some(getUrlInfoFromFeed(url, body))
    : await getUrlInfoFromPage(url, body);
};

/**
 * Assembles the `UrlInfo` for a feed document fetched from `url`.
 * The display name is the feed's title, or the feed URL when no title is found.
 */
const getUrlInfoFromFeed = (url: URL, content: string): UrlInfo => {
  const rssUrl = url.toString();
  const name = parseNameFromFeed(content) ?? rssUrl;
  const icon = getIconFromFeed(content);
  return { rssUrl, name, icon };
};

const parseNameFromFeed = (rss: string): string | undefined => {
const doc = xml2js(rss, { compact: false }) as Element;
return text(findOne("title", doc)) ?? fallback;
};
const tryWordpressFeed = async (
hostname: string
): Promise<string | undefined> => {
const res = await fetch(`https://${hostname}/feed/`);
return res.ok ? `https://${hostname}/feed/` : undefined;
return text(findOne("title", doc)) ?? undefined;
}

/**
 * Extracts an icon reference from a feed document: the text of the first
 * `icon` element if there is one, otherwise the text of the `url` element
 * found inside the first `image` element.
 */
const getIconFromFeed = (rss: string): string | undefined => {
  const feed = xml2js(rss, { compact: false }) as Element;

  const iconText = text(findOne("icon", feed));
  if (iconText != null) return iconText;

  const imageElement = findOne("image", feed);
  return text(findOne("url", imageElement));
};

const getRssValue = (html: string): string | undefined =>
/**
 * Derives feed info from an HTML page that links to a feed.
 *
 * @param url - The URL the page was fetched from; relative hrefs in the page
 *   are resolved against it.
 * @param content - The page's HTML.
 * @returns `Option.none` when the page links to no feed or the linked feed
 *   cannot be fetched; otherwise the linked feed's info, with the page's own
 *   icon (when it has one) taking precedence over the feed's icon.
 */
const getUrlInfoFromPage = async (url: URL, content: string): Promise<Option.Option<UrlInfo>> => {
  const linkedUrl = getFullUrl(getLinkedFeedUrl(content), url);
  if (!linkedUrl) return Option.none;

  const res = await fetch(linkedUrl);
  if (!res.ok) return Option.none;

  // `linkedUrl` is already absolute, so it can be used as the feed URL directly.
  const linkedInfo = getUrlInfoFromFeed(linkedUrl, await res.text());

  const iconHref = getPngIcon(content);
  if (iconHref) {
    try {
      // Icon hrefs are frequently relative (e.g. "/favicon.png"); store an
      // absolute URL so the icon is usable outside the context of this page.
      linkedInfo.icon = new URL(iconHref, url).toString();
    } catch {
      // Unparseable icon href: keep whatever icon the feed itself provided.
    }
  }
  return Option.some(linkedInfo);
};

/**
 * Returns the `href` of the first `<link type="application/rss+xml">` element
 * in the given HTML, or `undefined` when there is no such element or it has no
 * `href` attribute.
 */
const getLinkedFeedUrl = (html: string): string | undefined => {
  const { document } = new JSDOM(html).window;
  const feedLink = document.querySelector('link[type="application/rss+xml"]');
  const href = feedLink?.getAttribute("href");
  return href ?? undefined;
};

const ensureFullUrl = (
urlOrPath: string | undefined,
hostname: string
): string | undefined => {
if (!urlOrPath) return undefined;
try {
const url = new URL(urlOrPath);
if (url.hostname !== null) return urlOrPath;
} catch {}

return path.join(`https://${hostname}`, urlOrPath);
/**
 * Resolves a possibly-relative URL string against a base URL.
 *
 * @param url - Absolute or relative URL text, e.g. an `href` attribute value.
 * @param base - The URL that relative values are resolved against.
 * @returns The resolved URL, or `undefined` when either argument is missing or
 *   empty, or when `url` cannot be parsed.
 */
const getFullUrl = (url: string | undefined, base: URL | undefined): URL | undefined => {
  if (!url || !base) return undefined;
  try {
    return new URL(url, base);
  } catch {
    // `new URL` throws on malformed input; report "no URL" through the return
    // type instead of making every caller guard against an exception.
    return undefined;
  }
};

const getPngIcon = (html: string): string | undefined => {
Expand All @@ -139,17 +166,12 @@ const getPngIcon = (html: string): string | undefined => {
return icons.find((icon) => icon.endsWith(".png") || icon.endsWith("gif")); // TODO: Local proxy to convert .ico to .png
};

/**
 * Fetches the front page of the host named in `url` (a scheme-less
 * host-plus-path string) over HTTPS and returns the icon that page advertises,
 * made absolute against that host.
 */
const getIconForDomain = async (url: string): Promise<string | undefined> => {
  const { hostname } = new URL(`https://${url}`);
  const response = await fetch(`https://${hostname}/`);
  const html = await response.text();
  return ensureFullUrl(getPngIcon(html), hostname);
};

/**
 * Collects the `href` values of all `<link rel="...">` elements in `doc` whose
 * `rel` matches, in document order. Links with a missing or empty `href` are
 * left out.
 */
const getLinkHref = (doc: Document, rel: string): string[] => {
  const hrefs: string[] = [];
  for (const link of Array.from(doc.querySelectorAll(`link[rel="${rel}"]`))) {
    const href = link.getAttribute("href");
    if (href) hrefs.push(href);
  }
  return hrefs;
};

const getMetaContent = (doc: Document, property: string): string[] =>
[...doc.querySelectorAll(`meta[property="${property}"]`)].flatMap((meta) => {
const content = meta.getAttribute("content");
Expand Down
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import express from "express";
import bodyParser from "body-parser";
import morgan from "morgan";
import { routes } from "./routes";
import { fetchAndSendAllFeeds } from "./fetch-and-send-all-feeds";
import { forever } from "./forever";
Expand All @@ -8,7 +9,7 @@ import { PORT } from "./env";
const app = express();

app.use(bodyParser.json({ type: "application/activity+json" }));

app.use(morgan('tiny'));
app.use(routes);

app.get("/", (req, res) => {
Expand Down
4 changes: 2 additions & 2 deletions src/webfinger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ type WebfingerResponse = {
};

export const webfingerRoute: Route<
Response.Ok<WebfingerResponse> | Response.BadRequest<string>
Response.Ok<WebfingerResponse> | Response.BadRequest<string> | Response.NotFound
> = route
.use(Parser.query(webfingeQuery))
.get("/.well-known/webfinger")
.handler(async (req) => {
const account = req.query.resource.slice("acct:".length);
const [username] = account.split("@");
const urlInfo = await fetchUrlInfo(username);
if (Option.isNone(urlInfo)) return Response.notFound();

console.log(username);
return Response.ok({
subject: req.query.resource,
aliases: [],
Expand Down
4 changes: 2 additions & 2 deletions src/xml-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ export const findAll = (name: string, doc: Element): Element[] => {
) ?? []
);
};
export const findOne = (name: string, doc: Element): Element | undefined => {
for (const element of doc.elements ?? []) {
export const findOne = (name: string, doc: Element | undefined): Element | undefined => {
for (const element of doc?.elements ?? []) {
if (element.name === name) return element;
const found = findOne(name, element);
if (found) return found;
Expand Down
Loading

0 comments on commit bed1af2

Please sign in to comment.