diff --git a/README.md b/README.md index de92630..8063d70 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,15 @@ user! All RSS items will be posted as toots. - `@engadget.com@mastofeeder.com` - `@techcrunch.com@mastofeeder.com` - Or any other website that has an RSS feed! `@YOUR_WEBSITE_HERE@mastofeeder.com` -- Convert slashes to dots: `indiegames.com/blog` -> `@indiegames.com.blog@mastofeeder.com` -- Remove `.rss` and `.xml` extensions: `https://feeds.yle.fi/uutiset/v1/majorHeadlines/YLE_UUTISET.rss` -> `@feeds.yle.fi.uutiset.v1.majorHeadlines.YLE_UUTISET` + +You can follow feeds with slashes in the URL by replacing the slashes with `..`: + +- `indiegames.com/blog` -> `@indiegames.com..blog@mastofeeder.com` + +Some websites might have feed URLs that are case-sensitive or contain characters that can't be expressed in a Fediverse username. To follow these feeds, you can replace the first slash with `._.` and [base32 encode](https://www.rfctools.com/base32-encoder/) the path, ignoring any trailing `=`. 
For example: + +- `en.wikipedia.org/w/api.php?action=featuredfeed&feed=featured&feedformat=rss` -> `` +`en.wikipedia.org._.O4XWC4DJFZYGQ4B7MFRXI2LPNY6WMZLBOR2XEZLEMZSWKZBGMZSWKZB5MZSWC5DVOJSWIJTGMVSWIZTPOJWWC5B5OJZXG@mastofeeder.com` ## Developing diff --git a/package.json b/package.json index b6624f1..7ea4912 100644 --- a/package.json +++ b/package.json @@ -6,13 +6,16 @@ "dependencies": { "@types/express": "^4.17.21", "@types/jsdom": "^21.1.6", + "@types/morgan": "^1.9.4", "@types/node-fetch": "^2.6.4", "@types/uuid": "^9.0.8", "body-parser": "^1.20.2", "express": "^4.19.1", "io-ts": "^2.2.21", "jsdom": "^24.0.0", + "morgan": "^1.10.0", "node-fetch": "2", + "rfc4648": "^1.5.2", "sql-template-strings": "^2.2.2", "sqlite": "^5.1.1", "sqlite3": "^5.1.7", diff --git a/src/fetch-url-info.ts b/src/fetch-url-info.ts index 62825f3..b8fd3b8 100644 --- a/src/fetch-url-info.ts +++ b/src/fetch-url-info.ts @@ -1,12 +1,12 @@ import * as Option from "fp-ts/lib/Option"; import { JSDOM } from "jsdom"; -import path from "path"; import { openDb } from "./db"; import SQL from "sql-template-strings"; import { parseUsernameToDomainWithPath } from "./parse-domain"; import { Element, xml2js } from "xml-js"; import { findOne, text } from "./xml-utils"; import fetch from "node-fetch"; +import { base32 } from "rfc4648"; type UrlInfo = { rssUrl: string; @@ -49,83 +49,110 @@ const cacheUrlInfo = async (hostname: string) => { export const fetchUrlInfo = cacheUrlInfo; -const _fetchUrlInfo = async ( - username: string -): Promise> => { - const hostname = parseUsernameToDomainWithPath(username); - try { - let res = await fetch(`https://${hostname}/`); - let additionalExtension = ""; // TODO: Refactor, the logic is getting messy - if (!res.ok) { - additionalExtension = ".rss"; - res = await fetch(`https://${hostname}${additionalExtension}`); - } - if (!res.ok) { - additionalExtension = ".xml"; - res = await fetch(`https://${hostname}${additionalExtension}`); - } - if (!res.ok) return Option.none; 
- - const isRss = ["application/xml", "application/rss+xml", "text/xml"].some( - (type) => res.headers.get("Content-Type")?.startsWith(type) - ); - if (isRss) - return Option.some({ - rssUrl: `https://${hostname}${additionalExtension}`, - name: parseNameFromRss(await res.text(), hostname), - icon: await getIconForDomain(hostname), - }); - - const html = await res.text(); - const rssUrl = - ensureFullUrl(getRssValue(html), hostname) ?? - (await tryWordpressFeed(hostname)); - if (!rssUrl) - return hostname.endsWith("/blog") - ? Option.none - : fetchUrlInfo(hostname + "/blog"); - - return Option.some({ - rssUrl, - icon: ensureFullUrl(getPngIcon(html), hostname), - name: parseNameFromRss( - await fetch(rssUrl).then((res) => res.text()), - hostname - ), - }); - } catch (e) { - console.error(e); - return Option.none; +const _fetchUrlInfo = async (username: string): Promise<Option.Option<UrlInfo>> => { + console.log(`Fetching feed URL info for username ${username}...`); + for (const url of possibleUrlsFromUsername(username)) { + console.log(`Trying ${url}...`); + try { + const result = await _tryFetchUrlInfo(new URL(url)); + if (Option.isSome(result)) { + console.log(`Feed URL found: ${result.value.rssUrl}.`); + return result; + } + } catch {}; } + + console.log(`No feeds found for username ${username}.`); + return Option.none; }; -const parseNameFromRss = (rss: string, fallback: string): string => { +const possibleUrlsFromUsername = (username: string): string[] => { + const paths = possiblePathsFromUsername(username); + const httpsUrls = paths.map((path) => `https://${path}`); + const httpUrls = paths.map((path) => `http://${path}`); + return httpsUrls.concat(httpUrls); +} + +const possiblePathsFromUsername = (username: string): string[] => { + const inferredPath = parseUsernameToDomainWithPath(username) + return [ + username, + username.replace(/\.\./g, "/"), + base32decode(username), + `${username}.rss`, + `${username}.xml`, + `${username}/feed/`, + `${inferredPath}`, + 
`${inferredPath}.rss`, + `${inferredPath}.xml`, + `${inferredPath}/feed/`, + ].filter(function(item, pos, self) { + // remove duplicate paths + return self.indexOf(item) === pos; + }) +} + +const base32decode = (username: string): string => { + const [hostname, base32encodedPath] = username.split("._."); + if (!base32encodedPath) { return username } + const uint8array = base32.parse(base32encodedPath, { loose: true }); + const path = new TextDecoder().decode(uint8array); + return `${hostname}/${path}` +} + +const _tryFetchUrlInfo = async (url: URL): Promise<Option.Option<UrlInfo>> =>{ + let res = await fetch(url); + if (!res.ok) return Option.none; + + const content = await res.text(); + const isFeed = ["application/xml", "application/rss+xml", "text/xml"].some( + (type) => res.headers.get("Content-Type")?.startsWith(type) + ); + + if (isFeed) return Option.some(getUrlInfoFromFeed(url, content)); + return await getUrlInfoFromPage(url, content); +} + +const getUrlInfoFromFeed = (url: URL, content: string): UrlInfo => + ({ + rssUrl: url.toString(), + name: parseNameFromFeed(content) ?? url.toString(), + icon: getIconFromFeed(content), + }); + +const parseNameFromFeed = (rss: string): string | undefined => { const doc = xml2js(rss, { compact: false }) as Element; - return text(findOne("title", doc)) ?? fallback; -}; -const tryWordpressFeed = async ( - hostname: string -): Promise<string | undefined> => { - const res = await fetch(`https://${hostname}/feed/`); - return res.ok ? `https://${hostname}/feed/` : undefined; + return text(findOne("title", doc)) ?? undefined; +} + +const getIconFromFeed = (rss: string): string | undefined => { + const doc = xml2js(rss, { compact: false }) as Element; + return text(findOne("icon", doc)) ?? 
text(findOne("url", findOne("image", doc))); }; -const getRssValue = (html: string): string | undefined => +const getUrlInfoFromPage = async(url: URL, content: string): Promise<Option.Option<UrlInfo>> => { + const linkedUrl = getFullUrl(getLinkedFeedUrl(content), url); + if (!linkedUrl) return Option.none; + + let res = await fetch(linkedUrl); + if (!res.ok) return Option.none; + + let linkedInfo = getUrlInfoFromFeed(new URL(linkedUrl, url), await res.text()); + let icon = getPngIcon(content); + if (icon) { + linkedInfo.icon = icon; + } + return Option.some(linkedInfo); +} + +const getLinkedFeedUrl = (html: string): string | undefined => new JSDOM(html).window.document .querySelector('link[type="application/rss+xml"]') ?.getAttribute("href") ?? undefined; -const ensureFullUrl = ( - urlOrPath: string | undefined, - hostname: string -): string | undefined => { - if (!urlOrPath) return undefined; - try { - const url = new URL(urlOrPath); - if (url.hostname !== null) return urlOrPath; - } catch {} - - return path.join(`https://${hostname}`, urlOrPath); +const getFullUrl = (url: string | undefined, base: URL | undefined): URL | undefined => { + if (!url || !base) return undefined; + return new URL(url, base); }; const getPngIcon = (html: string): string | undefined => { @@ -139,17 +166,12 @@ const getPngIcon = (html: string): string | undefined => { return icons.find((icon) => icon.endsWith(".png") || icon.endsWith("gif")); // TODO: Local proxy to convert .ico to .png }; -const getIconForDomain = async (url: string): Promise<string | undefined> => { - const domain = new URL(`https://${url}`).hostname; - const html = await fetch(`https://${domain}/`).then((res) => res.text()); - return ensureFullUrl(getPngIcon(html), domain); -}; - const getLinkHref = (doc: Document, rel: string): string[] => [...doc.querySelectorAll(`link[rel="${rel}"]`)].flatMap((link) => { const href = link.getAttribute("href"); return href ? 
[href] : []; }); + const getMetaContent = (doc: Document, property: string): string[] => [...doc.querySelectorAll(`meta[property="${property}"]`)].flatMap((meta) => { const content = meta.getAttribute("content"); diff --git a/src/index.ts b/src/index.ts index de31a8b..f0c11ac 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ import express from "express"; import bodyParser from "body-parser"; +import morgan from "morgan"; import { routes } from "./routes"; import { fetchAndSendAllFeeds } from "./fetch-and-send-all-feeds"; import { forever } from "./forever"; @@ -8,7 +9,7 @@ import { PORT } from "./env"; const app = express(); app.use(bodyParser.json({ type: "application/activity+json" })); - +app.use(morgan('tiny')); app.use(routes); app.get("/", (req, res) => { diff --git a/src/webfinger.ts b/src/webfinger.ts index 51023be..cd6c78c 100644 --- a/src/webfinger.ts +++ b/src/webfinger.ts @@ -22,7 +22,7 @@ type WebfingerResponse = { }; export const webfingerRoute: Route< - Response.Ok | Response.BadRequest + Response.Ok | Response.BadRequest | Response.NotFound > = route .use(Parser.query(webfingeQuery)) .get("/.well-known/webfinger") @@ -30,8 +30,8 @@ export const webfingerRoute: Route< const account = req.query.resource.slice("acct:".length); const [username] = account.split("@"); const urlInfo = await fetchUrlInfo(username); + if (Option.isNone(urlInfo)) return Response.notFound(); - console.log(username); return Response.ok({ subject: req.query.resource, aliases: [], diff --git a/src/xml-utils.ts b/src/xml-utils.ts index cdc6dff..26e9c84 100644 --- a/src/xml-utils.ts +++ b/src/xml-utils.ts @@ -7,8 +7,8 @@ export const findAll = (name: string, doc: Element): Element[] => { ) ?? [] ); }; -export const findOne = (name: string, doc: Element): Element | undefined => { - for (const element of doc.elements ?? []) { +export const findOne = (name: string, doc: Element | undefined): Element | undefined => { + for (const element of doc?.elements ?? 
[]) { if (element.name === name) return element; const found = findOne(name, element); if (found) return found; diff --git a/yarn.lock b/yarn.lock index 889c821..d101b27 100644 --- a/yarn.lock +++ b/yarn.lock @@ -121,6 +121,13 @@ resolved "https://registry.yarnpkg.com/@types/mime/-/mime-3.0.1.tgz#5f8f2bca0a5863cb69bc0b0acd88c96cb1d4ae10" integrity sha512-Y4XFY5VJAuw0FgAqPNd6NNoV44jbq9Bz2L7Rh/J6jLTiHBSBJa9fxqQIvkIld4GsoDOcCbvzOUAbLPsSKKg+uA== +"@types/morgan@^1.9.4": + version "1.9.4" + resolved "https://registry.yarnpkg.com/@types/morgan/-/morgan-1.9.4.tgz#99965ad2bdc7c5cee28d8ce95cfa7300b19ea562" + integrity sha512-cXoc4k+6+YAllH3ZHmx4hf7La1dzUk6keTR4bF4b4Sc0mZxU/zK4wO7l+ZzezXm/jkYj/qC+uYGZrarZdIVvyQ== + dependencies: + "@types/node" "*" + "@types/node-fetch@^2.6.4": version "2.6.4" resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.4.tgz#1bc3a26de814f6bf466b25aeb1473fa1afe6a660" @@ -258,6 +265,31 @@ base64-js@^1.3.1: version "1.5.1" resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== + +basic-auth@~2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/basic-auth/-/basic-auth-2.0.1.tgz#b998279bf47ce38344b4f3cf916d4679bbf51e3a" + integrity sha512-NF+epuEdnUYVlGuhaxbbq+dvJttwLnGY+YixlXlME5KpQ5W3CnXA5cVTneY3SPbPDRkcjMbifrwmFYcClgOZeg== + dependencies: + safe-buffer "5.1.2" + +body-parser@1.20.1: + version "1.20.1" + resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.1.tgz#b1812a8912c195cd371a3ee5e66faa2338a5c668" + integrity sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw== + dependencies: + bytes "3.1.2" + content-type "~1.0.4" + debug "2.6.9" + depd "2.0.0" + destroy "1.2.0" + http-errors "2.0.0" + iconv-lite "0.4.24" + on-finished "2.4.1" + qs "6.11.0" + raw-body "2.5.1" + type-is "~1.6.18" + unpipe 
"1.0.0" bindings@^1.5.0: version "1.5.0" @@ -466,7 +498,7 @@ delegates@^1.0.0: resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a" integrity sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ== -depd@2.0.0, depd@^2.0.0: +depd@2.0.0, depd@^2.0.0, depd@~2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df" integrity sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw== @@ -1051,6 +1083,17 @@ mkdirp@^1.0.3, mkdirp@^1.0.4: resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== +morgan@^1.10.0: + version "1.10.0" + resolved "https://registry.yarnpkg.com/morgan/-/morgan-1.10.0.tgz#091778abc1fc47cd3509824653dae1faab6b17d7" + integrity sha512-AbegBVI4sh6El+1gNwvD5YIck7nSA36weD7xvIxG4in80j/UoK8AEGaWnnz8v1GxonMCltmlNs5ZKbGvl9b1XQ== + dependencies: + basic-auth "~2.0.1" + debug "2.6.9" + depd "~2.0.0" + on-finished "~2.3.0" + on-headers "~1.0.2" + ms@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8" @@ -1145,6 +1188,18 @@ on-finished@2.4.1: dependencies: ee-first "1.1.1" +on-finished@~2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/on-finished/-/on-finished-2.3.0.tgz#20f1336481b083cd75337992a16971aa2d906947" + integrity sha512-ikqdkGAAyf/X/gPhXGvfgAytDZtDbr+bkNUJ0N9h5MI/dmdgCs3l6hoHrcUv41sRKew3jIwrp4qQDXiK99Utww== + dependencies: + ee-first "1.1.1" + +on-headers@~1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/on-headers/-/on-headers-1.0.2.tgz#772b0ae6aaa525c399e489adfad90c403eb3c28f" + integrity sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA== + 
once@^1.3.0, once@^1.3.1, once@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" @@ -1299,6 +1354,11 @@ retry@^0.12.0: resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b" integrity sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow== +rfc4648@^1.5.2: + version "1.5.2" + resolved "https://registry.yarnpkg.com/rfc4648/-/rfc4648-1.5.2.tgz#cf5dac417dd83e7f4debf52e3797a723c1373383" + integrity sha512-tLOizhR6YGovrEBLatX1sdcuhoSCXddw3mqNVAcKxGJ+J0hFeJ+SjeWCv5UPA/WU3YzWPPuCVYgXBKZUPGpKtg== + rimraf@^3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a" @@ -1311,6 +1371,11 @@ rrweb-cssom@^0.6.0: resolved "https://registry.yarnpkg.com/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz#ed298055b97cbddcdeb278f904857629dec5e0e1" integrity sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw== +safe-buffer@5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d" + integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g== + safe-buffer@5.2.1, safe-buffer@^5.0.1, safe-buffer@~5.2.0: version "5.2.1" resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"