Skip to content

Commit

Permalink
Handle relative feed URLs in HTML
Browse files Browse the repository at this point in the history
  • Loading branch information
jocmp committed Dec 11, 2023
1 parent 123a3b3 commit 622ffda
Show file tree
Hide file tree
Showing 20 changed files with 5,791 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ fun FeedList(
folders.forEach { folder ->
Text(folder.title)
folder.feeds.forEach { feed ->
Text("-- ${feed.name}")
Text("-- ${feed.name} (${feed.feedURL})")
}
}
feeds.forEach { feed ->
Text(feed.name)
Text("${feed.name} (${feed.feedURL})")
}
}
}
Expand Down
1 change: 1 addition & 0 deletions basil/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ dependencies {
implementation("androidx.appcompat:appcompat:1.6.1")
implementation("com.google.android.material:material:1.10.0")
implementation(project(":feedbinclient"))
implementation(project(":feedfinder"))
testImplementation("junit:junit:4.13.2")
testImplementation(kotlin("test"))
androidTestImplementation("androidx.test.ext:junit:1.1.5")
Expand Down
2 changes: 2 additions & 0 deletions basil/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

<uses-permission android:name="android.permission.INTERNET" />
<application android:usesCleartextTraffic="true" />
</manifest>
12 changes: 10 additions & 2 deletions basil/src/main/java/com/jocmp/basil/Account.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ package com.jocmp.basil
import com.jocmp.basil.extensions.asFeed
import com.jocmp.basil.extensions.asFolder
import com.jocmp.basil.opml.Outline
import com.jocmp.feedfinder.FeedFinder
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.net.URI
import java.util.UUID


data class Account(
val id: String,
val path: URI,
Expand Down Expand Up @@ -39,10 +39,18 @@ data class Account(
}

suspend fun addFeed(entry: FeedFormEntry): Feed {
val result = FeedFinder.find(feedURL = entry.url)

if (result is FeedFinder.Result.Failure) {
throw Exception(result.error.toString())
}

val found = (result as FeedFinder.Result.Success).feeds.first()

val feed = Feed(
id = UUID.randomUUID().toString(),
name = entry.name,
feedURL = entry.url
feedURL = found.feedURL.toString()
)

if (entry.folderTitles.isEmpty()) {
Expand Down
25 changes: 8 additions & 17 deletions feedfinder/src/main/java/com/jocmp/feedfinder/DefaultRequest.kt
Original file line number Diff line number Diff line change
@@ -1,30 +1,21 @@
package com.jocmp.feedfinder

import java.net.HttpURLConnection
import java.net.URL
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse


internal class DefaultRequest(
private val client: HttpClient = buildClient()
) : Request {
internal class DefaultRequest: Request {
override suspend fun fetch(url: URL): Response {
val request = HttpRequest.newBuilder(url.toURI())
.GET()
.build()
val parsedURL = URL("https", url.host, url.port, url.file)
val connection = parsedURL.openConnection() as HttpURLConnection
connection.setRequestProperty("User-Agent", USER_AGENT)

val body = client.send(request, HttpResponse.BodyHandlers.ofString()).body()
val body = connection.inputStream.bufferedReader().readText()

return Response(body = body)
return Response(url = parsedURL, body = body)
}

companion object {
fun buildClient(): HttpClient {
return HttpClient
.newBuilder()
.followRedirects(HttpClient.Redirect.ALWAYS)
.build()
}
const val USER_AGENT = "Basil/1.0"
}
}
32 changes: 25 additions & 7 deletions feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package com.jocmp.feedfinder

import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.sources.MetaLinkSource
import com.jocmp.feedfinder.sources.Source
import com.jocmp.feedfinder.sources.XMLSource
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.net.MalformedURLException
import java.net.URI
import java.net.URL

// Parser
// - XMLFeed
Expand All @@ -28,16 +31,16 @@ class FeedFinder internal constructor(
// 3. If the response is HTML and th
internal suspend fun find(): Result = withContext(Dispatchers.IO) {
try {
// TODO:
// normalize URL via
// https://github.com/Ranchero-Software/RSCore/blob/a2f711d64af8f1baefdf0092f57a7f0df7f0e5e8/Sources/RSCore/Shared/String+RSCore.swift#L114
val parsedURL = URI(url).toURL()
val parsedURL = URI(url.withProtocol).toURL()
val response = request.fetch(url = parsedURL)
val feeds = mutableListOf<Feed>()

XMLSource(response).find().let {
if (it.isNotEmpty()) {
feeds.addAll(it)
sources(response).forEach { source ->
val currentFeeds = source.find()

if (currentFeeds.isNotEmpty()) {
feeds.addAll(currentFeeds)
return@forEach
}
}

Expand All @@ -47,6 +50,12 @@ class FeedFinder internal constructor(
}
}

/**
 * Builds the list of sources to consult for [response].
 *
 * At present the list holds a single [MetaLinkSource], constructed with the
 * given response and this finder's `request`.
 */
private fun sources(response: Response): List<Source> =
    listOf(MetaLinkSource(response = response, request = request))

sealed class Result {
class Success(val feeds: List<Feed>) : Result()

Expand All @@ -59,3 +68,12 @@ class FeedFinder internal constructor(
}
}
}

/**
 * This string with an explicit URL scheme.
 *
 * A value that already begins with `http://` or `https://` (compared
 * case-insensitively) is returned unchanged; any other value gets `https://`
 * prepended.
 *
 * The complete `scheme://` prefix is matched rather than the bare letters
 * "http", so a scheme-less host that merely starts with those letters
 * (for example `httpbin.org`) is still prefixed.
 */
val String.withProtocol: String
    get() {
        val hasScheme = startsWith("http://", ignoreCase = true) ||
            startsWith("https://", ignoreCase = true)

        return if (hasScheme) this else "https://$this"
    }
5 changes: 3 additions & 2 deletions feedfinder/src/main/java/com/jocmp/feedfinder/Response.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import com.jocmp.feedfinder.parser.FakeFeed
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser
import com.jocmp.feedfinder.parser.XMLFeed
import java.net.URL

internal class Response(val body: String) {
internal class Response(val url: URL, val body: String) {
suspend fun parse(validate: Boolean = false): Parser.Result {
if (parsed == null) {
parsed = Parser.parse(body, validate = validate)
parsed = Parser.parse(body, url = url, validate = validate)
}

return parsed!!
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package com.jocmp.feedfinder.parser

import java.net.URL

/**
 * [Feed] implementation that always reports itself as invalid and exposes a
 * hard-coded feed URL.
 */
class FakeFeed : Feed {
    /** Fixed URL; a new [URL] instance is created on every read. */
    override val feedURL: URL
        get() = URL("https://arstechnica.com/feed")

    /** Always `false`. */
    override fun isValid(): Boolean = false
}
4 changes: 4 additions & 0 deletions feedfinder/src/main/java/com/jocmp/feedfinder/parser/Feed.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package com.jocmp.feedfinder.parser

import java.net.URL

/**
 * Common contract for feed objects in the parser package.
 */
interface Feed {
    /** URL associated with this feed. */
    val feedURL: URL

    /**
     * Reports whether the implementation considers this feed valid; the exact
     * criteria are left to each implementation.
     */
    fun isValid(): Boolean
}
11 changes: 6 additions & 5 deletions feedfinder/src/main/java/com/jocmp/feedfinder/parser/Parser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.jocmp.feedfinder.parser
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import java.io.IOException
import java.net.URL

internal object Parser {
class NoFeedFoundError : Throwable()
Expand All @@ -22,14 +23,14 @@ internal object Parser {
// - JSONFeed
// - HTML
@Throws(NoFeedFoundError::class)
suspend fun parse(body: String, validate: Boolean): Result {
val xmlFeed = XMLFeed.from(body)
suspend fun parse(body: String, url: URL, validate: Boolean): Result {
val xmlFeed = XMLFeed.from(url, body)

if (xmlFeed.isValid()) {
return Result.ParsedFeed(xmlFeed)
}

val document = tryHTML(body)
val document = tryHTML(url, body)

if (document != null) {
return Result.HTMLDocument(document)
Expand All @@ -42,9 +43,9 @@ internal object Parser {
return Result.ParsedFeed(xmlFeed)
}

private fun tryHTML(body: String): Document? {
private fun tryHTML(url: URL, body: String): Document? {
return try {
return Jsoup.parse(body)
return Jsoup.parse(body, url.toString())
} catch (e: IOException) {
null
}
Expand Down
10 changes: 7 additions & 3 deletions feedfinder/src/main/java/com/jocmp/feedfinder/parser/XMLFeed.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@ package com.jocmp.feedfinder.parser
import com.prof18.rssparser.RssParser
import com.prof18.rssparser.exception.RssParsingException
import com.prof18.rssparser.model.RssChannel
import java.net.URL

internal class XMLFeed(private val channel: RssChannel?) : Feed {
internal class XMLFeed(
override val feedURL: URL,
private val channel: RssChannel?
) : Feed {
override fun isValid(): Boolean {
return channel != null &&
!channel.link.isNullOrBlank() &&
Expand All @@ -18,14 +22,14 @@ internal class XMLFeed(private val channel: RssChannel?) : Feed {
}

companion object {
suspend fun from(body: String): XMLFeed {
suspend fun from(url: URL, body: String): XMLFeed {
val channel = try {
RssParser().parse(body)
} catch (e: RssParsingException) {
null
}

return XMLFeed(channel)
return XMLFeed(feedURL = url, channel = channel)
}
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.parser.Feed

/**
 * [Source] that currently yields no feeds: [find] always returns an empty list.
 */
internal class BodyLinkSource : Source {
    override suspend fun find(): List<Feed> = emptyList()
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ internal class MetaLinkSource(
return coroutineScope {
document.select("link[rel~=alternate]")
.filter { element -> isValidLink(element) }
.map { async { request.fetch(url = URL(it.attr("href"))) } }
.map { async { request.fetch(url = URL(it.absUrl("href"))) } }
.awaitAll()
.mapNotNull { response ->
when (val result = response.parse()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
package com.jocmp.feedfinder

import kotlinx.coroutines.runBlocking
import org.junit.Rule
import org.junit.Test
import kotlin.test.assertEquals

class FeedFinderTest {
@Test
fun find_returnsASuccess() {
fun find_returnsASuccess() = runBlocking {
val finder = FeedFinder("arstechnica.com")

finder.find()

assertEquals("", "")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ package com.jocmp.feedfinder.parser
import kotlinx.coroutines.runBlocking
import org.junit.Test
import java.io.File
import java.net.URL
import kotlin.test.assertTrue

class XMLFeedTest {
@Test
fun isValid() = runBlocking {
val responseBody = File("src/test/resources/arstechnica_feed.xml").readText()

val feed = XMLFeed.from(responseBody)
val feed = XMLFeed.from(url = URL("https://arstechnica.com"), body = responseBody)

assertTrue(feed.isValid())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,54 @@ import kotlinx.coroutines.runBlocking
import org.junit.Test
import java.io.File
import java.net.URL
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class MetaLinkSourceTest {
@Test
fun `it finds a single link`() = runBlocking {
val feedURL = "http://feeds.arstechnica.com/arstechnica/index"
val response = Response(
url = URL("https://arstechnica.com"),
body = testFile("arstechnica.html").readText()
)

val sites = mapOf(
"http://feeds.arstechnica.com/arstechnica/index" to testResource("arstechnica_feed.xml")
feedURL to testResource("arstechnica_feed.xml")
)

val source = MetaLinkSource(response, TestRequest(sites))
val feed = source.find().first()

assertTrue(feed.isValid())
assertEquals(expected = URL(feedURL), actual = feed.feedURL)
}

// Feeds the `theverge.html` fixture (served as https://theverge.com) through
// MetaLinkSource and expects the first discovered feed to be valid and to carry
// the absolute URL https://theverge.com/rss/index.xml.
// NOTE(review): presumably the fixture's <link> uses a relative href; confirm against the fixture.
@Test
fun `it works with relative URLs`() = runBlocking {
    val expectedFeedURL = "https://theverge.com/rss/index.xml"
    val pageBody = testFile("theverge.html").readText()
    val homepage = Response(url = URL("https://theverge.com"), body = pageBody)
    val request = TestRequest(mapOf(expectedFeedURL to testResource("theverge_feed.xml")))

    val discovered = MetaLinkSource(homepage, request).find().first()

    assertTrue(discovered.isValid())
    assertEquals(expected = URL(expectedFeedURL), actual = discovered.feedURL)
}
}

private class TestRequest(val sites: Map<String, String>) : Request {
override suspend fun fetch(url: URL): Response {
val body = File(sites[url.toString()]!!).readText()
return Response(body = body)

return Response(url = url, body = body)
}
}
Loading

0 comments on commit 622ffda

Please sign in to comment.