diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/DefaultRequest.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/DefaultRequest.kt index d25aff11..e1008bfd 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/DefaultRequest.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/DefaultRequest.kt @@ -1,9 +1,30 @@ package com.jocmp.feedfinder import java.net.URL +import java.net.http.HttpClient +import java.net.http.HttpRequest +import java.net.http.HttpResponse -internal class DefaultRequest: Request { + +internal class DefaultRequest( + private val client: HttpClient = buildClient() +) : Request { override suspend fun fetch(url: URL): Response { - TODO("Not yet implemented") + val request = HttpRequest.newBuilder(url.toURI()) + .GET() + .build() + + val body = client.send(request, HttpResponse.BodyHandlers.ofString()).body() + + return Response(body = body) + } + + companion object { + fun buildClient(): HttpClient { + return HttpClient + .newBuilder() + .followRedirects(HttpClient.Redirect.ALWAYS) + .build() + } } } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt index 050e00e2..c0498299 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt @@ -1,6 +1,7 @@ package com.jocmp.feedfinder import com.jocmp.feedfinder.parser.Feed +import com.jocmp.feedfinder.sources.XMLSource import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.withContext import java.net.MalformedURLException @@ -17,7 +18,7 @@ import java.net.URI // XML can be parsed directly if XML feed // HTML takes response body -class FeedFinder( +class FeedFinder internal constructor( val url: String, private val request: Request = DefaultRequest() ) { @@ -31,25 +32,23 @@ class FeedFinder( // normalize URL via // https://github.com/Ranchero-Software/RSCore/blob/a2f711d64af8f1baefdf0092f57a7f0df7f0e5e8/Sources/RSCore/Shared/String+RSCore.swift#L114 val parsedURL = URI(url).toURL() -// val response = request.fetch(url = parsedURL).parse() + val response = request.fetch(url = parsedURL) + val feeds = mutableListOf() + XMLSource(response).find().let { + if (it.isNotEmpty()) { + feeds.addAll(it) + } + } - // XMLFeed.parse() -// val rssChannel = RssParser().parse(response.body) -// val feeds = XML(source = BaseSource(response)).find() - -// if (feeds.isNotEmpty()) { -// return@withContext Result.Success(feeds.first()) -// } -// - Result.Failure(error = FeedError.IO_FAILURE) + Result.Success(feeds = feeds) } catch (e: MalformedURLException) { Result.Failure(error = FeedError.IO_FAILURE) } } sealed class Result { - class Success(val feed: Feed) : Result() + class Success(val feeds: List) : Result() class Failure(val error: FeedError) : Result() } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/Request.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/Request.kt index 638d563c..1f6c44a9 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/Request.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/Request.kt @@ -2,6 +2,6 @@ package com.jocmp.feedfinder import java.net.URL -interface Request { +internal interface Request { suspend fun fetch(url: URL): Response } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/Response.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/Response.kt index 870a6bee..fe8e039c 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/Response.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/Response.kt @@ -2,9 +2,17 @@ package com.jocmp.feedfinder import com.jocmp.feedfinder.parser.FakeFeed import com.jocmp.feedfinder.parser.Feed +import com.jocmp.feedfinder.parser.Parser +import com.jocmp.feedfinder.parser.XMLFeed -class Response(val body: String?) { - suspend fun parse(): Feed { - return FakeFeed() +internal class Response(val body: String) { + suspend fun parse(validate: Boolean = false): Parser.Result { + if (parsed == null) { + parsed = Parser.parse(body, validate = validate) + } + + return parsed!! } + + private var parsed: Parser.Result? = null } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/parser/Parser.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/parser/Parser.kt index 00bd82d4..cd7234d4 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/parser/Parser.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/parser/Parser.kt @@ -1,7 +1,11 @@ package com.jocmp.feedfinder.parser +import org.jsoup.Jsoup +import org.jsoup.nodes.Document +import java.io.IOException + internal object Parser { - class NoFeedFoundError: Throwable() + class NoFeedFoundError : Throwable() // Parse as XML // return result if feed is valid @@ -17,19 +21,37 @@ internal object Parser { // - XMLFeed // - JSONFeed // - HTML - suspend fun parse(body: String): Feed { + @Throws(NoFeedFoundError::class) + suspend fun parse(body: String, validate: Boolean): Result { val xmlFeed = XMLFeed.from(body) if (xmlFeed.isValid()) { - return xmlFeed + return Result.ParsedFeed(xmlFeed) + } + + val document = tryHTML(body) + + if (document != null) { + return Result.HTMLDocument(document) + } + + if (validate) { + throw NoFeedFoundError() } - throw NoFeedFoundError() + return Result.ParsedFeed(xmlFeed) } -// sealed class Document { -// class XMLDocument -// class HTMLDocument -// class JSONDocument -// } + private fun tryHTML(body: String): Document? { + return try { + return Jsoup.parse(body) + } catch (e: IOException) { + null + } + } + + sealed class Result { + class ParsedFeed(val feed: Feed): Result() + class HTMLDocument(val document: Document): Result() + } } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/parser/XMLFeed.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/parser/XMLFeed.kt index e655778f..bbfcbe36 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/parser/XMLFeed.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/parser/XMLFeed.kt @@ -1,22 +1,31 @@ package com.jocmp.feedfinder.parser import com.prof18.rssparser.RssParser +import com.prof18.rssparser.exception.RssParsingException import com.prof18.rssparser.model.RssChannel -internal class XMLFeed(private val channel: RssChannel) : Feed { +internal class XMLFeed(private val channel: RssChannel?) : Feed { override fun isValid(): Boolean { - return !channel.link.isNullOrBlank() && + return channel != null && + !channel.link.isNullOrBlank() && !channel.title.isNullOrBlank() && hasEntries() } private fun hasEntries(): Boolean { - return channel.items.isNotEmpty() + return channel != null && + channel.items.isNotEmpty() } companion object { suspend fun from(body: String): XMLFeed { - return XMLFeed(RssParser().parse(body)) + val channel = try { + RssParser().parse(body) + } catch (e: RssParsingException) { + null + } + + return XMLFeed(channel) } } } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLink.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLink.kt deleted file mode 100644 index ab6c7f6d..00000000 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLink.kt +++ /dev/null @@ -1,33 +0,0 @@ -package com.jocmp.feedfinder.sources - -import com.jocmp.feedfinder.parser.Feed -import org.jsoup.nodes.Element - -internal class MetaLink(source: Source) : Source by source { -// override fun find(): List { -// if (document == null) { -// return emptyList() -// } -// return emptyList() -// -// return document.select("link[rel~=alternate]") -// .filter { element -> isValidLink(element) } -// .map { XMLFeed(url = URL(it.attr("href"))) } -// } - - private fun isValidLink(element: Element): Boolean { - val type = element.attr("type").lowercase() - val href = element.attr("href") - - return href.isNotBlank() && linkTypes.contains(type) - } - - companion object { - private val linkTypes = setOf( - "application/rss+xml", - "application/atom+xml", - "application/feed+json", - "application/json" - ) - } -} diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinkSource.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinkSource.kt new file mode 100644 index 00000000..9528c38b --- /dev/null +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLinkSource.kt @@ -0,0 +1,50 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.DefaultRequest +import com.jocmp.feedfinder.Request +import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.parser.Feed +import com.jocmp.feedfinder.parser.Parser +import org.jsoup.nodes.Element +import java.net.URL +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.coroutineScope + +internal class MetaLinkSource( + private val response: Response, + private val request: Request = DefaultRequest() +) : Source { + override suspend fun find(): List { + val document = response.findDocument() ?: return emptyList() + + return coroutineScope { + document.select("link[rel~=alternate]") + .filter { element -> isValidLink(element) } + .map { async { request.fetch(url = URL(it.attr("href"))) } } + .awaitAll() + .mapNotNull { response -> + when (val result = response.parse()) { + is Parser.Result.ParsedFeed -> result.feed + is Parser.Result.HTMLDocument -> null + } + } + } + } + + private fun isValidLink(element: Element): Boolean { + val type = element.attr("type").lowercase() + val href = element.attr("href") + + return href.isNotBlank() && linkTypes.contains(type) + } + + companion object { + private val linkTypes = setOf( + "application/rss+xml", + "application/atom+xml", + "application/feed+json", + "application/json" + ) + } +} diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/ResponseDocumentExt.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/ResponseDocumentExt.kt new file mode 100644 index 00000000..d2541cf3 --- /dev/null +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/ResponseDocumentExt.kt @@ -0,0 +1,15 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.parser.Parser +import org.jsoup.nodes.Document + +internal suspend fun Response.findDocument(): Document? { + val result = parse(validate = false) + + if (result is Parser.Result.HTMLDocument) { + return result.document + } + + return null +} diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/ResponseSource.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/ResponseSource.kt deleted file mode 100644 index a5e86beb..00000000 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/ResponseSource.kt +++ /dev/null @@ -1,7 +0,0 @@ -package com.jocmp.feedfinder.sources - -//import com.jocmp.feedfinder.Response -// -//internal class ResponseSource(response: Response): Source { -// fun createFromRequest() -//} diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/Source.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/Source.kt index e8e59905..269620d0 100644 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/Source.kt +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/Source.kt @@ -1,7 +1,11 @@ package com.jocmp.feedfinder.sources +import com.jocmp.feedfinder.Response import com.jocmp.feedfinder.parser.Feed +import com.jocmp.feedfinder.parser.Parser +import org.jsoup.nodes.Document +import java.net.URL -sealed interface Source { +internal sealed interface Source { suspend fun find(): List } diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt deleted file mode 100644 index 44c92444..00000000 --- a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt +++ /dev/null @@ -1,4 +0,0 @@ -package com.jocmp.feedfinder.sources - -//internal class XML(source: BaseSource): Source by source { -//} diff --git a/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt new file mode 100644 index 00000000..107c16f3 --- /dev/null +++ b/feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt @@ -0,0 +1,17 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.parser.Feed +import com.jocmp.feedfinder.parser.Parser.Result.ParsedFeed + +internal class XMLSource(private val response: Response): Source { + override suspend fun find(): List { + val result = response.parse() + + if (result is ParsedFeed && result.feed.isValid()) { + return listOf(result.feed) + } + + return emptyList() + } +} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt deleted file mode 100644 index 0b266ae0..00000000 --- a/feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt +++ /dev/null @@ -1,9 +0,0 @@ -package com.jocmp.feedfinder - -import java.net.URL - -class TestRequest(private val response: Response): Request { - override suspend fun fetch(url: URL): Response { - return response - } -} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/helpers.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/helpers.kt new file mode 100644 index 00000000..906064b5 --- /dev/null +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/helpers.kt @@ -0,0 +1,11 @@ +package com.jocmp.feedfinder + +import java.io.File + +fun testResource(resource: String): String { + return "src/test/resources/${resource}" +} + +fun testFile(resource: String): File { + return File(testResource(resource)) +} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkSourceTest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkSourceTest.kt new file mode 100644 index 00000000..cbcac048 --- /dev/null +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkSourceTest.kt @@ -0,0 +1,36 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.Request +import com.jocmp.feedfinder.Response +import com.jocmp.feedfinder.testFile +import com.jocmp.feedfinder.testResource +import kotlinx.coroutines.runBlocking +import org.junit.Test +import java.io.File +import java.net.URL +import kotlin.test.assertTrue + +class MetaLinkSourceTest { + @Test + fun `it finds a single link`() = runBlocking { + val response = Response( + body = testFile("arstechnica.html").readText() + ) + + val sites = mapOf( + "http://feeds.arstechnica.com/arstechnica/index" to testResource("arstechnica_feed.xml") + ) + + val source = MetaLinkSource(response, TestRequest(sites)) + val feed = source.find().first() + + assertTrue(feed.isValid()) + } +} + +private class TestRequest(val sites: Map) : Request { + override suspend fun fetch(url: URL): Response { + val body = File(sites[url.toString()]!!).readText() + return Response(body = body) + } +} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkTest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkTest.kt deleted file mode 100644 index 984ec37d..00000000 --- a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/MetaLinkTest.kt +++ /dev/null @@ -1,23 +0,0 @@ -package com.jocmp.feedfinder.sources - -import org.junit.Test -import java.net.URL -import kotlin.test.assertEquals -import kotlin.test.assertTrue - -class MetaLinkTest { - @Test - fun find() { -// val source = MetaLink(source = TestSource("arstechnica.html")) -// val feed = source.find().first() -// -// assertEquals(expected = URL("http://feeds.arstechnica.com/arstechnica/index"), actual = feed.url) - } - - @Test - fun `find is empty if document is missing`() { -// val source = MetaLink(source = EmptySource()) -// -// assertTrue(source.find().isEmpty()) - } -} diff --git a/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLSourceTest.kt b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLSourceTest.kt new file mode 100644 index 00000000..d45e8a37 --- /dev/null +++ b/feedfinder/src/test/java/com/jocmp/feedfinder/sources/XMLSourceTest.kt @@ -0,0 +1,20 @@ +package com.jocmp.feedfinder.sources + +import com.jocmp.feedfinder.Response +import kotlinx.coroutines.runBlocking +import org.junit.Test +import java.io.File +import kotlin.math.exp +import kotlin.test.assertEquals +import kotlin.test.assertFalse + +class XMLSourceTest { + @Test + fun `it parses from an XML source`() = runBlocking { + val body = File("src/test/resources/arstechnica_feed.xml").readText() + + val feeds = XMLSource(Response(body)).find() + + assertEquals(expected = 1, actual = feeds.size) + } +}