Skip to content

Commit

Permalink
Parse meta links source
Browse files Browse the repository at this point in the history
  • Loading branch information
jocmp committed Dec 10, 2023
1 parent 10fcada commit 123a3b3
Show file tree
Hide file tree
Showing 18 changed files with 244 additions and 108 deletions.
25 changes: 23 additions & 2 deletions feedfinder/src/main/java/com/jocmp/feedfinder/DefaultRequest.kt
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
package com.jocmp.feedfinder

import java.net.URL
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse

// NOTE(review): this span shows both sides of a diff hunk without +/- markers. The one-line
// class header directly below and the TODO(...) call inside fetch look like the removed side —
// confirm against the repository before treating them as live code.
internal class DefaultRequest: Request {

/**
 * [Request] implementation that performs fetches with a `java.net.http.HttpClient`.
 *
 * @param client HTTP client used for every fetch; defaults to the client produced by [buildClient].
 */
internal class DefaultRequest(
private val client: HttpClient = buildClient()
) : Request {
/**
 * Sends a GET request for [url] and wraps the response body, read as a string, in a [Response].
 *
 * NOTE(review): `client.send` is the synchronous HttpClient call, so it blocks the calling
 * thread until the response arrives even though this function is `suspend`. Verify that callers
 * invoke it from a dispatcher suited to blocking I/O.
 */
override suspend fun fetch(url: URL): Response {
TODO("Not yet implemented")
val request = HttpRequest.newBuilder(url.toURI())
.GET()
.build()

// Only the body is kept; status code and headers are not inspected here.
val body = client.send(request, HttpResponse.BodyHandlers.ofString()).body()

return Response(body = body)
}

companion object {
/** Builds the default client, configured to always follow redirects. */
fun buildClient(): HttpClient {
return HttpClient
.newBuilder()
.followRedirects(HttpClient.Redirect.ALWAYS)
.build()
}
}
}
23 changes: 11 additions & 12 deletions feedfinder/src/main/java/com/jocmp/feedfinder/FeedFinder.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.jocmp.feedfinder

import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.sources.XMLSource
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.net.MalformedURLException
Expand All @@ -17,7 +18,7 @@ import java.net.URI
// XML can be parsed directly if XML feed
// HTML takes response body

class FeedFinder(
class FeedFinder internal constructor(
val url: String,
private val request: Request = DefaultRequest()
) {
Expand All @@ -31,25 +32,23 @@ class FeedFinder(
// normalize URL via
// https://github.com/Ranchero-Software/RSCore/blob/a2f711d64af8f1baefdf0092f57a7f0df7f0e5e8/Sources/RSCore/Shared/String+RSCore.swift#L114
val parsedURL = URI(url).toURL()
// val response = request.fetch(url = parsedURL).parse()
val response = request.fetch(url = parsedURL)
val feeds = mutableListOf<Feed>()

XMLSource(response).find().let {
if (it.isNotEmpty()) {
feeds.addAll(it)
}
}

// XMLFeed.parse()
// val rssChannel = RssParser().parse(response.body)
// val feeds = XML(source = BaseSource(response)).find()

// if (feeds.isNotEmpty()) {
// return@withContext Result.Success(feeds.first())
// }
//
Result.Failure(error = FeedError.IO_FAILURE)
Result.Success(feeds = feeds)
} catch (e: MalformedURLException) {
Result.Failure(error = FeedError.IO_FAILURE)
}
}

sealed class Result {
class Success(val feed: Feed) : Result()
class Success(val feeds: List<Feed>) : Result()

class Failure(val error: FeedError) : Result()
}
Expand Down
2 changes: 1 addition & 1 deletion feedfinder/src/main/java/com/jocmp/feedfinder/Request.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ package com.jocmp.feedfinder

import java.net.URL

interface Request {
/**
 * Abstraction over fetching the contents of a URL.
 *
 * NOTE(review): the line above appears to be the removed side of a diff hunk; the
 * `internal` declaration below is the added side.
 */
internal interface Request {
/** Fetches [url] and returns the outcome as a [Response]. */
suspend fun fetch(url: URL): Response
}
14 changes: 11 additions & 3 deletions feedfinder/src/main/java/com/jocmp/feedfinder/Response.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@ package com.jocmp.feedfinder

import com.jocmp.feedfinder.parser.FakeFeed
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser
import com.jocmp.feedfinder.parser.XMLFeed

// NOTE(review): the next three lines appear to be the removed side of a diff hunk shown
// without +/- markers; the `internal class Response` declaration after them is the added side.
class Response(val body: String?) {
suspend fun parse(): Feed {
return FakeFeed()
/**
 * Holds a fetched [body] and parses it on demand.
 */
internal class Response(val body: String) {
/**
 * Parses [body] with [Parser.parse] on the first call and returns the stored result on
 * every later call.
 *
 * NOTE(review): the stored result is reused regardless of [validate], so a later call with a
 * different [validate] value gets whatever the first call produced. No synchronization is
 * visible around the cache either — confirm that this is only used from one coroutine at a time.
 *
 * @param validate forwarded to [Parser.parse] when the body has not been parsed yet.
 * @return the parse result for [body].
 */
suspend fun parse(validate: Boolean = false): Parser.Result {
if (parsed == null) {
parsed = Parser.parse(body, validate = validate)
}

// Non-null at this point: either it was already set, or the branch above just assigned it.
return parsed!!
}

// Result of the first successful parse() call; null until then.
private var parsed: Parser.Result? = null
}
40 changes: 31 additions & 9 deletions feedfinder/src/main/java/com/jocmp/feedfinder/parser/Parser.kt
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package com.jocmp.feedfinder.parser

import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import java.io.IOException

internal object Parser {
class NoFeedFoundError: Throwable()
class NoFeedFoundError : Throwable()

// Parse as XML
// return result if feed is valid
Expand All @@ -17,19 +21,37 @@ internal object Parser {
// - XMLFeed
// - JSONFeed
// - HTML
// NOTE(review): the signature on the next line, `return xmlFeed`, and the second
// `throw NoFeedFoundError()` appear to be the removed side of a diff hunk shown without markers.
suspend fun parse(body: String): Feed {
/**
 * Classifies [body] as either a parsed feed or an HTML document.
 *
 * Steps, in order:
 * 1. Build an [XMLFeed] from [body]; if it reports itself valid, return it as [Result.ParsedFeed].
 * 2. Otherwise try to read [body] as HTML; if that yields a document, return [Result.HTMLDocument].
 * 3. Otherwise, when [validate] is true, throw [NoFeedFoundError]; when false, return the
 *    (invalid) XML feed wrapped in [Result.ParsedFeed].
 *
 * @param body raw text to classify.
 * @param validate whether to throw instead of returning an invalid feed in step 3.
 * @return the classification of [body].
 * @throws NoFeedFoundError when nothing usable was found and [validate] is true.
 */
@Throws(NoFeedFoundError::class)
suspend fun parse(body: String, validate: Boolean): Result {
val xmlFeed = XMLFeed.from(body)

if (xmlFeed.isValid()) {
return xmlFeed
return Result.ParsedFeed(xmlFeed)
}

val document = tryHTML(body)

if (document != null) {
return Result.HTMLDocument(document)
}

// Reached only when tryHTML returned null.
if (validate) {
throw NoFeedFoundError()
}

throw NoFeedFoundError()
return Result.ParsedFeed(xmlFeed)
}

// sealed class Document {
// class XMLDocument
// class HTMLDocument
// class JSONDocument
// }
/**
 * Attempts to read [body] as an HTML document.
 *
 * NOTE(review): `Jsoup.parse(String)` declares no `IOException`, so the `null` branch may never
 * be taken in practice — confirm against the jsoup documentation.
 *
 * @param body raw text to hand to Jsoup.
 * @return the parsed document, or `null` when parsing raised an [IOException].
 */
private fun tryHTML(body: String): Document? =
    try {
        // `try` is an expression; its value is returned directly, no nested `return` needed.
        Jsoup.parse(body)
    } catch (e: IOException) {
        null
    }

/** Outcome of classifying a body: either a parsed feed or an HTML document. */
sealed class Result {
/** The body was handled as a feed; [feed] carries the parsed feed. */
class ParsedFeed(val feed: Feed): Result()
/** The body was handled as HTML; [document] is the Jsoup document. */
class HTMLDocument(val document: Document): Result()
}
}
17 changes: 13 additions & 4 deletions feedfinder/src/main/java/com/jocmp/feedfinder/parser/XMLFeed.kt
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
package com.jocmp.feedfinder.parser

import com.prof18.rssparser.RssParser
import com.prof18.rssparser.exception.RssParsingException
import com.prof18.rssparser.model.RssChannel

// NOTE(review): this span shows both sides of a diff hunk without +/- markers. The non-nullable
// header on the next line, the first `return` in isValid and hasEntries, and the one-line
// `return XMLFeed(RssParser().parse(body))` in from() appear to be the removed side.
internal class XMLFeed(private val channel: RssChannel) : Feed {
/**
 * [Feed] backed by an [RssChannel] produced by `RssParser`.
 *
 * @param channel parsed channel, or `null` when the body could not be parsed; a `null` channel
 * makes the feed report itself as invalid.
 */
internal class XMLFeed(private val channel: RssChannel?) : Feed {
/**
 * A feed is valid when a channel exists, its link and title are both non-blank,
 * and it has at least one item.
 */
override fun isValid(): Boolean {
return !channel.link.isNullOrBlank() &&
return channel != null &&
!channel.link.isNullOrBlank() &&
!channel.title.isNullOrBlank() &&
hasEntries()
}

/** True when a channel exists and its item list is not empty. */
private fun hasEntries(): Boolean {
return channel.items.isNotEmpty()
return channel != null &&
channel.items.isNotEmpty()
}

companion object {
/**
 * Parses [body] with `RssParser` and wraps the outcome in an [XMLFeed].
 *
 * An [RssParsingException] is not propagated: it results in a feed with a `null` channel.
 */
suspend fun from(body: String): XMLFeed {
return XMLFeed(RssParser().parse(body))
val channel = try {
RssParser().parse(body)
} catch (e: RssParsingException) {
null
}

return XMLFeed(channel)
}
}
}
33 changes: 0 additions & 33 deletions feedfinder/src/main/java/com/jocmp/feedfinder/sources/MetaLink.kt

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.DefaultRequest
import com.jocmp.feedfinder.Request
import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser
import org.jsoup.nodes.Element
import java.net.URL
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope

/**
 * [Source] that discovers feeds advertised by `link[rel~=alternate]` elements of an HTML page.
 *
 * Each qualifying link is fetched concurrently and the fetched body is parsed; only bodies that
 * parse into a valid feed are returned.
 *
 * @param response response whose body is expected to be the HTML page to inspect.
 * @param request used to fetch every linked URL; defaults to [DefaultRequest].
 */
internal class MetaLinkSource(
    private val response: Response,
    private val request: Request = DefaultRequest()
) : Source {
    /**
     * Returns the valid feeds linked from the page, or an empty list when the response is not
     * an HTML document or no link qualifies.
     */
    override suspend fun find(): List<Feed> {
        val document = response.findDocument() ?: return emptyList()

        // Resolve the hrefs up front so one malformed link cannot abort the whole lookup.
        val feedURLs = document.select("link[rel~=alternate]")
            .filter { element -> isValidLink(element) }
            .mapNotNull { element -> parseURL(element.attr("href")) }

        return coroutineScope {
            feedURLs
                .map { feedURL -> async { request.fetch(url = feedURL) } }
                .awaitAll()
                .mapNotNull { linkedResponse -> validFeed(linkedResponse) }
        }
    }

    /**
     * Parses a fetched link target and returns its feed only when it is a valid feed,
     * matching the validity check applied by `XMLSource`.
     */
    private suspend fun validFeed(linkedResponse: Response): Feed? =
        when (val result = linkedResponse.parse()) {
            is Parser.Result.ParsedFeed -> result.feed.takeIf { it.isValid() }
            is Parser.Result.HTMLDocument -> null
        }

    /**
     * Converts [href] into a [URL], returning `null` for values `URL` rejects.
     *
     * Relative hrefs such as `/feed.xml` have no protocol and are therefore skipped; resolving
     * them would need the page's own URL, which is not available here.
     */
    private fun parseURL(href: String): URL? =
        try {
            URL(href.trim())
        } catch (e: java.net.MalformedURLException) {
            null
        }

    /** A link qualifies when it has a non-blank href and one of the known feed MIME types. */
    private fun isValidLink(element: Element): Boolean {
        val type = element.attr("type").lowercase()
        val href = element.attr("href")

        return href.isNotBlank() && linkTypes.contains(type)
    }

    companion object {
        // MIME types (lowercase) that mark a link element as pointing at a feed.
        private val linkTypes = setOf(
            "application/rss+xml",
            "application/atom+xml",
            "application/feed+json",
            "application/json"
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Parser
import org.jsoup.nodes.Document

/**
 * Parses this response without validation and returns its HTML document,
 * or `null` when the parse result is anything other than an HTML document.
 */
internal suspend fun Response.findDocument(): Document? =
    (parse(validate = false) as? Parser.Result.HTMLDocument)?.document

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser
import org.jsoup.nodes.Document
import java.net.URL

sealed interface Source {
/**
 * A strategy for locating feeds.
 *
 * NOTE(review): the line above appears to be the removed side of a diff hunk; the
 * `internal` declaration below is the added side.
 */
internal sealed interface Source {
/** Returns the feeds this source found; implementations in this file return an empty list when none are found. */
suspend fun find(): List<Feed>
}
4 changes: 0 additions & 4 deletions feedfinder/src/main/java/com/jocmp/feedfinder/sources/XML.kt

This file was deleted.

17 changes: 17 additions & 0 deletions feedfinder/src/main/java/com/jocmp/feedfinder/sources/XMLSource.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.jocmp.feedfinder.sources

import com.jocmp.feedfinder.Response
import com.jocmp.feedfinder.parser.Feed
import com.jocmp.feedfinder.parser.Parser.Result.ParsedFeed

/**
 * [Source] that treats the response body itself as the feed.
 *
 * @param response response whose body is parsed as a feed.
 */
internal class XMLSource(private val response: Response) : Source {
    /**
     * Returns a single-element list holding the parsed feed when the response parses into a
     * valid feed, and an empty list otherwise.
     */
    override suspend fun find(): List<Feed> {
        val parsed = response.parse() as? ParsedFeed ?: return emptyList()

        return if (parsed.feed.isValid()) listOf(parsed.feed) else emptyList()
    }
}
9 changes: 0 additions & 9 deletions feedfinder/src/test/java/com/jocmp/feedfinder/TestRequest.kt

This file was deleted.

11 changes: 11 additions & 0 deletions feedfinder/src/test/java/com/jocmp/feedfinder/helpers.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.jocmp.feedfinder

import java.io.File

/** Builds the path of a test resource by prefixing [resource] with `src/test/resources/`. */
fun testResource(resource: String): String = "src/test/resources/$resource"

/** Returns a [File] pointing at the test resource path that [testResource] builds for [resource]. */
fun testFile(resource: String): File = File(testResource(resource))
Loading

0 comments on commit 123a3b3

Please sign in to comment.