mirror of
https://github.com/msfjarvis/compose-lobsters
synced 2025-08-14 12:57:04 +05:30
metadata-extractor: init
This commit is contained in:
parent
3862b35f7b
commit
319838c737
5 changed files with 71 additions and 1 deletions
14
metadata-extractor/build.gradle.kts
Normal file
14
metadata-extractor/build.gradle.kts
Normal file
|
@ -0,0 +1,14 @@
|
|||
// Gradle build script for the metadata-extractor module.
// Applies the Kotlin JVM plugin together with the project's shared
// kotlin-library convention plugin, then declares the libraries this
// module is compiled against via the `libs` version catalog accessors.
@file:Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage")

plugins {
  kotlin("jvm")
  id("dev.msfjarvis.claw.kotlin-library")
}

dependencies {
  // HTML metadata extraction and parsing.
  implementation(libs.crux)
  // Dependency injection annotations.
  implementation(libs.dagger.hilt.core)
  implementation(libs.jsoup)
  implementation(libs.kotlinx.coroutines.core)
  // HTTP client used to download pages.
  implementation(libs.okhttp.core)
}
|
|
@ -0,0 +1,7 @@
|
|||
package dev.msfjarvis.claw.metadata
|
||||
|
||||
/**
 * Metadata describing a single link.
 *
 * @property url the link this metadata belongs to, as a plain string.
 * @property faviconUrl favicon location for the link, or `null` when none is known.
 * @property readingTime reading-time value for the link, or `null` when none is known.
 *   The unit/format of this string is not fixed by this class.
 */
data class LinkMetadata(
  val url: String,
  val faviconUrl: String?,
  val readingTime: String?,
)
|
|
@ -0,0 +1,43 @@
|
|||
package dev.msfjarvis.claw.metadata
|
||||
|
||||
import com.chimbori.crux.Crux
|
||||
import com.chimbori.crux.api.Fields.DURATION_MS
|
||||
import com.chimbori.crux.api.Fields.FAVICON_URL
|
||||
import javax.inject.Inject
|
||||
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
|
||||
import okhttp3.OkHttpClient
|
||||
import okhttp3.Request
|
||||
import org.jsoup.Jsoup
|
||||
|
||||
/**
 * Downloads a web page and derives [LinkMetadata] from its HTML.
 *
 * The page is fetched with the injected [OkHttpClient], parsed with Jsoup and then handed to the
 * injected [Crux] instance, from which the favicon URL and the `DURATION_MS` field are read.
 */
class MetadataExtractor
@Inject
constructor(
  private val crux: Crux,
  private val okHttpClient: OkHttpClient,
) {

  /**
   * Fetches [url] and extracts its metadata.
   *
   * Returns a [LinkMetadata] carrying only [url] (both optional fields `null`) when [url] cannot
   * be parsed as an HTTP(S) URL or when the response has no body. Otherwise the optional fields
   * are populated from whatever Crux extracted; each is `null` if Crux did not provide it.
   *
   * Failures raised by the HTTP call or by parsing are not caught here and propagate to the
   * caller.
   *
   * @param url the link to inspect; it is stored unchanged in the returned [LinkMetadata].
   * @return the extracted metadata, or the default (empty) metadata as described above.
   */
  suspend fun getExtractedMetadata(url: String): LinkMetadata {
    val parsedUrl = url.toHttpUrlOrNull() ?: return makeDefault(url)
    val request = Request.Builder().url(parsedUrl).build()
    // NOTE(review): execute() is OkHttp's synchronous call, so this suspend function blocks its
    // calling thread for the duration of the request — confirm callers run it off the main thread.
    val htmlContent =
      okHttpClient.newCall(request).execute().use { response ->
        // `use` closes the response even on this early (non-local) return.
        val body = response.body ?: return makeDefault(url)
        body.string()
      }
    val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url))
    // Safe call is required: calling toString() directly on a missing entry would yield the
    // literal string "null" instead of a null favicon URL.
    val faviconUrl = extractedMetadata.urls[FAVICON_URL]?.toString()
    // Presumably a duration in milliseconds, going by the key name — confirm against Crux docs.
    val readingTime = extractedMetadata[DURATION_MS]
    return LinkMetadata(
      url = url,
      faviconUrl = faviconUrl,
      readingTime = readingTime,
    )
  }

  /** Builds the fallback metadata for [url]: no favicon and no reading time. */
  private fun makeDefault(url: String) =
    LinkMetadata(
      url = url,
      faviconUrl = null,
      readingTime = null,
    )
}
|
Loading…
Add table
Add a link
Reference in a new issue