diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 319caf67..b7db6b20 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -9,6 +9,7 @@ dagger = "2.43.2"
 hilt = "1.0.0"
 kotlin = "1.7.10"
 material_motion = "0.9.1"
+okhttp = "3.14.9"
 retrofit = "2.9.0"
 richtext = "0.13.0"
 serialization = "1.4.0-RC"
@@ -58,9 +59,12 @@ compose-richtext-markdown = { module = "com.halilibo.compose-richtext:richtext-c
 compose-richtext-material3 = { module = "com.halilibo.compose-richtext:richtext-ui-material3", version.ref = "richtext" }
 compose-richtext-ui = { module = "com.halilibo.compose-richtext:richtext-ui", version.ref = "richtext" }
 copydown = "io.github.furstenheim:copy_down:1.1"
+crux = "com.chimbori.crux:crux:3.9.0"
 dagger-hilt-android = { module = "com.google.dagger:hilt-android", version.ref = "dagger" }
 dagger-hilt-compiler = { module = "com.google.dagger:hilt-compiler", version.ref = "dagger" }
+dagger-hilt-core = { module = "com.google.dagger:hilt-core", version.ref = "dagger" }
 javapoet = "com.squareup:javapoet:1.13.0"
+jsoup = "org.jsoup:jsoup:1.15.2"
 kotlinx-coroutines-core = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
 kotlinx-datetime = "org.jetbrains.kotlinx:kotlinx-datetime:0.4.0"
 kotlinx-serialization-core = { module = "org.jetbrains.kotlinx:kotlinx-serialization-core", version.ref = "serialization" }
@@ -68,7 +72,8 @@ kotlinx-serialization-json = { module = "org.jetbrains.kotlinx:kotlinx-serializa
 material_motion-core = { module = "io.github.fornewid:material-motion-compose-core", version.ref = "material_motion" }
 material_motion-navigation = { module = "io.github.fornewid:material-motion-compose-navigation", version.ref = "material_motion" }
 napier = "io.github.aakira:napier:2.6.1"
-okhttp-loggingInterceptor = "com.squareup.okhttp3:logging-interceptor:3.14.9"
+okhttp-core = { module = "com.squareup.okhttp3:okhttp", version.ref = "okhttp" }
+okhttp-loggingInterceptor = { module = "com.squareup.okhttp3:logging-interceptor", version.ref = "okhttp" }
 retrofit-kotlinxSerializationConverter = "com.jakewharton.retrofit:retrofit2-kotlinx-serialization-converter:0.8.0"
 retrofit-lib = { module = "com.squareup.retrofit2:retrofit", version.ref = "retrofit" }
 retrofit-mock = { module = "com.squareup.retrofit2:retrofit-mock", version.ref = "retrofit" }
diff --git a/metadata-extractor/build.gradle.kts b/metadata-extractor/build.gradle.kts
new file mode 100644
index 00000000..70b94850
--- /dev/null
+++ b/metadata-extractor/build.gradle.kts
@@ -0,0 +1,14 @@
+@file:Suppress("DSL_SCOPE_VIOLATION", "UnstableApiUsage")
+
+plugins {
+  kotlin("jvm")
+  id("dev.msfjarvis.claw.kotlin-library")
+}
+
+dependencies {
+  implementation(libs.crux)
+  implementation(libs.dagger.hilt.core)
+  implementation(libs.jsoup)
+  implementation(libs.kotlinx.coroutines.core)
+  implementation(libs.okhttp.core)
+}
diff --git a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/LinkMetadata.kt b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/LinkMetadata.kt
new file mode 100644
index 00000000..4e775c49
--- /dev/null
+++ b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/LinkMetadata.kt
@@ -0,0 +1,7 @@
+package dev.msfjarvis.claw.metadata
+
+data class LinkMetadata(
+  val url: String,
+  val faviconUrl: String?,
+  val readingTime: String?,
+)
diff --git a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt
new file mode 100644
index 00000000..43d3d7de
--- /dev/null
+++ b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt
@@ -0,0 +1,62 @@
+package dev.msfjarvis.claw.metadata
+
+import com.chimbori.crux.Crux
+import com.chimbori.crux.api.Fields.DURATION_MS
+import com.chimbori.crux.api.Fields.FAVICON_URL
+import javax.inject.Inject
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.withContext
+import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import org.jsoup.Jsoup
+
+/**
+ * Fetches a web page and extracts a [LinkMetadata] from it using [Crux].
+ *
+ * Both collaborators are supplied through constructor injection.
+ */
+class MetadataExtractor
+@Inject
+constructor(
+  private val crux: Crux,
+  private val okHttpClient: OkHttpClient,
+) {
+
+  /**
+   * Downloads the page at [url] and returns its favicon URL and reading time.
+   *
+   * Falls back to a [LinkMetadata] carrying only [url] when [url] cannot be parsed as an HTTP(S)
+   * URL or the response has no body. Failures raised by the HTTP call itself are not caught here
+   * and propagate to the caller.
+   *
+   * @param url address of the page to inspect.
+   * @return the extracted metadata; `faviconUrl` and `readingTime` are `null` when unavailable.
+   */
+  suspend fun getExtractedMetadata(url: String): LinkMetadata {
+    val parsedUrl = url.toHttpUrlOrNull() ?: return makeDefault(url)
+    val request = Request.Builder().url(parsedUrl).build()
+    // execute() blocks its thread, so run it on the IO dispatcher to keep this function main-safe.
+    val htmlContent =
+      withContext(Dispatchers.IO) {
+        okHttpClient.newCall(request).execute().use { response -> response.body?.string() }
+      } ?: return makeDefault(url)
+    val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url))
+    // Safe call: plain toString() on a missing entry would produce the literal string "null".
+    val faviconUrl = extractedMetadata.urls[FAVICON_URL]?.toString()
+    val readingTime = extractedMetadata[DURATION_MS]
+    return LinkMetadata(
+      url = url,
+      faviconUrl = faviconUrl,
+      readingTime = readingTime,
+    )
+  }
+
+  /** Builds the fallback result for [url] with no favicon and no reading time. */
+  private fun makeDefault(url: String) =
+    LinkMetadata(
+      url,
+      null,
+      null,
+    )
+}
diff --git a/settings.gradle.kts b/settings.gradle.kts
index b2c260bd..abcb1afb 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -107,5 +107,6 @@ include(
   "benchmark",
   "common",
   "database",
+  "metadata-extractor",
   "model",
 )