feat(metadata-extractor): don't inspect canonical URLs

Changing the URL feels a bit icky when lobste.rs is a
_link_ aggregator at its core. Let's trust that users are making
the right choices.
This commit is contained in:
Harsh Shandilya 2022-11-19 18:06:16 +05:30
parent 15d18414df
commit f429faf8b8
No known key found for this signature in database
2 changed files with 3 additions and 3 deletions

View file

@ -1,3 +1,5 @@
Changelog Changelog
- Switch to Compose Material's swipe refresh feature - Switch to Compose Material's swipe refresh feature
- Change metadata extractor logic to no longer prefer canonical URLs.
The submitted URL will be retained as-is now.

View file

@ -7,7 +7,6 @@
package dev.msfjarvis.claw.metadata package dev.msfjarvis.claw.metadata
import com.chimbori.crux.Crux import com.chimbori.crux.Crux
import com.chimbori.crux.api.Fields.CANONICAL_URL
import com.chimbori.crux.api.Fields.DURATION_MS import com.chimbori.crux.api.Fields.DURATION_MS
import com.chimbori.crux.api.Fields.FAVICON_URL import com.chimbori.crux.api.Fields.FAVICON_URL
import dev.msfjarvis.claw.model.LinkMetadata import dev.msfjarvis.claw.model.LinkMetadata
@ -37,9 +36,8 @@ constructor(
val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url)) val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url))
val faviconUrl = extractedMetadata[FAVICON_URL].toString() val faviconUrl = extractedMetadata[FAVICON_URL].toString()
val readingTime = extractedMetadata[DURATION_MS].toString() val readingTime = extractedMetadata[DURATION_MS].toString()
val overriddenUrl = extractedMetadata[CANONICAL_URL]?.toString() ?: url
LinkMetadata( LinkMetadata(
url = overriddenUrl, url = url,
faviconUrl = faviconUrl, faviconUrl = faviconUrl,
readingTime = readingTime, readingTime = readingTime,
) )