diff --git a/distribution/whatsnew/whatsnew-en-GB b/distribution/whatsnew/whatsnew-en-GB index c935e06e..63d7ad0d 100644 --- a/distribution/whatsnew/whatsnew-en-GB +++ b/distribution/whatsnew/whatsnew-en-GB @@ -1,3 +1,5 @@ Changelog - Switch to Compose Material's swipe refresh feature +- Change metadata extractor logic to no longer prefer canonical URLs. + The submitted URL will be retained as-is now. diff --git a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt index 56213d2f..25ac1a40 100644 --- a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt +++ b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt @@ -7,7 +7,6 @@ package dev.msfjarvis.claw.metadata import com.chimbori.crux.Crux -import com.chimbori.crux.api.Fields.CANONICAL_URL import com.chimbori.crux.api.Fields.DURATION_MS import com.chimbori.crux.api.Fields.FAVICON_URL import dev.msfjarvis.claw.model.LinkMetadata @@ -37,9 +36,8 @@ constructor( val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url)) val faviconUrl = extractedMetadata[FAVICON_URL].toString() val readingTime = extractedMetadata[DURATION_MS].toString() - val overriddenUrl = extractedMetadata[CANONICAL_URL]?.toString() ?: url LinkMetadata( - url = overriddenUrl, + url = url, faviconUrl = faviconUrl, readingTime = readingTime, )