diff --git a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt index 7199d406..89e86dcb 100644 --- a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt +++ b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt @@ -1,6 +1,7 @@ package dev.msfjarvis.claw.metadata import com.chimbori.crux.Crux +import com.chimbori.crux.api.Fields.CANONICAL_URL import com.chimbori.crux.api.Fields.DURATION_MS import com.chimbori.crux.api.Fields.FAVICON_URL import dev.msfjarvis.claw.model.LinkMetadata @@ -30,8 +31,9 @@ constructor( val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url)) val faviconUrl = extractedMetadata.urls[FAVICON_URL].toString() val readingTime = extractedMetadata[DURATION_MS] + val overriddenUrl = extractedMetadata[CANONICAL_URL] ?: url LinkMetadata( - url = url, + url = overriddenUrl, faviconUrl = faviconUrl, readingTime = readingTime, )