From 8eee42ae0f0bd40ec3289c5778fc45ba3a5e8648 Mon Sep 17 00:00:00 2001 From: Harsh Shandilya Date: Wed, 5 Oct 2022 12:23:04 +0530 Subject: [PATCH] feat(metadata-extractor): allow de-AMP plugin to change our target URL --- .../kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt index 7199d406..89e86dcb 100644 --- a/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt +++ b/metadata-extractor/src/main/kotlin/dev/msfjarvis/claw/metadata/MetadataExtractor.kt @@ -1,6 +1,7 @@ package dev.msfjarvis.claw.metadata import com.chimbori.crux.Crux +import com.chimbori.crux.api.Fields.CANONICAL_URL import com.chimbori.crux.api.Fields.DURATION_MS import com.chimbori.crux.api.Fields.FAVICON_URL import dev.msfjarvis.claw.model.LinkMetadata @@ -30,8 +31,9 @@ constructor( val extractedMetadata = crux.extractFrom(parsedUrl, Jsoup.parse(htmlContent, url)) val faviconUrl = extractedMetadata.urls[FAVICON_URL].toString() val readingTime = extractedMetadata[DURATION_MS] + val overriddenUrl = extractedMetadata[CANONICAL_URL] ?: url LinkMetadata( - url = url, + url = overriddenUrl, faviconUrl = faviconUrl, readingTime = readingTime, )