Compare commits
2 Commits
a64762a70d
...
d7c0e8574e
Author | SHA1 | Date |
---|---|---|
Harsh Shandilya | d7c0e8574e | |
Harsh Shandilya | f696f25913 |
|
@ -5,7 +5,6 @@ use crate::{
|
|||
utils::{get_urls_from_message, AsyncError},
|
||||
};
|
||||
use model::AMPResponse;
|
||||
use reqwest::Url;
|
||||
use std::str::FromStr;
|
||||
use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
||||
use tracing::debug;
|
||||
|
@ -31,7 +30,7 @@ pub async fn handler(bot: Bot, message: Message) -> Result<(), AsyncError> {
|
|||
debug!(?resp, "{url}");
|
||||
let resp = deserialize_amp_response(&resp)?;
|
||||
if let AMPResponse::Success(ok) = resp {
|
||||
text = text.replace(url, &ok[0].canonical.url);
|
||||
text = text.replace(url.as_str(), &ok[0].canonical.url);
|
||||
} else {
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -50,7 +49,7 @@ pub fn is_amp(msg: Message) -> bool {
|
|||
if urls.is_empty() {
|
||||
return false;
|
||||
}
|
||||
urls.iter().flat_map(|url| Url::parse(url)).any(|url| {
|
||||
urls.iter().any(|url| {
|
||||
if let Some(mut segments) = url.path_segments()
|
||||
&& let Some(host) = url.host_str()
|
||||
{
|
||||
|
|
|
@ -8,7 +8,8 @@ use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
|||
|
||||
const HOST_MATCH_GROUP: &str = "host";
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 1] = ["instagram.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?:www.)?(?P<host>instagram.com)/(p|reel|tv)/[A-Za-z0-9]+.*/").unwrap()
|
||||
});
|
||||
|
||||
|
|
64
src/main.rs
64
src/main.rs
|
@ -28,6 +28,7 @@ use teloxide::{
|
|||
update_listeners::Polling,
|
||||
Bot,
|
||||
};
|
||||
use utils::has_matching_urls;
|
||||
|
||||
pub(crate) static FIXER_STATE: Lazy<Mutex<HashMap<ChatId, FixerState>>> =
|
||||
Lazy::new(|| Mutex::new(HashMap::new()));
|
||||
|
@ -50,18 +51,13 @@ async fn run() {
|
|||
)
|
||||
.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &twitter::DOMAINS)
|
||||
&& !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.twitter
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
twitter::MATCH_REGEX.is_match(text)
|
||||
&& !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map.entry(chat_id).or_insert(FixerState::default()).twitter;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
@ -70,18 +66,16 @@ async fn run() {
|
|||
#[cfg(feature = "ddinstagram")]
|
||||
let handler = handler.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &instagram::DOMAINS)
|
||||
&& !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.instagram
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
instagram::MATCH_REGEX.is_match(text)
|
||||
&& !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map
|
||||
.entry(chat_id)
|
||||
.or_insert(FixerState::default())
|
||||
.instagram;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
@ -89,18 +83,13 @@ async fn run() {
|
|||
);
|
||||
let handler = handler.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &youtube::DOMAINS)
|
||||
&& !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.youtube
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
youtube::MATCH_REGEX.is_match(text)
|
||||
&& !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map.entry(chat_id).or_insert(FixerState::default()).youtube;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
@ -108,17 +97,14 @@ async fn run() {
|
|||
);
|
||||
let handler = handler.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &medium::DOMAINS);
|
||||
let should_match =
|
||||
should_match && !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.medium
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
medium::MATCH_REGEX.is_match(text) && !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map.entry(chat_id).or_insert(FixerState::default()).medium;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
|
|
@ -11,7 +11,8 @@ const HOST_MATCH_GROUP: &str = "host";
|
|||
const PATH_MATCH_GROUP: &str = "path";
|
||||
const USER_MATCH_GROUP: &str = "user";
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 1] = ["medium.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?<user>[a-zA-Z0-9]*)?.?(?<host>medium.com)/(?<path>.*)").unwrap()
|
||||
});
|
||||
|
||||
|
|
|
@ -9,7 +9,8 @@ use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
|||
const HOST_MATCH_GROUP: &str = "host";
|
||||
const ROOT_MATCH_GROUP: &str = "root";
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 2] = ["twitter.com", "x.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?P<host>(?:mobile.)?(?P<root>(twitter|x)).com)/.*/status/[0-9]+.*")
|
||||
.unwrap()
|
||||
});
|
||||
|
|
30
src/utils.rs
30
src/utils.rs
|
@ -6,7 +6,7 @@ use tracing::{error, info};
|
|||
|
||||
pub(crate) type AsyncError = Box<dyn Error + Send + Sync + 'static>;
|
||||
|
||||
pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<String> {
|
||||
pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<Url> {
|
||||
if let Some(entities) = msg.entities()
|
||||
&& !entities.is_empty()
|
||||
&& let Some(text) = msg.text()
|
||||
|
@ -24,26 +24,36 @@ pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<String> {
|
|||
let utf16 = text.encode_utf16().collect::<Vec<u16>>();
|
||||
let mut urls = Vec::with_capacity(url_entities.len());
|
||||
for entity in &url_entities {
|
||||
urls.push(String::from_utf16_lossy(
|
||||
if let Ok(url) = Url::parse(&String::from_utf16_lossy(
|
||||
&utf16[entity.offset..entity.offset + entity.length],
|
||||
));
|
||||
)) {
|
||||
urls.push(url);
|
||||
}
|
||||
}
|
||||
info!(message_id = %msg.id.0, ?urls, "get_urls_from_message");
|
||||
let url_str = urls.iter().map(reqwest::Url::as_str).collect::<Vec<&str>>();
|
||||
info!(message_id = %msg.id.0, urls = ?url_str, "get_urls_from_message");
|
||||
return urls;
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
pub(crate) fn has_matching_urls(msg: &Message, domains: &[&str]) -> bool {
|
||||
get_urls_from_message(msg).iter().any(|url| {
|
||||
if let Some(host) = url.host_str() {
|
||||
return domains.iter().any(|domain| host.ends_with(domain));
|
||||
}
|
||||
false
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn scrub_urls(msg: &Message) -> Option<String> {
|
||||
if let Some(text) = msg.text() {
|
||||
let urls = get_urls_from_message(msg);
|
||||
let mut final_text = text.to_owned();
|
||||
for item in urls {
|
||||
if let Ok(url) = Url::parse(&item)
|
||||
&& let Some(query_str) = url.query()
|
||||
{
|
||||
let scrubbed_url = item.replace(&format!("?{query_str}"), "");
|
||||
final_text = final_text.replace(&item, &scrubbed_url);
|
||||
for url in urls {
|
||||
if let Some(query_str) = url.query() {
|
||||
let scrubbed_url = url.as_str().replace(&format!("?{query_str}"), "");
|
||||
final_text = final_text.replace(url.as_str(), &scrubbed_url);
|
||||
}
|
||||
}
|
||||
info!(?text, ?final_text, "scrub_urls");
|
||||
|
|
|
@ -6,7 +6,8 @@ use once_cell::sync::Lazy;
|
|||
use regex::Regex;
|
||||
use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 1] = ["youtube.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?:www.)?youtube.com/(?P<shorts>shorts/)[A-Za-z0-9-_]{11}.*").unwrap()
|
||||
});
|
||||
|
||||
|
|
Loading…
Reference in New Issue