refactor: match on domains rather than regex
This commit is contained in:
parent
f696f25913
commit
d7c0e8574e
|
@ -8,7 +8,8 @@ use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
|||
|
||||
const HOST_MATCH_GROUP: &str = "host";
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 1] = ["instagram.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?:www.)?(?P<host>instagram.com)/(p|reel|tv)/[A-Za-z0-9]+.*/").unwrap()
|
||||
});
|
||||
|
||||
|
|
64
src/main.rs
64
src/main.rs
|
@ -28,6 +28,7 @@ use teloxide::{
|
|||
update_listeners::Polling,
|
||||
Bot,
|
||||
};
|
||||
use utils::has_matching_urls;
|
||||
|
||||
pub(crate) static FIXER_STATE: Lazy<Mutex<HashMap<ChatId, FixerState>>> =
|
||||
Lazy::new(|| Mutex::new(HashMap::new()));
|
||||
|
@ -50,18 +51,13 @@ async fn run() {
|
|||
)
|
||||
.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &twitter::DOMAINS)
|
||||
&& !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.twitter
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
twitter::MATCH_REGEX.is_match(text)
|
||||
&& !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map.entry(chat_id).or_insert(FixerState::default()).twitter;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
@ -70,18 +66,16 @@ async fn run() {
|
|||
#[cfg(feature = "ddinstagram")]
|
||||
let handler = handler.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &instagram::DOMAINS)
|
||||
&& !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.instagram
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
instagram::MATCH_REGEX.is_match(text)
|
||||
&& !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map
|
||||
.entry(chat_id)
|
||||
.or_insert(FixerState::default())
|
||||
.instagram;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
@ -89,18 +83,13 @@ async fn run() {
|
|||
);
|
||||
let handler = handler.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &youtube::DOMAINS)
|
||||
&& !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.youtube
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
youtube::MATCH_REGEX.is_match(text)
|
||||
&& !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map.entry(chat_id).or_insert(FixerState::default()).youtube;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
@ -108,17 +97,14 @@ async fn run() {
|
|||
);
|
||||
let handler = handler.branch(
|
||||
dptree::filter(|msg: Message| {
|
||||
if let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
let should_match = has_matching_urls(&msg, &medium::DOMAINS);
|
||||
let should_match =
|
||||
should_match && !msg.text().unwrap_or_default().contains(REPLACE_SKIP_TOKEN);
|
||||
if should_match
|
||||
&& let Ok(ref mut map) = FIXER_STATE.try_lock()
|
||||
&& let Some(chat_id) = msg.chat_id()
|
||||
{
|
||||
let state = map.entry(chat_id).or_insert(FixerState::default());
|
||||
return state.medium
|
||||
&& msg
|
||||
.text()
|
||||
.map(|text| {
|
||||
medium::MATCH_REGEX.is_match(text) && !text.contains(REPLACE_SKIP_TOKEN)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
return map.entry(chat_id).or_insert(FixerState::default()).medium;
|
||||
}
|
||||
false
|
||||
})
|
||||
|
|
|
@ -11,7 +11,8 @@ const HOST_MATCH_GROUP: &str = "host";
|
|||
const PATH_MATCH_GROUP: &str = "path";
|
||||
const USER_MATCH_GROUP: &str = "user";
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 1] = ["medium.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?<user>[a-zA-Z0-9]*)?.?(?<host>medium.com)/(?<path>.*)").unwrap()
|
||||
});
|
||||
|
||||
|
|
|
@ -9,7 +9,8 @@ use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
|||
const HOST_MATCH_GROUP: &str = "host";
|
||||
const ROOT_MATCH_GROUP: &str = "root";
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 2] = ["twitter.com", "x.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?P<host>(?:mobile.)?(?P<root>(twitter|x)).com)/.*/status/[0-9]+.*")
|
||||
.unwrap()
|
||||
});
|
||||
|
|
12
src/utils.rs
12
src/utils.rs
|
@ -30,12 +30,22 @@ pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<Url> {
|
|||
urls.push(url);
|
||||
}
|
||||
}
|
||||
info!(message_id = %msg.id.0, ?urls, "get_urls_from_message");
|
||||
let url_str = urls.iter().map(reqwest::Url::as_str).collect::<Vec<&str>>();
|
||||
info!(message_id = %msg.id.0, urls = ?url_str, "get_urls_from_message");
|
||||
return urls;
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
pub(crate) fn has_matching_urls(msg: &Message, domains: &[&str]) -> bool {
|
||||
get_urls_from_message(msg).iter().any(|url| {
|
||||
if let Some(host) = url.host_str() {
|
||||
return domains.iter().any(|domain| host.ends_with(domain));
|
||||
}
|
||||
false
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn scrub_urls(msg: &Message) -> Option<String> {
|
||||
if let Some(text) = msg.text() {
|
||||
let urls = get_urls_from_message(msg);
|
||||
|
|
|
@ -6,7 +6,8 @@ use once_cell::sync::Lazy;
|
|||
use regex::Regex;
|
||||
use teloxide::{prelude::Requester, types::Message, utils::html::link, Bot};
|
||||
|
||||
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
pub const DOMAINS: [&str; 1] = ["youtube.com"];
|
||||
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("https://(?:www.)?youtube.com/(?P<shorts>shorts/)[A-Za-z0-9-_]{11}.*").unwrap()
|
||||
});
|
||||
|
||||
|
|
Loading…
Reference in New Issue