refactor: use a domain-based matcher

This commit is contained in:
Harsh Shandilya 2023-01-26 17:40:52 +00:00
parent 1b1987db53
commit f385510208
7 changed files with 37 additions and 25 deletions

1
Cargo.lock generated
View File

@ -698,6 +698,7 @@ dependencies = [
"tracing",
"tracing-journald",
"tracing-subscriber",
"url",
]
[[package]]

View File

@ -33,6 +33,7 @@ tokio = { version = "1.24.2", features = ["rt-multi-thread", "macros"] }
tracing = "0.1.37"
tracing-journald = { version = "0.3.0", optional = true }
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
# Pinned like the other dependencies; a "*" requirement is unreproducible and rejected by crates.io.
url = "2.3.1"
[profile.release]
lto = "fat"

View File

@ -15,7 +15,7 @@ use teloxide::{
const HOST_MATCH_GROUP: &str = "host";
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
/// Matches Instagram post, reel and TV links, capturing the bare domain in the
/// `host` group.
///
/// The dots in the host are escaped so that they only match a literal `.`, and
/// the shortcode class includes `_` and `-`.
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^https://(?:www\.)?(?P<host>instagram\.com)/(p|reel|tv)/[A-Za-z0-9_-]+.*/")
        .unwrap()
});

View File

@ -22,8 +22,7 @@ use teloxide::{
types::{Message, Update},
Bot,
};
const REPLACE_SKIP_TOKEN: &str = "#skip";
use url::Host;
async fn run() {
if let Err(e) = logging::init() {
@ -42,39 +41,46 @@ async fn run() {
)
.branch(
dptree::filter(|msg: Message| {
twitter::FILTER_ENABLED.load(Ordering::Relaxed)
&& msg
.text()
.map(|text| {
twitter::MATCH_REGEX.is_match(text)
&& !text.contains(REPLACE_SKIP_TOKEN)
let urls = utils::get_urls_from_message(&msg);
let urls = utils::get_typed_urls(urls);
let has_twitter_url = urls.iter().any(|url| {
    // Compare the host value itself: `Option::map(..).is_some()` is true for
    // every URL that has a host, regardless of which domain it points at.
    matches!(
        url.host(),
        Some(Host::Domain("twitter.com" | "mobile.twitter.com"))
    )
});
twitter::FILTER_ENABLED.load(Ordering::Relaxed) && has_twitter_url
})
.endpoint(twitter::handler),
);
#[cfg(feature = "ddinstagram")]
let handler = handler.branch(
dptree::filter(|msg: Message| {
instagram::FILTER_ENABLED.load(Ordering::Relaxed)
&& msg
.text()
.map(|text| {
instagram::MATCH_REGEX.is_match(text) && !text.contains(REPLACE_SKIP_TOKEN)
})
.unwrap_or_default()
let urls = utils::get_urls_from_message(&msg);
let urls = utils::get_typed_urls(urls);
let has_instagram_url = urls.iter().any(|url| {
    // Compare the host value itself: `Option::map(..).is_some()` is true for
    // every URL that has a host, regardless of which domain it points at.
    // `www.instagram.com` is accepted too, mirroring the optional `www.` prefix
    // in the Instagram handler's match regex.
    matches!(
        url.host(),
        Some(Host::Domain("instagram.com" | "www.instagram.com"))
    )
});
instagram::FILTER_ENABLED.load(Ordering::Relaxed) && has_instagram_url
})
.endpoint(instagram::handler),
);
let handler = handler.branch(
dptree::filter(|msg: Message| {
youtube::FILTER_ENABLED.load(Ordering::Relaxed)
&& msg
.text()
.map(|text| {
youtube::MATCH_REGEX.is_match(text) && !text.contains(REPLACE_SKIP_TOKEN)
let urls = utils::get_urls_from_message(&msg);
let urls = utils::get_typed_urls(urls);
let has_youtube_url = urls.iter().any(|url| {
    // Compare the host value itself: `Option::map(..).is_some()` is true for
    // every URL that has a host, regardless of which domain it points at.
    matches!(
        url.host(),
        Some(Host::Domain("youtube.com" | "www.youtube.com"))
    )
});
youtube::FILTER_ENABLED.load(Ordering::Relaxed) && has_youtube_url
})
.endpoint(youtube::handler),
);

View File

@ -15,7 +15,7 @@ use teloxide::{
const HOST_MATCH_GROUP: &str = "host";
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
/// Matches Twitter status links on `twitter.com` or `mobile.twitter.com`,
/// capturing the full host in the `host` group.
///
/// The dots in the host are escaped so that they only match a literal `.`.
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^https://(?P<host>(?:mobile\.)?twitter\.com)/.*/status/[0-9]+.*").unwrap()
});

View File

@ -33,6 +33,10 @@ pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<String> {
Vec::new()
}
/// Parses every string in `urls` with [`Url::parse`] and returns the ones that
/// parsed successfully.
///
/// Strings that fail to parse are dropped silently; the relative order of the
/// remaining URLs is preserved.
pub(crate) fn get_typed_urls(urls: Vec<String>) -> Vec<Url> {
    urls.iter()
        .map(String::as_str)
        .filter_map(|raw| Url::parse(raw).ok())
        .collect()
}
pub(crate) fn scrub_urls(msg: &Message) -> Option<String> {
if let Some(text) = msg.text() {
let urls = get_urls_from_message(msg);

View File

@ -13,7 +13,7 @@ use teloxide::{
Bot,
};
pub static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
/// Matches YouTube Shorts links, capturing the `shorts/` path segment in the
/// `shorts` group.
///
/// The dots in the host are escaped so that they only match a literal `.`, and
/// the 11-character ID class includes `_` as well as `-`.
static MATCH_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^https://(?:www\.)?youtube\.com/(?P<shorts>shorts/)[A-Za-z0-9_-]{11}.*").unwrap()
});