feat: remove walls functionality

This commit is contained in:
Harsh Shandilya 2022-10-17 23:44:57 +05:30
parent 0790343eca
commit 9534fdf5da
No known key found for this signature in database
7 changed files with 0 additions and 564 deletions

View File

@ -1,5 +1,3 @@
BASE_DIR=/path/to/walls
BASE_URL=https://domain.tld/directory
TELOXIDE_TOKEN=69420:th1s1sth3t0k3n
BOT_NAME=my_fancy_bot
BOT_OWNER_ID=694205555

156
Cargo.lock generated
View File

@ -232,19 +232,6 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f916dfc5d356b0ed9dae65f1db9fc9770aa2851d2662b988ccf4fe3516e86348"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.12"
@ -365,15 +352,6 @@ dependencies = [
"scopeguard",
]
[[package]]
name = "fastrand"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
dependencies = [
"instant",
]
[[package]]
name = "flate2"
version = "1.0.24"
@ -399,16 +377,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "futures"
version = "0.3.24"
@ -498,15 +466,6 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "getrandom"
version = "0.2.7"
@ -682,12 +641,6 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "imagesize"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df19da1e92fbfec043ca97d622955381b1f3ee72a180ec999912df31b1ccd951"
[[package]]
name = "indexmap"
version = "1.9.1"
@ -698,15 +651,6 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "ipnet"
version = "2.5.0"
@ -758,16 +702,6 @@ version = "0.2.134"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
[[package]]
name = "lock_api"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.17"
@ -798,15 +732,6 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "mime"
version = "0.3.16"
@ -917,31 +842,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
name = "percent-encoding"
version = "2.2.0"
@ -1238,15 +1138,6 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -1263,12 +1154,6 @@ dependencies = [
"untrusted",
]
[[package]]
name = "seahash"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "semver"
version = "1.0.14"
@ -1357,22 +1242,6 @@ dependencies = [
"autocfg",
]
[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch",
"crossbeam-utils",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot",
]
[[package]]
name = "smallvec"
version = "1.10.0"
@ -1862,40 +1731,24 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "walls-bot-rs"
version = "0.18.0"
dependencies = [
"console-subscriber",
"dotenvy",
"fastrand",
"futures",
"imagesize",
"once_cell",
"regex",
"reqwest",
"seahash",
"serde",
"serde_derive",
"serde_json",
"sled",
"teloxide",
"tokio",
"tracing",
"tracing-journald",
"tracing-subscriber",
"walkdir",
]
[[package]]
@ -2025,15 +1878,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"

View File

@ -27,23 +27,18 @@ journald = ["tracing-journald"]
[dependencies]
console-subscriber = { version = "0.1.8", optional = true }
dotenvy = "0.15.5"
fastrand = "1.8.0"
futures = "*"
imagesize = "0.10.1"
once_cell = "1.15.0"
regex = "1.6.0"
reqwest = { version = "*", default-features = false, features = ["json", "rustls-tls"] }
seahash = "4.1.0"
serde = "*"
serde_derive = "*"
serde_json = "*"
sled = "0.34.7"
teloxide = { version = "0.11.0", features = ["auto-send", "cache-me", "ctrlc_handler", "macros", "rustls"], default-features = false }
tokio = { version = "1.21.2", features = ["rt-multi-thread", "macros"] }
tracing = "0.1.37"
tracing-journald = { version = "0.3.0", optional = true }
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
walkdir = "2.3.2"
[profile.release]
lto = "fat"

View File

@ -9,7 +9,6 @@ use teloxide::{
Bot,
};
pub(crate) type SearchTerm = String;
pub(crate) type FilterState = String;
static BOT_OWNER: Lazy<UserId> = Lazy::new(|| {
let value = env::var("BOT_OWNER_ID").expect("BOT_OWNER_ID must be defined");
@ -27,12 +26,6 @@ static BOT_OWNER: Lazy<UserId> = Lazy::new(|| {
pub(crate) enum Command {
#[command(description = "display this text.")]
Help,
#[command(description = "return a picture matching a given query")]
Pic { search_term: SearchTerm },
#[command(description = "return a random picture")]
Random,
#[command(description = "search picture based on given string")]
Search { search_term: SearchTerm },
#[command(description = "enable or disable Instagram link replacement")]
Ddinstagram { filter_state: FilterState },
#[command(description = "enable or disable Twitter link replacement")]
@ -51,12 +44,6 @@ pub(crate) async fn handler(
bot.send_message(message.chat.id, Command::descriptions().to_string())
.await?;
}
Command::Pic { search_term: _ } | Command::Search { search_term: _ } => {
crate::walls::handler(bot, message, command).await?;
}
Command::Random => {
crate::walls::handler(bot, message, command).await?;
}
Command::Ddinstagram { filter_state } => {
if message.from().map(|from| from.id != *BOT_OWNER).is_some() {
bot.send_chat_action(message.chat.id, ChatAction::Typing)

View File

@ -5,11 +5,9 @@ mod ddinstagram;
mod logging;
mod utils;
mod vxtwitter;
mod walls;
use crate::commands::Command;
use crate::logging::TeloxideLogger;
use crate::walls::{BASE_DIR, FILES};
use dotenvy::dotenv;
use std::sync::{atomic::Ordering, Arc};
use teloxide::{
@ -19,8 +17,6 @@ use teloxide::{
types::{Message, Update},
Bot,
};
use tracing::debug;
use tracing::error;
const REPLACE_SKIP_TOKEN: &str = "#skip";
@ -31,12 +27,6 @@ async fn run() {
};
dotenv().ok();
if FILES.is_empty() {
error!("Failed to index files from {}", *BASE_DIR);
return;
}
debug!("Indexed {} files", FILES.len());
let bot = Bot::from_env();
let handler = Update::filter_message()

View File

@ -1,9 +1,6 @@
use once_cell::sync::Lazy;
use seahash::hash;
use std::{fmt::Write as _, fs::File, io::Read};
use teloxide::types::{Message, MessageEntityKind};
use tracing::trace;
use walkdir::WalkDir;
pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<String> {
if let Some(entities) = msg.entities() && let Some(text) = msg.text() {
@ -23,103 +20,6 @@ pub(crate) fn get_urls_from_message(msg: &Message) -> Vec<String> {
Vec::with_capacity(0)
}
/// Computes the SeaHash digest of the contents of the file at `file_path`.
///
/// The contents are loaded through `get_file_bytes`, so this panics whenever
/// that helper cannot read the file.
pub(crate) fn get_file_hash(file_path: &str) -> u64 {
    let contents = get_file_bytes(file_path);
    hash(contents.as_slice())
}
/// Reads the whole file at `file_path` into memory and returns its bytes.
///
/// # Panics
///
/// Panics if the file cannot be opened or if reading it fails.
fn get_file_bytes(file_path: &str) -> Vec<u8> {
    let mut f = File::open(file_path).expect("no file found");
    let mut buffer = Vec::new();
    // Read up to the actual end of the file instead of pre-sizing the buffer
    // from a separate metadata lookup: no truncating `u64 -> usize` cast is
    // needed and a file that changes size between the two calls is still read
    // completely.
    f.read_to_end(&mut buffer).expect("unable to read file");
    buffer
}
/// Returns a copy of `msg` in which every underscore is preceded by a backslash.
///
/// No other character is altered.
pub(crate) fn escape_markdown_str(msg: &str) -> String {
    let mut escaped = String::with_capacity(msg.len());
    for ch in msg.chars() {
        if ch == '_' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Turns a picture file name into a human-readable label.
///
/// The name is first passed through `escape_markdown_str`, then every escaped
/// underscore (`\_`) becomes a space and every occurrence of `.jpg` is removed.
pub(crate) fn file_name_to_label(msg: &str) -> String {
    let escaped = escape_markdown_str(msg);
    let spaced = escaped.replace(r"\_", " ");
    spaced.replace(".jpg", "")
}
/// Filters `items` down to the entries matching `search_term`.
///
/// A term containing an underscore is treated as a file-name prefix and is
/// compared case-insensitively against the start of each item. Any other term
/// is matched with `tokenized_search`. The relative order of `items` is kept.
pub(crate) fn get_search_results(items: Vec<String>, search_term: &str) -> Vec<String> {
    if search_term.contains('_') {
        // Lower-case the term once instead of once per candidate.
        let prefix = search_term.to_lowercase();
        items
            .into_iter()
            .filter(|item| item.to_lowercase().starts_with(&prefix))
            .collect()
    } else {
        items
            .into_iter()
            .filter(|item| tokenized_search(item, search_term))
            .collect()
    }
}
/// Recursively lists the regular files below `directory`.
///
/// Each returned entry is the file's path relative to `directory`. Directory
/// entries that cannot be read are skipped, and so are files whose relative
/// path is not valid UTF-8: such a path cannot be represented as a `String`,
/// and a single oddly named file should not abort indexing.
pub(crate) fn index_pictures(directory: &str) -> Vec<String> {
    WalkDir::new(directory)
        .into_iter()
        .filter_map(std::result::Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter_map(|entry| {
            let relative = entry.path().strip_prefix(directory).ok()?;
            relative.to_str().map(String::from)
        })
        .collect()
}
/// Renders search results as a heading followed by one Markdown link per item.
///
/// The heading shows the label form of `search_term`. Each item becomes a line
/// of the form `[label](base_url/item)`, where the label comes from
/// `file_name_to_label`.
pub(crate) fn join_results_to_string(
    search_term: &str,
    items: &[String],
    base_url: &str,
) -> String {
    let heading = format!(
        "Search results for '{}':\n",
        file_name_to_label(search_term)
    );
    items.iter().fold(heading, |mut listing, item| {
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = writeln!(listing, "[{}]({}/{})", file_name_to_label(item), base_url, item);
        listing
    })
}
/// Reports whether the file `name` matches `search_term`, ignoring case.
///
/// The name is converted to its label and split on spaces; tokens that parse
/// as a `u8` are discarded. A term containing a space must occur as a
/// substring of the remaining tokens joined by single spaces; any other term
/// must equal one of the tokens exactly.
pub(crate) fn tokenized_search(name: &str, search_term: &str) -> bool {
    let needle = search_term.to_lowercase();
    let label = file_name_to_label(name);
    let words: Vec<String> = label
        .split(' ')
        .map(str::to_lowercase)
        .filter(|word| word.parse::<u8>().is_err())
        .collect();
    if needle.contains(' ') {
        words.join(" ").contains(&needle)
    } else {
        words.iter().any(|word| *word == needle)
    }
}
/// Returns a copy of one randomly chosen entry of `files`.
///
/// Callers are expected to pass a non-empty slice; there is nothing to pick
/// from otherwise.
pub(crate) fn get_random_file(files: &[String]) -> String {
    let index = fastrand::usize(..files.len());
    files[index].clone()
}
pub(crate) fn parse_bool(input: &str) -> Result<Option<bool>, String> {
const TRUE_VALUES: [&str; 4] = ["true", "on", "yes", "enable"];
const FALSE_VALUES: [&str; 4] = ["false", "off", "no", "disable"];
@ -153,58 +53,3 @@ pub(crate) fn parse_bool(input: &str) -> Result<Option<bool>, String> {
}
}
}
#[cfg(test)]
mod tests {
    use super::{
        escape_markdown_str, file_name_to_label, get_search_results, index_pictures,
        tokenized_search,
    };

    /// Indexes the `testdata` fixture directory, checks that it is not empty
    /// and returns the entries matching `search_term`.
    fn search_fixtures(search_term: &str) -> Vec<String> {
        let items = index_pictures("testdata");
        assert!(!items.is_empty());
        get_search_results(items, search_term)
    }

    /// Reports whether `results` contains an entry equal to `name`.
    fn has_entry(results: &[String], name: &str) -> bool {
        results.iter().any(|entry| entry.as_str() == name)
    }

    #[test]
    fn markdown_escape_test() {
        let escaped = escape_markdown_str("John_Doe_1.jpg");
        assert_eq!(r"John\_Doe\_1.jpg", escaped);

        // Text without underscores must come back untouched.
        let link = "[Test link](https://example.com)";
        assert_eq!(link, escape_markdown_str(link));
    }

    #[test]
    fn file_name_to_label_test() {
        assert_eq!(file_name_to_label("John_Doe_1.jpg"), "John Doe 1");
        let label = file_name_to_label("Jane_Doe.jpg");
        assert!(!label.contains('_'));
    }

    #[test]
    fn search_matches_full_terms_test() {
        assert!(tokenized_search("John_Doe_1.jpg", "Doe"));
        assert!(tokenized_search("Jane_Doe.jpg", "Jane"));
        // Numeric tokens are not searchable.
        assert!(!tokenized_search("Jane_Doe_1.jpg", "1"));
    }

    #[test]
    fn search_matches_by_token() {
        let results = search_fixtures("De");
        assert!(!has_entry(&results, "Demi_Lovato.jpg"));
        assert!(has_entry(&results, "Ana_De_Armas.jpg"));
    }

    #[test]
    fn search_matches_multiple_terms() {
        let results = search_fixtures("De Armas");
        assert!(has_entry(&results, "Ana_De_Armas.jpg"));
    }

    #[test]
    fn search_matches_lowercase_terms() {
        let results = search_fixtures("de armas");
        assert!(has_entry(&results, "Ana_De_Armas.jpg"));
    }
}

View File

@ -1,223 +0,0 @@
use crate::{
commands::Command,
utils::{
file_name_to_label, get_file_hash, get_random_file, get_search_results, index_pictures,
join_results_to_string,
},
};
use once_cell::sync::Lazy;
use std::{env, error::Error, marker::Send, path::PathBuf};
use teloxide::{
payloads::{
SendDocument, SendDocumentSetters, SendMessageSetters, SendPhoto, SendPhotoSetters,
},
prelude::Requester,
requests::MultipartRequest,
types::{ChatAction, InputFile, Message, ParseMode},
Bot,
};
use tracing::debug;
/// Public URL prefix under which the pictures are served, read from the
/// `BASE_URL` environment variable on first use. Panics if it is not set.
static BASE_URL: Lazy<String> =
Lazy::new(|| env::var("BASE_URL").expect("BASE_URL must be defined"));
/// `sled` database opened at `file_id_cache` on first use. `remember_file` and
/// `get_remembered_file` use it to map a file hash to a file id. Panics if the
/// database cannot be opened.
static TREE: Lazy<sled::Db> = Lazy::new(|| sled::open("file_id_cache").unwrap());
/// Directory on disk that holds the pictures, read from the `BASE_DIR`
/// environment variable on first use. Panics if it is not set.
pub static BASE_DIR: Lazy<String> =
Lazy::new(|| env::var("BASE_DIR").expect("BASE_DIR must be defined"));
/// Paths of all files found below `BASE_DIR`, relative to it, indexed once on
/// first use by `index_pictures`.
pub static FILES: Lazy<Vec<String>> = Lazy::new(|| index_pictures(&BASE_DIR));
/// Telegram mandates a photo can not be larger than 10 megabytes
/// (10 * 1024 * 1024 bytes); larger files are sent as documents.
const MAX_FILE_SIZE: u64 = 10_485_760;
/// Upper bound for the sum of a photo's width and height in pixels; images
/// above it are sent as documents.
/// NOTE(review): this is compared against `width + height`, not against each
/// dimension separately - confirm against Telegram's documented photo limits.
const MAX_DIMEN: usize = 10000;
/// Returns the indexed files that match `search_term`.
///
/// `get_search_results` consumes its input, so it is given a copy of `FILES`.
fn search(search_term: &str) -> Vec<String> {
    let candidates = FILES.to_vec();
    get_search_results(candidates, search_term)
}
/// Builds the on-disk path of `file_name` by joining it to `BASE_DIR` with a `/`.
fn get_file_path(file_name: &str) -> String {
    [BASE_DIR.as_str(), file_name].join("/")
}
/// Given a file name, get its URL
fn get_file_url(file_name: &str) -> String {
format!("{}/{}", *BASE_URL, file_name)
}
/// Removes every occurrence of the `BASE_DIR` prefix (including the trailing
/// `/`) from `file_name`, leaving the path relative to the picture directory.
fn basename(file_name: &str) -> String {
    let dir_prefix = format!("{}/", *BASE_DIR);
    file_name.replace(dir_prefix.as_str(), "")
}
/// Performs exhaustive checks on the given file path to verify if it needs to be sent as
/// a document.
fn should_send_as_document(file_path: &str) -> bool {
let file_name = basename(file_path);
if std::fs::metadata(file_path).unwrap().len() > MAX_FILE_SIZE {
debug!("{}: file size is larger than MAX_FILE_SIZE", file_name);
return true;
}
if let Ok(imagesize) = imagesize::size(file_path) {
if imagesize.height + imagesize.width > MAX_DIMEN {
debug!("{}: dimensions are larger than MAX_DIMEN", file_name);
return true;
};
if imagesize.width / imagesize.height > 20 {
debug!("{}: dimension ratio is larger than 20", file_name);
return true;
}
};
false
}
/// Builds the request that sends the given file as a document, captioned with
/// a Markdown link made of its label and URL, as a reply to `message`.
///
/// When `get_remembered_file` knows a file id for `file_path`, that id is used
/// as the input; otherwise the file at `file_path` is used.
fn send_captioned_document(
    bot: &Bot,
    message: &Message,
    file_url: &str,
    file_name: &str,
    file_path: &str,
) -> MultipartRequest<SendDocument> {
    let file = match get_remembered_file(file_path) {
        Some(file_id) => InputFile::file_id(file_id),
        None => InputFile::file(PathBuf::from(file_path)),
    };
    let label = file_name_to_label(file_name);
    let caption = format!("[{}]({})", &label, file_url);
    bot.send_document(message.chat.id, file)
        .caption(caption)
        .parse_mode(ParseMode::MarkdownV2)
        .reply_to_message_id(message.id)
}
/// Builds the request that sends the given file as a photo, captioned with a
/// Markdown link made of its label and URL, as a reply to `message`.
///
/// When `get_remembered_file` knows a file id for `file_path`, that id is used
/// as the input; otherwise the file at `file_path` is used.
fn send_captioned_picture(
    bot: &Bot,
    message: &Message,
    file_url: &str,
    file_name: &str,
    file_path: &str,
) -> MultipartRequest<SendPhoto> {
    let file = match get_remembered_file(file_path) {
        Some(file_id) => InputFile::file_id(file_id),
        None => InputFile::file(PathBuf::from(file_path)),
    };
    let label = file_name_to_label(file_name);
    let caption = format!("[{}]({})", &label, file_url);
    bot.send_photo(message.chat.id, file)
        .caption(caption)
        .parse_mode(ParseMode::MarkdownV2)
        .reply_to_message_id(message.id)
}
fn remember_file(file_path: &str, file_id: &str) {
let hash = get_file_hash(file_path);
if let Err(error) = TREE.insert(&format!("{}", hash), file_id) {
debug!("Failed to insert {} into db: {}", file_id, error);
};
}
/// Looks up the file id stored in `TREE` for the file at `file_path`.
///
/// The key is the decimal form of the file's hash. Returns `None` when the
/// lookup fails, when nothing is stored under the key, or when the stored
/// value is not valid UTF-8.
fn get_remembered_file(file_path: &str) -> Option<String> {
    let key = get_file_hash(file_path).to_string();
    let stored = TREE.get(&key).ok().flatten()?;
    let id = String::from_utf8(stored.to_vec()).ok()?;
    let file_name = basename(file_path);
    debug!("Found id for {}: {}", file_name, id);
    Some(id)
}
/// Picks a random entry of `images`, sends it as a reply to `message` and
/// remembers the file id returned for it.
///
/// The file goes out as a document when `should_send_as_document` says so and
/// as a photo otherwise; the matching chat action is sent first.
///
/// # Errors
///
/// Returns the error of the first request that fails.
async fn send_random_image(
    bot: &Bot,
    message: &Message,
    images: Vec<String>,
) -> Result<(), Box<dyn Error + Sync + Send + 'static>> {
    let file = get_random_file(&images);
    let path = get_file_path(&file);
    let link = get_file_url(&file);
    if should_send_as_document(&path) {
        bot.send_chat_action(message.chat.id, ChatAction::UploadDocument)
            .await?;
        let msg = send_captioned_document(bot, message, &link, &file, &path).await?;
        // Only the id is needed, so borrow it instead of cloning the document.
        if let Some(document) = msg.document() {
            remember_file(&path, &document.file.id);
        }
    } else {
        bot.send_chat_action(message.chat.id, ChatAction::UploadPhoto)
            .await?;
        let msg = send_captioned_picture(bot, message, &link, &file, &path).await?;
        // `first()` instead of `[0]`: an empty size list must not panic.
        if let Some(photo) = msg.photo().and_then(|photos| photos.first()) {
            remember_file(&path, &photo.file.id);
        }
    }
    Ok(())
}
/// Handles the picture-related bot commands.
///
/// - `Pic`: replies with a random picture matching the search term (spaces are
///   turned into underscores first), or with a text message when the term is
///   empty or nothing matches.
/// - `Random`: replies with a random picture out of all indexed files.
/// - `Search`: replies with a Markdown list of every match, or with a text
///   message when nothing matches.
///
/// Every other command is ignored.
///
/// # Errors
///
/// Returns the error of the first request that fails.
pub(crate) async fn handler(
    bot: Bot,
    message: Message,
    command: Command,
) -> Result<(), Box<dyn Error + Send + Sync>> {
    let chat_id = message.chat.id;
    match command {
        Command::Pic { search_term } if search_term.is_empty() => {
            bot.send_chat_action(chat_id, ChatAction::Typing).await?;
            bot.send_message(chat_id, "No search query passed")
                .reply_to_message_id(message.id)
                .await?;
        }
        Command::Pic { search_term } => {
            let matches = search(&search_term.replace(' ', "_"));
            if matches.is_empty() {
                bot.send_chat_action(chat_id, ChatAction::Typing).await?;
                let text = format!("No picture found for '{}'", search_term);
                bot.send_message(chat_id, text)
                    .reply_to_message_id(message.id)
                    .await?;
            } else {
                send_random_image(&bot, &message, matches).await?;
            }
        }
        Command::Random => {
            let everything = FILES.to_vec();
            send_random_image(&bot, &message, everything).await?;
        }
        Command::Search { search_term } => {
            bot.send_chat_action(chat_id, ChatAction::Typing).await?;
            let found = search(&search_term);
            if found.is_empty() {
                let text = format!("No results found for '{}'", search_term);
                bot.send_message(chat_id, text)
                    .reply_to_message_id(message.id)
                    .await?;
            } else {
                let listing = join_results_to_string(&search_term, &found, &BASE_URL);
                bot.send_message(chat_id, listing)
                    .parse_mode(ParseMode::MarkdownV2)
                    .disable_web_page_preview(true)
                    .reply_to_message_id(message.id)
                    .await?;
            }
        }
        _ => (),
    };
    Ok(())
}