Misc Updates and favicon fixes (#5993)

- Updated crates
- Switched to rustls instead of native-tls
  Some dependency were already using rustls by default or without option.
  By removing native-tls we also have just one way of working here.

Updated favicon fetching which now is able to fetch more icons.
- Use rustls instead of native-tls
  This seems to work better, probably because of tls sniffing
- Use different user-agent and added several other headers
- Added SVG support. SVG Images will be sanitized first before stored or presented.
  Also, a special CSP for images will be sent to prevent scripts etc.. from SVG images.

Signed-off-by: BlackDex <black.dex@gmail.com>
This commit is contained in:
Mathijs van Veluw
2025-06-27 21:20:36 +02:00
committed by GitHub
parent ad75ce281e
commit f125d5f1a1
9 changed files with 260 additions and 196 deletions

View File

@@ -14,6 +14,7 @@ use reqwest::{
Client, Response,
};
use rocket::{http::ContentType, response::Redirect, Route};
use svg_hush::{data_url_filter, Filter};
use html5gum::{Emitter, HtmlString, Readable, StringReader, Tokenizer};
@@ -35,11 +36,29 @@ pub fn routes() -> Vec<Route> {
static CLIENT: Lazy<Client> = Lazy::new(|| {
// Generate the default headers
let mut default_headers = HeaderMap::new();
default_headers.insert(header::USER_AGENT, HeaderValue::from_static("Links (2.22; Linux X86_64; GNU C; text)"));
default_headers.insert(header::ACCEPT, HeaderValue::from_static("text/html, text/*;q=0.5, image/*, */*;q=0.1"));
default_headers.insert(header::ACCEPT_LANGUAGE, HeaderValue::from_static("en,*;q=0.1"));
default_headers.insert(
header::USER_AGENT,
HeaderValue::from_static(
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
),
);
default_headers.insert(header::ACCEPT, HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"));
default_headers.insert(header::ACCEPT_LANGUAGE, HeaderValue::from_static("en-US,en;q=0.9"));
default_headers.insert(header::CACHE_CONTROL, HeaderValue::from_static("no-cache"));
default_headers.insert(header::PRAGMA, HeaderValue::from_static("no-cache"));
default_headers.insert(header::UPGRADE_INSECURE_REQUESTS, HeaderValue::from_static("1"));
default_headers.insert("Sec-Ch-Ua-Mobile", HeaderValue::from_static("?0"));
default_headers.insert("Sec-Ch-Ua-Platform", HeaderValue::from_static("Linux"));
default_headers.insert(
"Sec-Ch-Ua",
HeaderValue::from_static("\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Google Chrome\";v=\"138\""),
);
default_headers.insert("Sec-Fetch-Site", HeaderValue::from_static("none"));
default_headers.insert("Sec-Fetch-Mode", HeaderValue::from_static("navigate"));
default_headers.insert("Sec-Fetch-User", HeaderValue::from_static("?1"));
default_headers.insert("Sec-Fetch-Dest", HeaderValue::from_static("document"));
// Generate the cookie store
let cookie_store = Arc::new(Jar::default());
@@ -53,6 +72,7 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
.pool_max_idle_per_host(5) // Configure the Hyper Pool to only have max 5 idle connections
.pool_idle_timeout(pool_idle_timeout) // Configure the Hyper Pool to timeout after 10 seconds
.default_headers(default_headers.clone())
.http1_title_case_headers()
.build()
.expect("Failed to build client")
});
@@ -561,6 +581,16 @@ async fn download_icon(domain: &str) -> Result<(Bytes, Option<&str>), Error> {
if buffer.is_empty() {
err_silent!("Empty response or unable find a valid icon", domain);
} else if icon_type == Some("svg+xml") {
let mut svg_filter = Filter::new();
svg_filter.set_data_url_filter(data_url_filter::allow_standard_images);
let mut sanitized_svg = Vec::new();
if svg_filter.filter(&*buffer, &mut sanitized_svg).is_err() {
icon_type = None;
buffer.clear();
} else {
buffer = sanitized_svg.into();
}
}
Ok((buffer, icon_type))
@@ -581,6 +611,16 @@ async fn save_icon(path: &str, icon: Vec<u8>) {
}
fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
fn check_svg_after_xml_declaration(bytes: &[u8]) -> Option<&'static str> {
// Look for SVG tag within the first 1KB
if let Ok(content) = std::str::from_utf8(&bytes[..bytes.len().min(1024)]) {
if content.contains("<svg") || content.contains("<SVG") {
return Some("svg+xml");
}
}
None
}
match bytes {
[137, 80, 78, 71, ..] => Some("png"),
[0, 0, 1, 0, ..] => Some("x-icon"),
@@ -588,6 +628,8 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
[255, 216, 255, ..] => Some("jpeg"),
[71, 73, 70, 56, ..] => Some("gif"),
[66, 77, ..] => Some("bmp"),
[60, 115, 118, 103, ..] => Some("svg+xml"), // Normal svg
[60, 63, 120, 109, 108, ..] => check_svg_after_xml_declaration(bytes), // An svg starting with <?xml
_ => None,
}
}
@@ -599,6 +641,12 @@ async fn stream_to_bytes_limit(res: Response, max_size: usize) -> Result<Bytes,
let mut buf = BytesMut::new();
let mut size = 0;
while let Some(chunk) = stream.next().await {
// It is possible that there might occure UnexpectedEof errors or others
// This is most of the time no issue, and if there is no chunked data anymore or at all parsing the HTML will not happen anyway.
// Therfore if chunk is an err, just break and continue with the data be have received.
if chunk.is_err() {
break;
}
let chunk = &chunk?;
size += chunk.len();
buf.extend(chunk);