Abstract persistent files through Apache OpenDAL (#5626)

* Abstract file access through Apache OpenDAL

* Add AWS S3 support via OpenDAL for data files

* PR improvements

* Additional PR improvements

* Config setting comments for local/remote data locations
This commit is contained in:
Chase Douglas
2025-05-29 12:40:58 -07:00
committed by GitHub
parent 200999c94e
commit 90f7e5ff80
19 changed files with 1460 additions and 191 deletions

View File

@@ -3,7 +3,7 @@ use std::{
process::exit,
sync::{
atomic::{AtomicBool, Ordering},
RwLock,
LazyLock, RwLock,
},
};
@@ -22,10 +22,32 @@ static CONFIG_FILE: Lazy<String> = Lazy::new(|| {
get_env("CONFIG_FILE").unwrap_or_else(|| format!("{data_folder}/config.json"))
});
/// Directory portion of `CONFIG_FILE`, used as the root of the OpenDAL operator
/// that reads and writes the config file.
///
/// Falls back to `"data"` when `CONFIG_FILE` has no parent at all or the parent
/// is not valid UTF-8.
static CONFIG_FILE_PARENT_DIR: LazyLock<String> = LazyLock::new(|| {
    let parent = std::path::Path::new(&*CONFIG_FILE)
        .parent()
        .unwrap_or(std::path::Path::new("data"))
        .to_str()
        .unwrap_or("data");

    // `Path::parent` yields `Some("")` for a bare relative file name such as
    // `config.json`. An empty string is not a usable operator root, so point at
    // the current directory instead, which is what the empty parent means.
    if parent.is_empty() {
        ".".to_string()
    } else {
        parent.to_string()
    }
});
/// Final path component of `CONFIG_FILE`, i.e. the name of the config file
/// relative to `CONFIG_FILE_PARENT_DIR`.
///
/// Falls back to `"config.json"` when `CONFIG_FILE` has no file name component
/// or that component is not valid UTF-8.
static CONFIG_FILENAME: LazyLock<String> = LazyLock::new(|| {
    std::path::Path::new(&*CONFIG_FILE)
        .file_name()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("config.json")
        .to_string()
});
pub static SKIP_CONFIG_VALIDATION: AtomicBool = AtomicBool::new(false);
pub static CONFIG: Lazy<Config> = Lazy::new(|| {
Config::load().unwrap_or_else(|e| {
std::thread::spawn(|| {
let rt = tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap_or_else(|e| {
println!("Error loading config:\n {e:?}\n");
exit(12)
});
rt.block_on(Config::load()).unwrap_or_else(|e| {
println!("Error loading config:\n {e:?}\n");
exit(12)
})
})
.join()
.unwrap_or_else(|e| {
println!("Error loading config:\n {e:?}\n");
exit(12)
})
@@ -110,10 +132,11 @@ macro_rules! make_config {
builder
}
fn from_file(path: &str) -> Result<Self, Error> {
let config_str = std::fs::read_to_string(path)?;
println!("[INFO] Using saved config from `{path}` for configuration.\n");
serde_json::from_str(&config_str).map_err(Into::into)
/// Loads the saved user configuration from `CONFIG_FILE`.
///
/// The file is read through the OpenDAL operator rooted at the config file's
/// parent directory and then deserialized from JSON. Any operator, read, or
/// parse failure is returned as an `Error`.
async fn from_file() -> Result<Self, Error> {
    let raw_config = opendal_operator_for_path(&CONFIG_FILE_PARENT_DIR)?.read(&CONFIG_FILENAME).await?.to_vec();
    // Only announce the file once it has actually been read successfully.
    println!("[INFO] Using saved config from `{}` for configuration.\n", *CONFIG_FILE);
    let parsed = serde_json::from_slice(&raw_config)?;
    Ok(parsed)
}
fn clear_non_editable(&mut self) {
@@ -1138,11 +1161,93 @@ fn smtp_convert_deprecated_ssl_options(smtp_ssl: Option<bool>, smtp_explicit_tls
"starttls".to_string()
}
/// Returns an OpenDAL operator rooted at `path`, building it on first use and
/// serving a cached clone afterwards.
///
/// Paths starting with `s3://` are handed to the S3 backend when the crate is
/// built with the `s3` cfg; without it they produce a `ConfigInvalid` error.
/// Every other path is served by the local filesystem (`Fs`) service.
fn opendal_operator_for_path(path: &str) -> Result<opendal::Operator, Error> {
    // Operators built so far, keyed by the exact path string they were requested with.
    static OPERATORS_BY_PATH: LazyLock<dashmap::DashMap<String, opendal::Operator>> =
        LazyLock::new(dashmap::DashMap::new);

    let cached = OPERATORS_BY_PATH.get(path).map(|entry| opendal::Operator::clone(&entry));
    if let Some(operator) = cached {
        return Ok(operator);
    }

    let operator = if !path.starts_with("s3://") {
        // Local filesystem storage rooted at `path`.
        opendal::Operator::new(opendal::services::Fs::default().root(path))?.finish()
    } else {
        #[cfg(not(s3))]
        return Err(opendal::Error::new(opendal::ErrorKind::ConfigInvalid, "S3 support is not enabled").into());

        #[cfg(s3)]
        opendal_s3_operator_for_path(path)?
    };

    OPERATORS_BY_PATH.insert(path.to_owned(), operator.clone());
    Ok(operator)
}
/// Builds an OpenDAL S3 operator for an `s3://bucket/prefix` style `path`.
///
/// The URL host is used as the bucket name and the URL path as the operator
/// root. Virtual-host style addressing is enabled and new objects default to
/// the `INTELLIGENT_TIERING` storage class. Returns an error when `path` is not
/// a valid URL or has no host (bucket) component.
#[cfg(s3)]
fn opendal_s3_operator_for_path(path: &str) -> Result<opendal::Operator, Error> {
    // Credentials are resolved through the official AWS Rust SDK config crate
    // instead of OpenDAL's own loader, for maximum compatibility with AWS
    // credential configurations. For example, OpenDAL doesn't support AWS SSO
    // temporary credentials yet.
    struct OpenDALS3CredentialLoader {}

    #[async_trait]
    impl reqsign::AwsCredentialLoad for OpenDALS3CredentialLoader {
        async fn load_credential(&self, _client: reqwest::Client) -> anyhow::Result<Option<reqsign::AwsCredential>> {
            use aws_config::default_provider::credentials::DefaultCredentialsChain;
            use aws_credential_types::provider::ProvideCredentials as _;
            use tokio::sync::OnceCell;

            // The default provider chain is built once and reused by later calls.
            static DEFAULT_CREDENTIAL_CHAIN: OnceCell<DefaultCredentialsChain> = OnceCell::const_new();

            let provider = DEFAULT_CREDENTIAL_CHAIN.get_or_init(|| DefaultCredentialsChain::builder().build()).await;
            let resolved = provider.provide_credentials().await?;

            // Translate the AWS SDK credential into the shape reqsign expects.
            let credential = reqsign::AwsCredential {
                access_key_id: resolved.access_key_id().to_owned(),
                secret_access_key: resolved.secret_access_key().to_owned(),
                session_token: resolved.session_token().map(str::to_owned),
                expires_in: resolved.expiry().map(Into::into),
            };
            Ok(Some(credential))
        }
    }

    let s3_url = Url::parse(path).map_err(|e| format!("Invalid path S3 URL path {path:?}: {e}"))?;
    let Some(bucket) = s3_url.host_str() else {
        return Err(format!("Missing Bucket name in data folder S3 URL {path:?}").into());
    };

    let builder = opendal::services::S3::default()
        .bucket(bucket)
        .root(s3_url.path())
        .enable_virtual_host_style()
        .default_storage_class("INTELLIGENT_TIERING")
        .customized_credential_load(Box::new(OpenDALS3CredentialLoader {}));

    let operator = opendal::Operator::new(builder)?.finish();
    Ok(operator)
}
/// Identifies which configured storage location an OpenDAL operator is wanted
/// for; see `Config::opendal_operator_for_path_type`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathType {
    /// The configured data folder.
    Data,
    /// The configured icon cache folder.
    IconCache,
    /// The configured attachments folder.
    Attachments,
    /// The configured sends folder.
    Sends,
    /// The directory that contains the configured RSA key file.
    RsaKey,
}
impl Config {
pub fn load() -> Result<Self, Error> {
pub async fn load() -> Result<Self, Error> {
// Loading from env and file
let _env = ConfigBuilder::from_env();
let _usr = ConfigBuilder::from_file(&CONFIG_FILE).unwrap_or_default();
let _usr = ConfigBuilder::from_file().await.unwrap_or_default();
// Create merged config, config file overwrites env
let mut _overrides = Vec::new();
@@ -1166,7 +1271,7 @@ impl Config {
})
}
pub fn update_config(&self, other: ConfigBuilder, ignore_non_editable: bool) -> Result<(), Error> {
pub async fn update_config(&self, other: ConfigBuilder, ignore_non_editable: bool) -> Result<(), Error> {
// Remove default values
//let builder = other.remove(&self.inner.read().unwrap()._env);
@@ -1198,20 +1303,19 @@ impl Config {
}
//Save to file
use std::{fs::File, io::Write};
let mut file = File::create(&*CONFIG_FILE)?;
file.write_all(config_str.as_bytes())?;
let operator = opendal_operator_for_path(&CONFIG_FILE_PARENT_DIR)?;
operator.write(&CONFIG_FILENAME, config_str).await?;
Ok(())
}
fn update_config_partial(&self, other: ConfigBuilder) -> Result<(), Error> {
async fn update_config_partial(&self, other: ConfigBuilder) -> Result<(), Error> {
let builder = {
let usr = &self.inner.read().unwrap()._usr;
let mut _overrides = Vec::new();
usr.merge(&other, false, &mut _overrides)
};
self.update_config(builder, false)
self.update_config(builder, false).await
}
/// Tests whether an email's domain is allowed. A domain is allowed if it
@@ -1253,8 +1357,9 @@ impl Config {
}
}
pub fn delete_user_config(&self) -> Result<(), Error> {
std::fs::remove_file(&*CONFIG_FILE)?;
pub async fn delete_user_config(&self) -> Result<(), Error> {
let operator = opendal_operator_for_path(&CONFIG_FILE_PARENT_DIR)?;
operator.delete(&CONFIG_FILENAME).await?;
// Empty user config
let usr = ConfigBuilder::default();
@@ -1284,7 +1389,7 @@ impl Config {
inner._enable_smtp && (inner.smtp_host.is_some() || inner.use_sendmail)
}
pub fn get_duo_akey(&self) -> String {
pub async fn get_duo_akey(&self) -> String {
if let Some(akey) = self._duo_akey() {
akey
} else {
@@ -1295,7 +1400,7 @@ impl Config {
_duo_akey: Some(akey_s.clone()),
..Default::default()
};
self.update_config_partial(builder).ok();
self.update_config_partial(builder).await.ok();
akey_s
}
@@ -1308,6 +1413,23 @@ impl Config {
token.is_some() && !token.unwrap().trim().is_empty()
}
/// Returns the OpenDAL operator for the storage location selected by `path_type`.
///
/// Folder variants use the corresponding configured folder directly. For
/// `PathType::RsaKey` the operator is rooted at the directory containing the
/// configured RSA key file.
///
/// # Errors
///
/// Fails when the RSA key file name has no parent directory, when that
/// directory is not valid UTF-8, or when the operator cannot be built.
pub fn opendal_operator_for_path_type(&self, path_type: PathType) -> Result<opendal::Operator, Error> {
    let path = match path_type {
        PathType::Data => self.data_folder(),
        PathType::IconCache => self.icon_cache_folder(),
        PathType::Attachments => self.attachments_folder(),
        PathType::Sends => self.sends_folder(),
        PathType::RsaKey => {
            let key_file = self.rsa_key_filename();
            let key_dir = std::path::Path::new(&key_file)
                .parent()
                .ok_or_else(|| std::io::Error::other("Failed to get directory of RSA key file"))?
                .to_str()
                .ok_or_else(|| std::io::Error::other("Failed to convert RSA key file directory to UTF-8 string"))?;

            // `Path::parent` yields `Some("")` for a bare relative file name.
            // An empty string is not a usable operator root, so use the
            // current directory, which is what the empty parent means.
            if key_dir.is_empty() {
                ".".to_string()
            } else {
                key_dir.to_string()
            }
        }
    };

    opendal_operator_for_path(&path)
}
pub fn render_template<T: serde::ser::Serialize>(&self, name: &str, data: &T) -> Result<String, Error> {
if self.reload_templates() {
warn!("RELOADING TEMPLATES");