Abstract persistent files through Apache OpenDAL (#5626)

* Abstract file access through Apache OpenDAL

* Add AWS S3 support via OpenDAL for data files

* PR improvements

* Additional PR improvements

* Config setting comments for local/remote data locations
This commit is contained in:
Chase Douglas
2025-05-29 12:40:58 -07:00
committed by GitHub
parent 200999c94e
commit 90f7e5ff80
19 changed files with 1460 additions and 191 deletions

View File

@@ -16,7 +16,7 @@ use tokio::{
time::{sleep, Duration},
};
use crate::CONFIG;
use crate::{config::PathType, CONFIG};
/// Empty tuple struct carrying no data; constructed as `AppHeaders()`.
// NOTE(review): its role is not visible in this excerpt — presumably a marker type whose
// behavior lives in trait impls elsewhere in the file; confirm against those impls.
pub struct AppHeaders();
@@ -827,6 +827,26 @@ pub fn is_global(ip: std::net::IpAddr) -> bool {
ip.is_global()
}
/// Streams a Rocket temporary upload into OpenDAL-backed storage.
///
/// The operator is looked up from `CONFIG` for the given `path_type`, the
/// temporary file is opened for reading, and its bytes are copied into a
/// writer created for `path`. The writer is explicitly closed once the copy
/// has finished.
///
/// # Arguments
///
/// * `path_type` - selects which configured OpenDAL operator receives the data.
/// * `path` - destination path handed to the operator's writer.
/// * `temp_file` - the Rocket temporary file to read from; consumed by this call.
/// * `overwrite` - when `false`, the writer is requested with
///   `if_not_exists(true)`; when `true`, with `if_not_exists(false)`.
///
/// # Errors
///
/// Any failure while resolving the operator, opening the temporary file,
/// creating the writer, copying, or closing is propagated as `crate::Error`.
pub async fn save_temp_file(
    path_type: PathType,
    path: &str,
    temp_file: rocket::fs::TempFile<'_>,
    overwrite: bool,
) -> Result<(), crate::Error> {
    use futures::AsyncWriteExt as _;
    use tokio_util::compat::TokioAsyncReadCompatExt as _;

    let storage = CONFIG.opendal_operator_for_path_type(path_type)?;

    // Rocket hands back a tokio reader; wrap it so it speaks the `futures` I/O traits.
    let tokio_reader = temp_file.open().await?;
    let mut source = tokio_reader.compat();

    // Only insist on "must not already exist" when the caller did not ask to overwrite.
    let must_be_new = !overwrite;
    let opendal_writer = storage.writer_with(path).if_not_exists(must_be_new).await?;
    let mut sink = opendal_writer.into_futures_async_write();

    futures::io::copy(&mut source, &mut sink).await?;
    // Close explicitly so that errors from finishing the write are reported to the caller.
    sink.close().await?;

    Ok(())
}
/// These are some tests to check that the implementations match.
/// The IPv4 addresses can all be checked in 30 seconds or so, and they are correct as of nightly 2023-07-17.
/// The IPv6 addresses can't be checked in a reasonable time, so we check over a hundred billion random ones; so far they are all correct.