Skip to content

Commit 77151c2

Browse files
committed
libsql-wal s3 multipart segment upload
1 parent eb59a04 commit 77151c2

3 files changed

Lines changed: 162 additions & 41 deletions

File tree

libsql-server/src/main.rs

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -734,33 +734,6 @@ async fn build_server(
734734
async fn main() -> Result<()> {
735735
let args = Cli::parse();
736736

737-
if let Some(ref subcommand) = args.subcommand {
738-
match subcommand {
739-
UtilsSubcommands::AdminShell {
740-
admin_api_url,
741-
namespace,
742-
auth,
743-
} => {
744-
let client = libsql_server::admin_shell::AdminShellClient::new(
745-
admin_api_url.clone(),
746-
auth.clone(),
747-
);
748-
if let Some(ns) = namespace {
749-
client.run_namespace(ns).await?;
750-
}
751-
}
752-
UtilsSubcommands::WalToolkit {
753-
command,
754-
path,
755-
s3_args,
756-
} => {
757-
command.exec(path, s3_args).await?;
758-
}
759-
}
760-
761-
return Ok(());
762-
}
763-
764737
if std::env::var("RUST_LOG").is_err() {
765738
std::env::set_var("RUST_LOG", "info");
766739
}
@@ -789,6 +762,33 @@ async fn main() -> Result<()> {
789762
)
790763
.init();
791764

765+
if let Some(ref subcommand) = args.subcommand {
766+
match subcommand {
767+
UtilsSubcommands::AdminShell {
768+
admin_api_url,
769+
namespace,
770+
auth,
771+
} => {
772+
let client = libsql_server::admin_shell::AdminShellClient::new(
773+
admin_api_url.clone(),
774+
auth.clone(),
775+
);
776+
if let Some(ns) = namespace {
777+
client.run_namespace(ns).await?;
778+
}
779+
}
780+
UtilsSubcommands::WalToolkit {
781+
command,
782+
path,
783+
s3_args,
784+
} => {
785+
command.exec(path, s3_args).await?;
786+
}
787+
}
788+
789+
return Ok(());
790+
}
791+
792792
args.print_welcome_message();
793793
let server = build_server(&args, set_log_level).await?;
794794
server.start().await?;

libsql-wal/src/storage/backend/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ pub trait Backend: Send + Sync + 'static {
5757
config: &Self::Config,
5858
namespace: &NamespaceName,
5959
key: &SegmentKey,
60-
segment_data: impl Stream<Item = Result<Bytes>> + Send + Sync + 'static,
60+
segment_data: impl Stream<Item = Result<Bytes>> + Send + 'static,
6161
) -> impl Future<Output = Result<()>> + Send;
6262

6363
/// Store `segment_data` with its associated `meta`
@@ -224,7 +224,7 @@ impl<T: Backend> Backend for Arc<T> {
224224
config: &Self::Config,
225225
namespace: &NamespaceName,
226226
key: &SegmentKey,
227-
segment_data: impl Stream<Item = Result<Bytes>> + Send + Sync + 'static,
227+
segment_data: impl Stream<Item = Result<Bytes>> + Send + 'static,
228228
) -> Result<()> {
229229
self.as_ref()
230230
.store_segment_data(config, namespace, key, segment_data)

libsql-wal/src/storage/backend/s3.rs

Lines changed: 133 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,15 @@ use aws_config::SdkConfig;
1212
use aws_sdk_s3::operation::create_bucket::CreateBucketError;
1313
use aws_sdk_s3::operation::get_object::GetObjectOutput;
1414
use aws_sdk_s3::primitives::{ByteStream, SdkBody};
15-
use aws_sdk_s3::types::{CreateBucketConfiguration, Object};
15+
use aws_sdk_s3::types::{
16+
CompletedMultipartUpload, CompletedPart, CreateBucketConfiguration, Object,
17+
};
1618
use aws_sdk_s3::Client;
1719
use bytes::{Bytes, BytesMut};
1820
use chrono::{DateTime, Utc};
19-
use http_body::{Frame as HttpFrame, SizeHint};
21+
use futures::future::poll_fn;
22+
use futures::{StreamExt, TryFutureExt as _, TryStreamExt};
23+
use http_body::{Body, Frame as HttpFrame, SizeHint};
2024
use libsql_sys::name::NamespaceName;
2125
use pin_project_lite::pin_project;
2226
use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncReadExt, BufReader};
@@ -277,6 +281,121 @@ impl<IO: Io> S3Backend<IO> {
277281
Ok(())
278282
}
279283

284+
#[tracing::instrument(skip_all, fields(key))]
285+
async fn s3_put_multipart<B>(
286+
&self,
287+
config: &S3Config,
288+
key: impl ToString,
289+
data: B,
290+
) -> Result<()>
291+
where
292+
B: Body<Data = Bytes, Error = crate::storage::Error>,
293+
{
294+
const MAX_CHUNK_SIZE: u64 = 50 * 1024 * 1024; // 50MB
295+
let (s_chunks, r_chunks) = tokio::sync::mpsc::channel(8);
296+
let key = key.to_string();
297+
298+
let upload = self
299+
.client
300+
.create_multipart_upload()
301+
.bucket(&config.bucket)
302+
.key(&key)
303+
.send()
304+
.await
305+
.map_err(|e| Error::unhandled(e, "creating multipart upload"))?;
306+
let upload_id = upload.upload_id();
307+
308+
let make_chunk_fut = async {
309+
let mut current_chunk_file = self.io.tempfile()?;
310+
let mut current_chunk_len = 0;
311+
tokio::pin!(data);
312+
loop {
313+
let Some(frame) = poll_fn(|cx| data.as_mut().poll_frame(cx)).await else {
314+
break;
315+
};
316+
let frame = frame?;
317+
assert!(frame.is_data());
318+
let data = frame.into_data().unwrap();
319+
let offset = current_chunk_len;
320+
current_chunk_len += data.len() as u64;
321+
let (_, ret) = current_chunk_file.write_all_at_async(data, offset).await;
322+
ret?;
323+
if current_chunk_len >= MAX_CHUNK_SIZE {
324+
let new_chunk_file = self.io.tempfile()?;
325+
current_chunk_len = 0;
326+
let old_chunk_file = std::mem::replace(&mut current_chunk_file, new_chunk_file);
327+
if s_chunks.send(old_chunk_file).await.is_err() {
328+
break;
329+
}
330+
}
331+
}
332+
333+
// make sure we move the sender in the future so the chunk sender eventually exits.
334+
drop(s_chunks);
335+
336+
Ok(())
337+
};
338+
339+
let send_chunks_fut = async {
340+
let builder = tokio_stream::wrappers::ReceiverStream::new(r_chunks)
341+
.enumerate()
342+
.map(|(i, chunk)| {
343+
let i = i;
344+
self.client
345+
.upload_part()
346+
.bucket(&config.bucket)
347+
.key(&key)
348+
.part_number(i as i32 + 1) // part number must be between 1-10000
349+
.set_upload_id(upload_id.map(ToString::to_string))
350+
.body(FileStreamBody::new(chunk).into_byte_stream())
351+
.send()
352+
.map_err(|e| Error::unhandled(e, format!("sending chunk")))
353+
.map_ok(move |resp| {
354+
CompletedPart::builder()
355+
.set_e_tag(resp.e_tag)
356+
.set_part_number(Some(i as i32 + 1))
357+
.build()
358+
})
359+
})
360+
.buffered(8)
361+
.try_fold(CompletedMultipartUpload::builder(), |builder, completed| {
362+
std::future::ready(Ok(builder.parts(completed)))
363+
})
364+
.await?;
365+
366+
Ok(builder.build())
367+
};
368+
369+
let ret = tokio::try_join!(send_chunks_fut, make_chunk_fut);
370+
371+
match ret {
372+
Ok((parts, _)) => {
373+
self.client
374+
.complete_multipart_upload()
375+
.bucket(&config.bucket)
376+
.set_upload_id(upload_id.map(ToString::to_string))
377+
.multipart_upload(parts)
378+
.key(&key)
379+
.send()
380+
.await
381+
.map_err(|e| Error::unhandled(e, format!("completing multipart upload")))?;
382+
383+
Ok(())
384+
}
385+
Err(e) => {
386+
self.client
387+
.abort_multipart_upload()
388+
.bucket(&config.bucket)
389+
.set_upload_id(upload_id.map(ToString::to_string))
390+
.key(&key)
391+
.send()
392+
.await
393+
.map_err(|e| Error::unhandled(e, format!("aborting multipart upload")))?;
394+
Err(e)
395+
}
396+
}
397+
}
398+
280399
async fn fetch_segment_index_inner(
281400
&self,
282401
config: &S3Config,
@@ -491,20 +610,23 @@ impl<IO: Io> S3Backend<IO> {
491610
}
492611
}
493612

494-
async fn store_segment_data_inner(
613+
async fn store_segment_data_inner<B>(
495614
&self,
496615
config: &S3Config,
497616
namespace: &NamespaceName,
498-
body: ByteStream,
617+
body: B,
499618
segment_key: &SegmentKey,
500-
) -> Result<()> {
619+
) -> Result<()>
620+
where
621+
B: Body<Data = Bytes, Error = crate::storage::Error>,
622+
{
501623
let folder_key = FolderKey {
502624
cluster_id: &config.cluster_id,
503625
namespace,
504626
};
505627
let s3_data_key = s3_segment_data_key(&folder_key, segment_key);
506628

507-
self.s3_put(config, s3_data_key, body).await
629+
self.s3_put_multipart(config, s3_data_key, body).await
508630
}
509631

510632
async fn store_segment_index_inner(
@@ -635,7 +757,7 @@ where
635757
segment_index: Vec<u8>,
636758
) -> Result<()> {
637759
let segment_key = SegmentKey::from(&meta);
638-
let body = FileStreamBody::new(segment_data).into_byte_stream();
760+
let body = FileStreamBody::new(segment_data);
639761
self.store_segment_data_inner(config, &meta.namespace, body, &segment_key)
640762
.await?;
641763
self.store_segment_index(config, &meta.namespace, &segment_key, segment_index)
@@ -764,11 +886,10 @@ where
764886
config: &Self::Config,
765887
namespace: &NamespaceName,
766888
segment_key: &SegmentKey,
767-
segment_data: impl Stream<Item = Result<Bytes>> + Send + Sync + 'static,
889+
segment_data: impl Stream<Item = Result<Bytes>> + Send + 'static,
768890
) -> Result<()> {
769891
let byte_stream = StreamBody::new(segment_data);
770-
let body = ByteStream::from_body_1_x(byte_stream);
771-
self.store_segment_data_inner(config, namespace, body, &segment_key)
892+
self.store_segment_data_inner(config, namespace, byte_stream, &segment_key)
772893
.await?;
773894

774895
Ok(())
@@ -869,7 +990,7 @@ where
869990
F: FileExt,
870991
{
871992
type Data = Bytes;
872-
type Error = std::io::Error;
993+
type Error = crate::storage::Error;
873994

874995
fn poll_frame(
875996
mut self: Pin<&mut Self>,
@@ -903,7 +1024,7 @@ where
9031024
}
9041025
Poll::Ready(Err(e)) => {
9051026
self.state = StreamState::Done;
906-
return Poll::Ready(Some(Err(e)));
1027+
return Poll::Ready(Some(Err(e.into())));
9071028
}
9081029
Poll::Pending => return Poll::Pending,
9091030
},

0 commit comments

Comments
 (0)