From e05886aac542e9d27d7395551937596cf9c13008 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 5 Jul 2022 19:11:01 -0400 Subject: [PATCH] give zipped files a containing directory to unzip into --- src/download.rs | 2 +- src/main.rs | 6 +- src/store.rs | 7 ++- src/upload.rs | 45 +++++++++----- src/zip.rs | 131 ++++++++++++++++++++++++++-------------- templates/download.html | 4 +- 6 files changed, 126 insertions(+), 69 deletions(-) diff --git a/src/download.rs b/src/download.rs index aad73e8..625d0de 100644 --- a/src/download.rs +++ b/src/download.rs @@ -96,7 +96,7 @@ impl DownloadingFile { fn selected(&self) -> Option<&UploadedFile> { match self.selection { DownloadSelection::All => None, - DownloadSelection::One(n) => Some(self.info.contents.as_ref()?.get(n)?), + DownloadSelection::One(n) => Some(self.info.contents.as_ref()?.files.get(n)?), } } diff --git a/src/main.rs b/src/main.rs index 228ca44..2aa7fdf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,7 +114,7 @@ async fn handle_download( if let Some(selection) = query.download { if let download::DownloadSelection::One(n) = selection { if let Some(ref files) = info.contents { - if n >= files.len() { + if n >= files.files.len() { return not_found(req, data, false); } } else { @@ -129,7 +129,7 @@ async fn handle_download( } .into_response(&req)) } else { - let offsets = info.contents.as_deref().map(zip::file_data_offsets); + let offsets = info.contents.as_ref().map(zip::file_data_offsets); Ok(DownloadPage { info: DownloadInfo { file: info, @@ -167,7 +167,7 @@ async fn download_info( }; let storage_path = data.config.storage_dir.join(code); - let offsets = info.contents.as_deref().map(zip::file_data_offsets); + let offsets = info.contents.as_ref().map(zip::file_data_offsets); Ok(web::Json(DownloadInfo { file: info, code: code.clone(), diff --git a/src/store.rs b/src/store.rs index f8dfe47..670699e 100644 --- a/src/store.rs +++ b/src/store.rs @@ -11,6 +11,7 @@ use rand::{ }; use serde::{Deserialize, Serialize}; 
use serde_with::skip_serializing_none; +use serde_with::{serde_as, PickFirst, FromInto}; use time::OffsetDateTime; use tokio::{ fs::File, @@ -18,6 +19,7 @@ use tokio::{ }; use crate::upload::UploadedFile; +use crate::zip::FileSet; const STATE_FILE_NAME: &str = "files.json"; const MAX_STORAGE_FILES: usize = 1024; @@ -36,6 +38,7 @@ pub fn is_valid_storage_code(s: &str) -> bool { .all(|c| c.is_ascii_alphanumeric() || c == &b'-') } +#[serde_as] #[skip_serializing_none] #[derive(Clone, Deserialize, Serialize)] pub struct StoredFile { @@ -45,7 +48,9 @@ pub struct StoredFile { pub modtime: OffsetDateTime, #[serde(with = "crate::timestamp")] pub expiry: OffsetDateTime, - pub contents: Option<Vec<UploadedFile>>, + #[serde_as(as = "Option<PickFirst<(_, FromInto<Vec<UploadedFile>>)>>")] + #[serde(default)] + pub contents: Option<FileSet>, } async fn is_valid_entry(key: &str, info: &StoredFile, storage_dir: &Path) -> bool { diff --git a/src/upload.rs b/src/upload.rs index 525fb43..0a4b492 100644 --- a/src/upload.rs +++ b/src/upload.rs @@ -6,6 +6,7 @@ use actix_web::web; use actix_web_actors::ws::{self, CloseCode}; use bytes::Bytes; use log::{debug, error, info, trace}; +use sanitise_file_name::{sanitise_with_options, Options as SanOptions}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use unicode_normalization::UnicodeNormalization; @@ -13,6 +14,7 @@ use unicode_normalization::UnicodeNormalization; use crate::{ log_auth_failure, store::{self, FileAddError, StoredFile}, + zip::FileSet, AppState, }; @@ -20,8 +22,18 @@ const MAX_FILES: usize = 256; const FILENAME_DATE_FORMAT: &[time::format_description::FormatItem] = time::macros::format_description!("[year]-[month]-[day]-[hour][minute][second]"); -fn sanitise(name: &str) -> String { - sanitise_file_name::sanitise(&name.nfd().collect::<String>()) +/// Sanitises a filename after performing unicode normalization, +/// optionally reducing the length limit to leave space for an +/// extension yet to be added.
+fn sanitise(name: &str, extension_length: usize) -> String { + let name = name.nfd().collect::<String>(); + sanitise_with_options( + &name, + &SanOptions { + length_limit: SanOptions::DEFAULT.length_limit - extension_length, + ..SanOptions::DEFAULT + }, + ) } #[derive(thiserror::Error, Debug)] @@ -108,7 +120,7 @@ pub struct UploadedFile { impl UploadedFile { fn new(name: &str, size: u64, modtime: OffsetDateTime) -> Self { Self { - name: sanitise(name), + name: sanitise(name, 0), size, modtime, } } @@ -256,7 +268,7 @@ impl Uploader { let mut file = raw_file.process(); while filenames.contains(&file.name) { info!("Duplicate file name: {}", file.name); - if file.name.len() >= sanitise_file_name::Options::DEFAULT.length_limit { + if file.name.len() >= SanOptions::DEFAULT.length_limit { return Err(Error::DuplicateFilename); } file.name.insert(0, '_'); @@ -278,25 +290,28 @@ impl Uploader { .write(true) .create_new(true) .open(&storage_path)?; - let (writer, name, size, modtime): (Box<dyn Write>, _, _, _) = if files.len() > 1 { + let (writer, name, size, modtime, contents): (Box<dyn Write>, _, _, _, _) = if files.len() > 1 { info!("Wrapping in zipfile generator"); let now = OffsetDateTime::now_utc(); - let zip_writer = super::zip::ZipGenerator::new(files.clone(), writer); - let size = zip_writer.total_size(); - let download_filename = collection_name - .map(|f| sanitise(&(f + ".zip"))) - .unwrap_or_else(|| { - super::APP_NAME.to_owned() - + &now.format(FILENAME_DATE_FORMAT).unwrap() - + ".zip" + let collection_name = + collection_name.map(|f| sanitise(&f, 4)).unwrap_or_else(|| { + super::APP_NAME.to_owned() + &now.format(FILENAME_DATE_FORMAT).unwrap() }); - (Box::new(zip_writer), download_filename, size, now) + let file_set = FileSet { + files, + directory_name: Some(collection_name.clone()), + }; + let zip_writer = + super::zip::ZipGenerator::new(file_set.clone(), writer); + let size = zip_writer.total_size(); + (Box::new(zip_writer), collection_name + ".zip", size, now, Some(file_set)) } else { (
Box::new(writer), files[0].name.clone(), files[0].size, files[0].modtime, + None ) }; self.writer = Some(writer); @@ -305,7 +320,7 @@ size, modtime, expiry: OffsetDateTime::now_utc() + lifetime * time::Duration::DAY, - contents: if files.len() > 1 { Some(files) } else { None }, + contents, }; let state = self.app_state.clone(); let storage_filename = self.storage_filename.clone(); diff --git a/src/zip.rs b/src/zip.rs index 3160e84..38d42e2 100644 --- a/src/zip.rs +++ b/src/zip.rs @@ -2,10 +2,13 @@ use std::io::Write; use crc32fast::Hasher; use log::debug; +use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use crate::upload::UploadedFile; +const SLASH: u8 = 0x2f; + const SIGNATURE_SIZE: u64 = 4; const SHARED_FIELDS_SIZE: u64 = 26; const EXTRA_FIELD_SIZE: u64 = 41; @@ -25,45 +28,67 @@ const EOCD_TOTAL_SIZE: u64 = EOCD64_RECORD_SIZE + EOCD64_LOCATOR_SIZE + EOCD_REC const EMPTY_STRING_CRC32: u32 = 0; -fn file_entry_size(file: &UploadedFile) -> u64 { - FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + file.name.len() as u64 + file.size +#[derive(Clone, Deserialize, Serialize)] +pub struct FileSet { + pub files: Vec<UploadedFile>, + // Optional for backwards compatibility only + pub directory_name: Option<String>, } -fn file_entries_size(files: &[UploadedFile]) -> u64 { +impl From<Vec<UploadedFile>> for FileSet { + fn from(files: Vec<UploadedFile>) -> Self { + Self { files, directory_name: None } + } +} + +fn full_file_name_len(file: &UploadedFile, directory_name: &Option<String>) -> u64 { + file.name.len() as u64 + if let Some(d) = directory_name { + d.len() as u64 + 1 + } else { + 0 + } +} + +fn file_entry_size(file: &UploadedFile, directory_name: &Option<String>) -> u64 { + FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + full_file_name_len(file, directory_name) + file.size +} + +fn file_entries_size(files: &FileSet, bound: Option<usize>) -> u64 { let mut total = 0; - for file in files.iter() { - total += file_entry_size(file) + let fs = if let Some(n) = bound { &files.files[..n] } else { &files.files }; + for file in fs.iter() {
+ total += file_entry_size(file, &files.directory_name) } total } -pub fn file_data_offset(files: &[UploadedFile], idx: usize) -> u64 { - file_entries_size(&files[..idx]) +pub fn file_data_offset(files: &FileSet, idx: usize) -> u64 { + file_entries_size(files, Some(idx)) + LOCAL_HEADER_SIZE_MINUS_FILENAME - + files[idx].name.len() as u64 + + full_file_name_len(&files.files[idx], &files.directory_name) } -pub fn file_data_offsets(files: &[UploadedFile]) -> Vec<u64> { +pub fn file_data_offsets(files: &FileSet) -> Vec<u64> { let mut offsets = Vec::new(); let mut offset: u64 = 0; - for file in files.iter() { - offset += LOCAL_HEADER_SIZE_MINUS_FILENAME + file.name.len() as u64; + for file in files.files.iter() { + offset += LOCAL_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name); offsets.push(offset); offset += file.size + DATA_DESCRIPTOR_SIZE; } offsets } -fn central_directory_size(files: &[UploadedFile]) -> u64 { +fn central_directory_size(files: &FileSet) -> u64 { let mut total = 0; - for file in files.iter() { - total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + file.name.len() as u64; + for file in files.files.iter() { + total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name); } total } -fn zipfile_size(files: &[UploadedFile]) -> u64 { - file_entries_size(files) + central_directory_size(files) + EOCD_TOTAL_SIZE +fn zipfile_size(files: &FileSet) -> u64 { + file_entries_size(files, None) + central_directory_size(files) + EOCD_TOTAL_SIZE } fn fat_timestamp(time: OffsetDateTime) -> u32 { @@ -100,7 +125,7 @@ impl UploadedFile { /// Returns the fields shared by the ZIP local file header and /// central directory file header - "Version needed to extract" /// through "Extra field length".
- fn shared_header_fields(&self, hash: Option<u32>) -> Vec<u8> { + fn shared_header_fields(&self, directory_name: &Option<String>, hash: Option<u32>) -> Vec<u8> { let mut fields = vec![ 45, 0, // Minimum version required to extract: 4.5 for ZIP64 0b00001000, // General purpose bit flag: bit 3 - size and CRC-32 in data descriptor @@ -112,7 +137,7 @@ impl UploadedFile { append_value(&mut fields, hash.unwrap_or(0) as u64, 4); // Placeholders for compressed and uncompressed size in ZIP64 record, 4 bytes each append_ff(&mut fields, 8); - append_value(&mut fields, self.name.len() as u64, 2); + append_value(&mut fields, full_file_name_len(self, directory_name), 2); // Extra field length: 32 bytes for zip64, 9 bytes for timestamp fields.append(&mut vec![41, 0]); fields @@ -142,24 +167,34 @@ impl UploadedFile { field } - fn local_header(&self, local_header_offset: u64) -> Vec<u8> { + fn full_name_bytes(&self, directory_name: &Option<String>) -> Vec<u8> { + let mut b = vec![]; + if let Some(d) = directory_name { + b.append(&mut d.to_owned().into_bytes()); + b.push(SLASH); + } + b.append(&mut self.name.clone().into_bytes()); + b + } + + fn local_header(&self, directory_name: &Option<String>, local_header_offset: u64) -> Vec<u8> { let mut header = vec![0x50, 0x4b, 0x03, 0x04]; // Local file header signature - header.append(&mut self.shared_header_fields(None)); - header.append(&mut self.name.clone().into_bytes()); + header.append(&mut self.shared_header_fields(directory_name, None)); + header.append(&mut self.full_name_bytes(directory_name)); header.append(&mut self.extra_field(local_header_offset)); header } - fn central_directory_header(&self, local_header_offset: u64, hash: u32) -> Vec<u8> { + fn central_directory_header(&self, directory_name: &Option<String>, local_header_offset: u64, hash: u32) -> Vec<u8> { let mut header = vec![ 0x50, 0x4b, 0x01, 0x02, // Central directory file header signature 45, 3, // Made by a Unix system supporting version 4.5 ]; - header.append(&mut self.shared_header_fields(Some(hash))); + header.append(&mut
self.shared_header_fields(directory_name, Some(hash))); append_0(&mut header, 8); // Comment length, disk number, internal attributes, DOS external attributes append_value(&mut header, 0o100644, 2); // Unix external file attributes: -rw-r--r-- append_ff(&mut header, 4); // Relative offset of local file header: placeholder, see ZIP64 data - header.append(&mut self.name.clone().into_bytes()); + header.append(&mut self.full_name_bytes(directory_name)); header.append(&mut self.extra_field(local_header_offset)); header } @@ -174,8 +209,8 @@ impl UploadedFile { } } -fn end_of_central_directory(files: &[UploadedFile]) -> Vec<u8> { - let entries_size = file_entries_size(files); +fn end_of_central_directory(files: &FileSet) -> Vec<u8> { + let entries_size = file_entries_size(files, None); let directory_size = central_directory_size(files); let mut eocd = vec![ @@ -188,9 +223,10 @@ fn end_of_central_directory(files: &[UploadedFile]) -> Vec { 45, 0, // Minimum version 4.5 to extract ]); append_0(&mut eocd, 8); // Two 4-byte disk numbers, both 0 - // Number of central directory records, on this disk and in total - append_value(&mut eocd, files.len() as u64, 8); - append_value(&mut eocd, files.len() as u64, 8); + + // Number of central directory records, on this disk and in total + append_value(&mut eocd, files.files.len() as u64, 8); + append_value(&mut eocd, files.files.len() as u64, 8); append_value(&mut eocd, directory_size, 8); append_value(&mut eocd, entries_size, 8); // Offset of start of central directory @@ -207,7 +243,7 @@ fn end_of_central_directory(files: &[UploadedFile]) -> Vec { } pub struct ZipGenerator<W: Write> { - files: Vec<UploadedFile>, + files: FileSet, file_index: usize, byte_index: u64, pending_metadata: Vec<u8>, @@ -217,7 +253,7 @@ pub struct ZipGenerator { } impl<W: Write> ZipGenerator<W> { - pub fn new(files: Vec<UploadedFile>, output: W) -> Self { + pub fn new(files: FileSet, output: W) -> Self { let mut result = Self { files, file_index: 0, @@ -239,39 +275,39 @@ impl ZipGenerator { let hash =
std::mem::replace(&mut self.hasher, Hasher::new()).finalize(); self.hashes.push(hash); self.pending_metadata - .append(&mut self.files[self.file_index].data_descriptor(hash)); + .append(&mut self.files.files[self.file_index].data_descriptor(hash)); debug!( "Finishing file entry in zipfile: {}, hash {:x}", - self.files[self.file_index].name, hash + self.files.files[self.file_index].name, hash ); self.file_index += 1; self.start_new_file(); } fn start_new_file(&mut self) { - let mut offset = file_entries_size(&self.files[..self.file_index]); - while self.file_index < self.files.len() && self.files[self.file_index].size == 0 { + let mut offset = file_entries_size(&self.files, Some(self.file_index)); + while self.file_index < self.files.files.len() && self.files.files[self.file_index].size == 0 { debug!( "Empty file entry in zipfile: {}", - self.files[self.file_index].name + self.files.files[self.file_index].name ); self.hashes.push(EMPTY_STRING_CRC32); - let mut local_header = self.files[self.file_index].local_header(offset); + let mut local_header = self.files.files[self.file_index].local_header(&self.files.directory_name, offset); let mut data_descriptor = - self.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32); + self.files.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32); offset += local_header.len() as u64 + data_descriptor.len() as u64; self.file_index += 1; self.pending_metadata.append(&mut local_header); self.pending_metadata.append(&mut data_descriptor); } - if self.file_index < self.files.len() { + if self.file_index < self.files.files.len() { debug!( "Starting file entry in zipfile: {}", - self.files[self.file_index].name + self.files.files[self.file_index].name ); self.byte_index = 0; self.pending_metadata - .append(&mut self.files[self.file_index].local_header(offset)); + .append(&mut self.files.files[self.file_index].local_header(&self.files.directory_name, offset)); } else { self.finish_zipfile(); } @@ -280,14 +316,14 @@ impl 
ZipGenerator { fn finish_zipfile(&mut self) { debug!("Writing zipfile central directory"); let mut offset = 0; - for (i, file) in self.files.iter().enumerate() { + for (i, file) in self.files.files.iter().enumerate() { debug!( "Writing central directory entry: {}, hash {}", file.name, self.hashes[i] ); self.pending_metadata - .append(&mut file.central_directory_header(offset, self.hashes[i])); - offset += file_entry_size(file); + .append(&mut file.central_directory_header(&self.files.directory_name, offset, self.hashes[i])); + offset += file_entry_size(file, &self.files.directory_name); } debug!("Writing end of central directory"); self.pending_metadata @@ -308,10 +344,10 @@ impl Write for ZipGenerator { } } } - if self.file_index >= self.files.len() { + if self.file_index >= self.files.files.len() { return Ok(0); } - let bytes_remaining = self.files[self.file_index].size - self.byte_index; + let bytes_remaining = self.files.files[self.file_index].size - self.byte_index; if bytes_remaining < (buf.len() as u64) { buf = &buf[..bytes_remaining as usize]; } @@ -347,7 +383,8 @@ mod tests { fn test_no_files() { let mut output: Vec<u8> = vec![]; { - let mut zipgen = ZipGenerator::new(vec![], Box::new(std::io::Cursor::new(&mut output))); + let mut zipgen = + ZipGenerator::new(FileSet { files: vec![], directory_name: Some("test".to_owned()) }, Box::new(std::io::Cursor::new(&mut output))); zipgen.write_all(&[]).unwrap(); zipgen.flush().unwrap(); } diff --git a/templates/download.html b/templates/download.html index bd2f7f0..1c6e13c 100644 --- a/templates/download.html +++ b/templates/download.html @@ -7,7 +7,7 @@ {% let formatted_total_size = bytesize::to_string(info.file.size.clone(), false).replace(" ", "") -%} {% match info.file.contents -%} {% when Some with (files) -%} - {{ files.len() }} files, {{ formatted_total_size }} total + {{ files.files.len() }} files, {{ formatted_total_size }} total {%- else -%} {{ formatted_total_size }} {%- endmatch %}, expires {{
info.file.expiry.format(DATE_DISPLAY_FORMAT).unwrap() }} @@ -37,7 +37,7 @@ Show file list {% let offsets = info.offsets.as_ref().unwrap() %} - {% for f in files %} + {% for f in files.files %}
{{ bytesize::to_string(f.size.clone(), false).replace(" ", "") }} {{ f.name }}