give zipped files a containing directory to unzip into

main
xenofem 2022-07-05 19:11:01 -04:00
parent faf018a0a5
commit e05886aac5
6 changed files with 126 additions and 69 deletions

View File

@ -96,7 +96,7 @@ impl DownloadingFile {
fn selected(&self) -> Option<&UploadedFile> {
match self.selection {
DownloadSelection::All => None,
DownloadSelection::One(n) => Some(self.info.contents.as_ref()?.get(n)?),
DownloadSelection::One(n) => Some(self.info.contents.as_ref()?.files.get(n)?),
}
}

View File

@ -114,7 +114,7 @@ async fn handle_download(
if let Some(selection) = query.download {
if let download::DownloadSelection::One(n) = selection {
if let Some(ref files) = info.contents {
if n >= files.len() {
if n >= files.files.len() {
return not_found(req, data, false);
}
} else {
@ -129,7 +129,7 @@ async fn handle_download(
}
.into_response(&req))
} else {
let offsets = info.contents.as_deref().map(zip::file_data_offsets);
let offsets = info.contents.as_ref().map(zip::file_data_offsets);
Ok(DownloadPage {
info: DownloadInfo {
file: info,
@ -167,7 +167,7 @@ async fn download_info(
};
let storage_path = data.config.storage_dir.join(code);
let offsets = info.contents.as_deref().map(zip::file_data_offsets);
let offsets = info.contents.as_ref().map(zip::file_data_offsets);
Ok(web::Json(DownloadInfo {
file: info,
code: code.clone(),

View File

@ -11,6 +11,7 @@ use rand::{
};
use serde::{Deserialize, Serialize};
use serde_with::skip_serializing_none;
use serde_with::{serde_as, PickFirst, FromInto};
use time::OffsetDateTime;
use tokio::{
fs::File,
@ -18,6 +19,7 @@ use tokio::{
};
use crate::upload::UploadedFile;
use crate::zip::FileSet;
const STATE_FILE_NAME: &str = "files.json";
const MAX_STORAGE_FILES: usize = 1024;
@ -36,6 +38,7 @@ pub fn is_valid_storage_code(s: &str) -> bool {
.all(|c| c.is_ascii_alphanumeric() || c == &b'-')
}
#[serde_as]
#[skip_serializing_none]
#[derive(Clone, Deserialize, Serialize)]
pub struct StoredFile {
@ -45,7 +48,9 @@ pub struct StoredFile {
pub modtime: OffsetDateTime,
#[serde(with = "crate::timestamp")]
pub expiry: OffsetDateTime,
pub contents: Option<Vec<UploadedFile>>,
#[serde_as(as = "Option<PickFirst<(_, FromInto<Vec<UploadedFile>>)>>")]
#[serde(default)]
pub contents: Option<FileSet>,
}
async fn is_valid_entry(key: &str, info: &StoredFile, storage_dir: &Path) -> bool {

View File

@ -6,6 +6,7 @@ use actix_web::web;
use actix_web_actors::ws::{self, CloseCode};
use bytes::Bytes;
use log::{debug, error, info, trace};
use sanitise_file_name::{sanitise_with_options, Options as SanOptions};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use unicode_normalization::UnicodeNormalization;
@ -13,6 +14,7 @@ use unicode_normalization::UnicodeNormalization;
use crate::{
log_auth_failure,
store::{self, FileAddError, StoredFile},
zip::FileSet,
AppState,
};
@ -20,8 +22,18 @@ const MAX_FILES: usize = 256;
const FILENAME_DATE_FORMAT: &[time::format_description::FormatItem] =
time::macros::format_description!("[year]-[month]-[day]-[hour][minute][second]");
fn sanitise(name: &str) -> String {
sanitise_file_name::sanitise(&name.nfd().collect::<String>())
/// Sanitises a filename after performing unicode normalization,
/// optionally reducing the length limit to leave space for an
/// extension yet to be added.
///
/// `extension_length` is subtracted from the sanitiser's default
/// length limit; the subtraction saturates so an oversized value
/// cannot underflow the `usize` and panic.
fn sanitise(name: &str, extension_length: usize) -> String {
    // NFD-normalise first so the sanitiser operates on decomposed codepoints.
    let name = name.nfd().collect::<String>();
    sanitise_with_options(
        &name,
        &SanOptions {
            // saturating_sub: a plain `-` would panic in debug (wrap in
            // release) if extension_length ever exceeded the default limit.
            length_limit: SanOptions::DEFAULT.length_limit.saturating_sub(extension_length),
            ..SanOptions::DEFAULT
        },
    )
}
#[derive(thiserror::Error, Debug)]
@ -108,7 +120,7 @@ pub struct UploadedFile {
impl UploadedFile {
fn new(name: &str, size: u64, modtime: OffsetDateTime) -> Self {
Self {
name: sanitise(name),
name: sanitise(name, 0),
size,
modtime,
}
@ -256,7 +268,7 @@ impl Uploader {
let mut file = raw_file.process();
while filenames.contains(&file.name) {
info!("Duplicate file name: {}", file.name);
if file.name.len() >= sanitise_file_name::Options::DEFAULT.length_limit {
if file.name.len() >= SanOptions::DEFAULT.length_limit {
return Err(Error::DuplicateFilename);
}
file.name.insert(0, '_');
@ -278,25 +290,28 @@ impl Uploader {
.write(true)
.create_new(true)
.open(&storage_path)?;
let (writer, name, size, modtime): (Box<dyn Write>, _, _, _) = if files.len() > 1 {
let (writer, name, size, modtime, contents): (Box<dyn Write>, _, _, _, _) = if files.len() > 1 {
info!("Wrapping in zipfile generator");
let now = OffsetDateTime::now_utc();
let zip_writer = super::zip::ZipGenerator::new(files.clone(), writer);
let size = zip_writer.total_size();
let download_filename = collection_name
.map(|f| sanitise(&(f + ".zip")))
.unwrap_or_else(|| {
super::APP_NAME.to_owned()
+ &now.format(FILENAME_DATE_FORMAT).unwrap()
+ ".zip"
let collection_name =
collection_name.map(|f| sanitise(&f, 4)).unwrap_or_else(|| {
super::APP_NAME.to_owned() + &now.format(FILENAME_DATE_FORMAT).unwrap()
});
(Box::new(zip_writer), download_filename, size, now)
let file_set = FileSet {
files,
directory_name: Some(collection_name.clone()),
};
let zip_writer =
super::zip::ZipGenerator::new(file_set.clone(), writer);
let size = zip_writer.total_size();
(Box::new(zip_writer), collection_name + ".zip", size, now, Some(file_set))
} else {
(
Box::new(writer),
files[0].name.clone(),
files[0].size,
files[0].modtime,
None
)
};
self.writer = Some(writer);
@ -305,7 +320,7 @@ impl Uploader {
size,
modtime,
expiry: OffsetDateTime::now_utc() + lifetime * time::Duration::DAY,
contents: if files.len() > 1 { Some(files) } else { None },
contents,
};
let state = self.app_state.clone();
let storage_filename = self.storage_filename.clone();

View File

@ -2,10 +2,13 @@ use std::io::Write;
use crc32fast::Hasher;
use log::debug;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use crate::upload::UploadedFile;
const SLASH: u8 = 0x2f;
const SIGNATURE_SIZE: u64 = 4;
const SHARED_FIELDS_SIZE: u64 = 26;
const EXTRA_FIELD_SIZE: u64 = 41;
@ -25,45 +28,67 @@ const EOCD_TOTAL_SIZE: u64 = EOCD64_RECORD_SIZE + EOCD64_LOCATOR_SIZE + EOCD_REC
const EMPTY_STRING_CRC32: u32 = 0;
fn file_entry_size(file: &UploadedFile) -> u64 {
FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + file.name.len() as u64 + file.size
/// A set of uploaded files plus the name of the directory they are
/// placed under when bundled together into a generated zipfile.
#[derive(Clone, Deserialize, Serialize)]
pub struct FileSet {
pub files: Vec<UploadedFile>,
// Optional for backwards compatibility only: state files written
// before this field existed deserialize to `None` (see the
// `From<Vec<UploadedFile>>` impl below).
pub directory_name: Option<String>,
}
fn file_entries_size(files: &[UploadedFile]) -> u64 {
impl From<Vec<UploadedFile>> for FileSet {
fn from(files: Vec<UploadedFile>) -> Self {
Self { files, directory_name: None }
}
}
/// Length in bytes of a file's name as it appears inside the
/// zipfile, including the containing-directory prefix and its
/// trailing slash when a directory name is present.
fn full_file_name_len(file: &UploadedFile, directory_name: &Option<String>) -> u64 {
    let prefix_len = directory_name
        .as_ref()
        .map_or(0, |dir| dir.len() as u64 + 1); // +1 for the '/' separator
    file.name.len() as u64 + prefix_len
}
/// On-disk footprint of one file's entry in the zipfile: the fixed
/// header/descriptor overhead, the full (directory-prefixed) name,
/// and the file data itself.
fn file_entry_size(file: &UploadedFile, directory_name: &Option<String>) -> u64 {
FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + full_file_name_len(file, directory_name) + file.size
}
fn file_entries_size(files: &FileSet, bound: Option<usize>) -> u64 {
let mut total = 0;
for file in files.iter() {
total += file_entry_size(file)
let fs = if let Some(n) = bound { &files.files[..n] } else { &files.files };
for file in fs.iter() {
total += file_entry_size(file, &files.directory_name)
}
total
}
pub fn file_data_offset(files: &[UploadedFile], idx: usize) -> u64 {
file_entries_size(&files[..idx])
/// Byte offset within the zipfile at which the data of the file at
/// index `idx` begins: all complete entries before it, plus that
/// file's own local header (fixed part + full directory-prefixed name).
pub fn file_data_offset(files: &FileSet, idx: usize) -> u64 {
    file_entries_size(files, Some(idx))
        + LOCAL_HEADER_SIZE_MINUS_FILENAME
        + full_file_name_len(&files.files[idx], &files.directory_name)
}
pub fn file_data_offsets(files: &[UploadedFile]) -> Vec<u64> {
pub fn file_data_offsets(files: &FileSet) -> Vec<u64> {
let mut offsets = Vec::new();
let mut offset: u64 = 0;
for file in files.iter() {
offset += LOCAL_HEADER_SIZE_MINUS_FILENAME + file.name.len() as u64;
for file in files.files.iter() {
offset += LOCAL_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name);
offsets.push(offset);
offset += file.size + DATA_DESCRIPTOR_SIZE;
}
offsets
}
fn central_directory_size(files: &[UploadedFile]) -> u64 {
fn central_directory_size(files: &FileSet) -> u64 {
let mut total = 0;
for file in files.iter() {
total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + file.name.len() as u64;
for file in files.files.iter() {
total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name);
}
total
}
fn zipfile_size(files: &[UploadedFile]) -> u64 {
file_entries_size(files) + central_directory_size(files) + EOCD_TOTAL_SIZE
/// Total size in bytes of the generated zipfile: all file entries,
/// the central directory, and the end-of-central-directory records.
fn zipfile_size(files: &FileSet) -> u64 {
file_entries_size(files, None) + central_directory_size(files) + EOCD_TOTAL_SIZE
}
fn fat_timestamp(time: OffsetDateTime) -> u32 {
@ -100,7 +125,7 @@ impl UploadedFile {
/// Returns the fields shared by the ZIP local file header and
/// central directory file header - "Version needed to extract"
/// through "Extra field length".
fn shared_header_fields(&self, hash: Option<u32>) -> Vec<u8> {
fn shared_header_fields(&self, directory_name: &Option<String>, hash: Option<u32>) -> Vec<u8> {
let mut fields = vec![
45, 0, // Minimum version required to extract: 4.5 for ZIP64
0b00001000, // General purpose bit flag: bit 3 - size and CRC-32 in data descriptor
@ -112,7 +137,7 @@ impl UploadedFile {
append_value(&mut fields, hash.unwrap_or(0) as u64, 4);
// Placeholders for compressed and uncompressed size in ZIP64 record, 4 bytes each
append_ff(&mut fields, 8);
append_value(&mut fields, self.name.len() as u64, 2);
append_value(&mut fields, full_file_name_len(self, directory_name), 2);
// Extra field length: 32 bytes for zip64, 9 bytes for timestamp
fields.append(&mut vec![41, 0]);
fields
@ -142,24 +167,34 @@ impl UploadedFile {
field
}
fn local_header(&self, local_header_offset: u64) -> Vec<u8> {
fn full_name_bytes(&self, directory_name: &Option<String>) -> Vec<u8> {
let mut b = vec![];
if let Some(d) = directory_name {
b.append(&mut d.to_owned().into_bytes());
b.push(SLASH);
}
b.append(&mut self.name.clone().into_bytes());
b
}
/// Builds the ZIP local file header for this file: signature, the
/// fields shared with the central directory header (CRC passed as
/// `None` — the real value follows in the data descriptor), the full
/// directory-prefixed name, and the ZIP64/timestamp extra field.
fn local_header(&self, directory_name: &Option<String>, local_header_offset: u64) -> Vec<u8> {
    let mut header = vec![0x50, 0x4b, 0x03, 0x04]; // Local file header signature
    header.append(&mut self.shared_header_fields(directory_name, None));
    header.append(&mut self.full_name_bytes(directory_name));
    header.append(&mut self.extra_field(local_header_offset));
    header
}
fn central_directory_header(&self, local_header_offset: u64, hash: u32) -> Vec<u8> {
/// Builds this file's central directory header: signature and
/// version-made-by, the shared fields (now carrying the real CRC),
/// fixed attribute fields, the full directory-prefixed name, and the
/// extra field holding the ZIP64 local header offset.
fn central_directory_header(&self, directory_name: &Option<String>, local_header_offset: u64, hash: u32) -> Vec<u8> {
    let mut header = vec![
        0x50, 0x4b, 0x01, 0x02, // Central directory file header signature
        45, 3, // Made by a Unix system supporting version 4.5
    ];
    header.append(&mut self.shared_header_fields(directory_name, Some(hash)));
    append_0(&mut header, 8); // Comment length, disk number, internal attributes, DOS external attributes
    append_value(&mut header, 0o100644, 2); // Unix external file attributes: -rw-r--r--
    append_ff(&mut header, 4); // Relative offset of local file header: placeholder, see ZIP64 data
    header.append(&mut self.full_name_bytes(directory_name));
    header.append(&mut self.extra_field(local_header_offset));
    header
}
@ -174,8 +209,8 @@ impl UploadedFile {
}
}
fn end_of_central_directory(files: &[UploadedFile]) -> Vec<u8> {
let entries_size = file_entries_size(files);
fn end_of_central_directory(files: &FileSet) -> Vec<u8> {
let entries_size = file_entries_size(files, None);
let directory_size = central_directory_size(files);
let mut eocd = vec![
@ -188,9 +223,10 @@ fn end_of_central_directory(files: &[UploadedFile]) -> Vec<u8> {
45, 0, // Minimum version 4.5 to extract
]);
append_0(&mut eocd, 8); // Two 4-byte disk numbers, both 0
// Number of central directory records, on this disk and in total
append_value(&mut eocd, files.len() as u64, 8);
append_value(&mut eocd, files.len() as u64, 8);
// Number of central directory records, on this disk and in total
append_value(&mut eocd, files.files.len() as u64, 8);
append_value(&mut eocd, files.files.len() as u64, 8);
append_value(&mut eocd, directory_size, 8);
append_value(&mut eocd, entries_size, 8); // Offset of start of central directory
@ -207,7 +243,7 @@ fn end_of_central_directory(files: &[UploadedFile]) -> Vec<u8> {
}
pub struct ZipGenerator<W: Write> {
files: Vec<UploadedFile>,
files: FileSet,
file_index: usize,
byte_index: u64,
pending_metadata: Vec<u8>,
@ -217,7 +253,7 @@ pub struct ZipGenerator<W: Write> {
}
impl<W: Write> ZipGenerator<W> {
pub fn new(files: Vec<UploadedFile>, output: W) -> Self {
pub fn new(files: FileSet, output: W) -> Self {
let mut result = Self {
files,
file_index: 0,
@ -239,39 +275,39 @@ impl<W: Write> ZipGenerator<W> {
let hash = std::mem::replace(&mut self.hasher, Hasher::new()).finalize();
self.hashes.push(hash);
self.pending_metadata
.append(&mut self.files[self.file_index].data_descriptor(hash));
.append(&mut self.files.files[self.file_index].data_descriptor(hash));
debug!(
"Finishing file entry in zipfile: {}, hash {:x}",
self.files[self.file_index].name, hash
self.files.files[self.file_index].name, hash
);
self.file_index += 1;
self.start_new_file();
}
fn start_new_file(&mut self) {
let mut offset = file_entries_size(&self.files[..self.file_index]);
while self.file_index < self.files.len() && self.files[self.file_index].size == 0 {
let mut offset = file_entries_size(&self.files, Some(self.file_index));
while self.file_index < self.files.files.len() && self.files.files[self.file_index].size == 0 {
debug!(
"Empty file entry in zipfile: {}",
self.files[self.file_index].name
self.files.files[self.file_index].name
);
self.hashes.push(EMPTY_STRING_CRC32);
let mut local_header = self.files[self.file_index].local_header(offset);
let mut local_header = self.files.files[self.file_index].local_header(&self.files.directory_name, offset);
let mut data_descriptor =
self.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32);
self.files.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32);
offset += local_header.len() as u64 + data_descriptor.len() as u64;
self.file_index += 1;
self.pending_metadata.append(&mut local_header);
self.pending_metadata.append(&mut data_descriptor);
}
if self.file_index < self.files.len() {
if self.file_index < self.files.files.len() {
debug!(
"Starting file entry in zipfile: {}",
self.files[self.file_index].name
self.files.files[self.file_index].name
);
self.byte_index = 0;
self.pending_metadata
.append(&mut self.files[self.file_index].local_header(offset));
.append(&mut self.files.files[self.file_index].local_header(&self.files.directory_name, offset));
} else {
self.finish_zipfile();
}
@ -280,14 +316,14 @@ impl<W: Write> ZipGenerator<W> {
fn finish_zipfile(&mut self) {
debug!("Writing zipfile central directory");
let mut offset = 0;
for (i, file) in self.files.iter().enumerate() {
for (i, file) in self.files.files.iter().enumerate() {
debug!(
"Writing central directory entry: {}, hash {}",
file.name, self.hashes[i]
);
self.pending_metadata
.append(&mut file.central_directory_header(offset, self.hashes[i]));
offset += file_entry_size(file);
.append(&mut file.central_directory_header(&self.files.directory_name, offset, self.hashes[i]));
offset += file_entry_size(file, &self.files.directory_name);
}
debug!("Writing end of central directory");
self.pending_metadata
@ -308,10 +344,10 @@ impl<W: Write> Write for ZipGenerator<W> {
}
}
}
if self.file_index >= self.files.len() {
if self.file_index >= self.files.files.len() {
return Ok(0);
}
let bytes_remaining = self.files[self.file_index].size - self.byte_index;
let bytes_remaining = self.files.files[self.file_index].size - self.byte_index;
if bytes_remaining < (buf.len() as u64) {
buf = &buf[..bytes_remaining as usize];
}
@ -347,7 +383,8 @@ mod tests {
fn test_no_files() {
let mut output: Vec<u8> = vec![];
{
let mut zipgen = ZipGenerator::new(vec![], Box::new(std::io::Cursor::new(&mut output)));
let mut zipgen =
ZipGenerator::new(FileSet { files: vec![], directory_name: "test".to_owned() }, Box::new(std::io::Cursor::new(&mut output)));
zipgen.write_all(&[]).unwrap();
zipgen.flush().unwrap();
}

View File

@ -7,7 +7,7 @@
{% let formatted_total_size = bytesize::to_string(info.file.size.clone(), false).replace(" ", "") -%}
{% match info.file.contents -%}
{% when Some with (files) -%}
{{ files.len() }} files, {{ formatted_total_size }} total
{{ files.files.len() }} files, {{ formatted_total_size }} total
{%- else -%}
{{ formatted_total_size }}
{%- endmatch %}, expires {{ info.file.expiry.format(DATE_DISPLAY_FORMAT).unwrap() }}
@ -37,7 +37,7 @@
<summary>Show file list</summary>
<table><tbody>
{% let offsets = info.offsets.as_ref().unwrap() %}
{% for f in files %}
{% for f in files.files %}
<tr class="{% if offsets.get(loop.index0.clone()).unwrap().clone() > info.available %}unavailable{% endif %}">
<td class="file_size">{{ bytesize::to_string(f.size.clone(), false).replace(" ", "") }}</td>
<td class="file_name">{{ f.name }}</td>