2022-04-26 23:54:29 -04:00
|
|
|
use std::io::Write;
|
|
|
|
use std::task::Waker;
|
|
|
|
|
|
|
|
use crc32fast::Hasher;
|
|
|
|
use log::debug;
|
|
|
|
use time::OffsetDateTime;
|
|
|
|
|
|
|
|
use crate::file::LiveWriter;
|
2022-04-27 00:55:36 -04:00
|
|
|
use crate::UploadedFile;
|
2022-04-26 23:54:29 -04:00
|
|
|
|
|
|
|
/// Every ZIP record written here starts with a 4-byte signature.
const SIGNATURE_SIZE: usize = 4;
/// Size of the fields common to the local file header and the central
/// directory file header ("Version needed to extract" through
/// "Extra field length").
const SHARED_FIELDS_SIZE: usize = 26;
/// Size of the extra field attached to each header: 32 bytes for the
/// ZIP64 record plus 9 bytes for the extended timestamp record.
const EXTRA_FIELD_SIZE: usize = 41;
/// Size of a local file header, not counting the filename.
const LOCAL_HEADER_SIZE_MINUS_FILENAME: usize =
    SIGNATURE_SIZE + SHARED_FIELDS_SIZE + EXTRA_FIELD_SIZE;
/// Size of a data descriptor: signature, CRC-32, and two 8-byte sizes.
const DATA_DESCRIPTOR_SIZE: usize = 24;
/// Per-file overhead in the file entries section, not counting the
/// filename or the file contents.
const FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE: usize =
    LOCAL_HEADER_SIZE_MINUS_FILENAME + DATA_DESCRIPTOR_SIZE;
/// Size of a central directory file header, not counting the filename:
/// 2 bytes of "version made by" precede the shared fields, and 14 bytes
/// of comment length, disk number, attributes and offset follow them.
const CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME: usize =
    SIGNATURE_SIZE + 2 + SHARED_FIELDS_SIZE + 14 + EXTRA_FIELD_SIZE;
/// Size of the ZIP64 end of central directory record.
const EOCD64_RECORD_SIZE: usize = 56;
/// Size of the ZIP64 end of central directory locator.
const EOCD64_LOCATOR_SIZE: usize = 20;
/// Size of the classic end of central directory record (no comment).
const EOCD_RECORD_SIZE: usize = 22;
/// Combined size of everything written after the central directory.
const EOCD_TOTAL_SIZE: usize = EOCD64_RECORD_SIZE + EOCD64_LOCATOR_SIZE + EOCD_RECORD_SIZE;
/// CRC-32 recorded for files of size zero.
const EMPTY_STRING_CRC32: u32 = 0;
|
|
|
|
|
|
|
|
fn file_entry_size(file: &UploadedFile) -> usize {
|
|
|
|
FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + file.name.len() + file.size
|
|
|
|
}
|
|
|
|
|
|
|
|
fn file_entries_size(files: &[UploadedFile]) -> usize {
|
|
|
|
let mut total = 0;
|
|
|
|
for file in files.iter() {
|
|
|
|
total += file_entry_size(file)
|
|
|
|
}
|
|
|
|
total
|
|
|
|
}
|
|
|
|
|
|
|
|
fn central_directory_size(files: &[UploadedFile]) -> usize {
|
|
|
|
let mut total = 0;
|
|
|
|
for file in files.iter() {
|
|
|
|
total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + file.name.len();
|
|
|
|
}
|
|
|
|
total
|
|
|
|
}
|
|
|
|
|
|
|
|
fn zipfile_size(files: &[UploadedFile]) -> usize {
|
|
|
|
file_entries_size(files) + central_directory_size(files) + EOCD_TOTAL_SIZE
|
|
|
|
}
|
|
|
|
|
|
|
|
fn fat_timestamp(time: OffsetDateTime) -> u32 {
|
|
|
|
(((time.year() - 1980) as u32) << 25)
|
|
|
|
| ((time.month() as u32) << 21)
|
|
|
|
| ((time.day() as u32) << 16)
|
|
|
|
| ((time.hour() as u32) << 11)
|
|
|
|
| ((time.minute() as u32) << 5)
|
|
|
|
| ((time.second() as u32) >> 1)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Append the low `len` bytes of `value` to a byte vector in
/// little-endian order. When `len` is greater than 8, the bytes past
/// the eighth are zero.
fn append_value(data: &mut Vec<u8>, mut value: u64, len: usize) {
    data.reserve(len);
    for _ in 0..len {
        // Emit the current least significant byte, then shift the next one down
        data.push(value as u8);
        value >>= 8;
    }
}
|
|
|
|
|
|
|
|
/// Append `count` copies of `byte` to the end of a byte vector.
fn append_repeated_byte(data: &mut Vec<u8>, byte: u8, count: usize) {
    data.extend(std::iter::repeat(byte).take(count));
}
|
|
|
|
|
|
|
|
fn append_0(data: &mut Vec<u8>, count: usize) {
|
|
|
|
append_repeated_byte(data, 0, count);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn append_ff(data: &mut Vec<u8>, count: usize) {
|
|
|
|
append_repeated_byte(data, 0xff, count);
|
|
|
|
}
|
|
|
|
|
|
|
|
impl UploadedFile {
|
|
|
|
/// Returns the fields shared by the ZIP local file header and
|
|
|
|
/// central directory file header - "Version needed to extract"
|
|
|
|
/// through "Extra field length".
|
|
|
|
fn shared_header_fields(&self, hash: Option<u32>) -> Vec<u8> {
|
|
|
|
let mut fields = vec![
|
2022-04-27 00:55:36 -04:00
|
|
|
45, 0, // Minimum version required to extract: 4.5 for ZIP64 extensions
|
|
|
|
0b00001000, 0, // General purpose bit flag: size and CRC-32 in data descriptor
|
|
|
|
0, 0, // Compression method: none
|
2022-04-26 23:54:29 -04:00
|
|
|
];
|
|
|
|
append_value(&mut fields, fat_timestamp(self.modtime) as u64, 4);
|
|
|
|
// Use 0s as a placeholder if the CRC-32 hash isn't known yet
|
|
|
|
append_value(&mut fields, hash.unwrap_or(0) as u64, 4);
|
|
|
|
// Placeholders for compressed and uncompressed size in ZIP64 record, 4 bytes each
|
|
|
|
append_ff(&mut fields, 8);
|
|
|
|
append_value(&mut fields, self.name.len() as u64, 2);
|
|
|
|
// Extra field length: 32 bytes for zip64, 9 bytes for timestamp
|
|
|
|
fields.append(&mut vec![41, 0]);
|
|
|
|
fields
|
|
|
|
}
|
|
|
|
|
|
|
|
fn extra_field(&self, local_header_offset: usize) -> Vec<u8> {
|
|
|
|
let mut field = vec![
|
2022-04-27 00:55:36 -04:00
|
|
|
0x01, 0x00, // Zip64 extended information
|
|
|
|
28, 0, // 28 bytes of data
|
2022-04-26 23:54:29 -04:00
|
|
|
];
|
|
|
|
// Original size and compressed size - if this is in the local
|
|
|
|
// header, we're supposed to leave these blank and point to
|
|
|
|
// the data descriptor, but I'm assuming it won't hurt to fill
|
|
|
|
// them in regardless
|
|
|
|
append_value(&mut field, self.size as u64, 8);
|
|
|
|
append_value(&mut field, self.size as u64, 8);
|
|
|
|
append_value(&mut field, local_header_offset as u64, 8);
|
|
|
|
append_0(&mut field, 4); // File starts on disk 0, there's no other disk
|
|
|
|
|
|
|
|
field.append(&mut vec![
|
|
|
|
0x55, 0x54, // Extended timestamp
|
2022-04-27 00:55:36 -04:00
|
|
|
5, 0, // 5 bytes of data
|
2022-04-26 23:54:29 -04:00
|
|
|
0b00000001, // Flags: Only modification time is present
|
|
|
|
]);
|
|
|
|
append_value(&mut field, self.modtime.unix_timestamp() as u64, 4);
|
|
|
|
|
|
|
|
field
|
|
|
|
}
|
|
|
|
|
|
|
|
fn local_header(&self, local_header_offset: usize) -> Vec<u8> {
|
|
|
|
let mut header = vec![0x50, 0x4b, 0x03, 0x04]; // Local file header signature
|
|
|
|
header.append(&mut self.shared_header_fields(None));
|
|
|
|
header.append(&mut self.name.clone().into_bytes());
|
|
|
|
header.append(&mut self.extra_field(local_header_offset));
|
|
|
|
header
|
|
|
|
}
|
|
|
|
|
|
|
|
fn central_directory_header(&self, local_header_offset: usize, hash: u32) -> Vec<u8> {
|
|
|
|
let mut header = vec![
|
2022-04-27 00:28:26 -04:00
|
|
|
// Central directory file header signature
|
|
|
|
0x50, 0x4b, 0x01, 0x02,
|
|
|
|
// Made by a "DOS" system supporting version 4.5 - if we
|
|
|
|
// say it's made by a Unix system, then unzip will expect
|
|
|
|
// it to have embedded Unix permission information, and
|
|
|
|
// will set all the files to mode 000 when that's not
|
|
|
|
// present :/
|
|
|
|
45, 0,
|
2022-04-26 23:54:29 -04:00
|
|
|
];
|
|
|
|
header.append(&mut self.shared_header_fields(Some(hash)));
|
|
|
|
header.append(&mut vec![
|
2022-04-27 00:55:36 -04:00
|
|
|
0, 0, // File comment length: 0
|
|
|
|
0, 0, // Disk number where file starts: 0
|
|
|
|
0, 0, // Internal file attributes: nothing
|
2022-04-26 23:54:29 -04:00
|
|
|
0, 0, 0, 0, // External file attributes: nothing
|
2022-04-27 00:55:36 -04:00
|
|
|
0xff, 0xff, 0xff,
|
|
|
|
0xff, // Relative offset of local file header: placeholder, see ZIP64 data
|
2022-04-26 23:54:29 -04:00
|
|
|
]);
|
|
|
|
header.append(&mut self.name.clone().into_bytes());
|
|
|
|
header.append(&mut self.extra_field(local_header_offset));
|
|
|
|
header
|
|
|
|
}
|
|
|
|
|
|
|
|
fn data_descriptor(&self, hash: u32) -> Vec<u8> {
|
|
|
|
let mut descriptor = vec![0x50, 0x4b, 0x07, 0x08]; // Data descriptor signature
|
|
|
|
append_value(&mut descriptor, hash as u64, 4);
|
|
|
|
// Compressed and uncompressed sizes
|
|
|
|
append_value(&mut descriptor, self.size as u64, 8);
|
|
|
|
append_value(&mut descriptor, self.size as u64, 8);
|
|
|
|
descriptor
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn end_of_central_directory(files: &[UploadedFile]) -> Vec<u8> {
|
|
|
|
let entries_size = file_entries_size(files) as u64;
|
|
|
|
let directory_size = central_directory_size(files) as u64;
|
|
|
|
|
|
|
|
let mut eocd = vec![
|
|
|
|
0x50, 0x4b, 0x06, 0x06, // EOCD64 record signature
|
2022-04-27 00:55:36 -04:00
|
|
|
44, // Size of remaining EOCD64 record
|
2022-04-26 23:54:29 -04:00
|
|
|
];
|
|
|
|
append_0(&mut eocd, 7); // pad out the rest of the size field
|
|
|
|
eocd.append(&mut vec![
|
2022-04-27 00:28:26 -04:00
|
|
|
45, 0, // Made by a "DOS" system supporting version 4.5, see above
|
2022-04-26 23:54:29 -04:00
|
|
|
45, 0, // Minimum version 4.5 to extract
|
|
|
|
]);
|
|
|
|
append_0(&mut eocd, 8); // Two 4-byte disk numbers, both 0
|
2022-04-27 00:55:36 -04:00
|
|
|
// Number of central directory records, on this disk and in total
|
2022-04-26 23:54:29 -04:00
|
|
|
append_value(&mut eocd, files.len() as u64, 8);
|
|
|
|
append_value(&mut eocd, files.len() as u64, 8);
|
|
|
|
append_value(&mut eocd, directory_size, 8);
|
|
|
|
append_value(&mut eocd, entries_size, 8); // Offset of start of central directory
|
|
|
|
|
|
|
|
eocd.append(&mut vec![0x50, 0x4b, 0x06, 0x07]); // EOCD64 locator signature
|
|
|
|
append_0(&mut eocd, 4); // disk number
|
|
|
|
append_value(&mut eocd, entries_size + directory_size, 8); // EOCD64 record offset
|
|
|
|
append_0(&mut eocd, 4); // total number of disks;
|
|
|
|
|
|
|
|
eocd.append(&mut vec![0x50, 0x4b, 0x05, 0x06]); // EOCD record signature
|
|
|
|
append_ff(&mut eocd, 16); // Zip64 placeholders for disk numbers, record counts, and offsets
|
|
|
|
append_0(&mut eocd, 2); // Comment length: 0
|
|
|
|
|
|
|
|
eocd
|
|
|
|
}
|
|
|
|
|
|
|
|
pub struct ZipGenerator<'a> {
|
|
|
|
files: Vec<UploadedFile>,
|
|
|
|
file_index: usize,
|
|
|
|
byte_index: usize,
|
|
|
|
pending_metadata: Vec<u8>,
|
|
|
|
hasher: Hasher,
|
|
|
|
hashes: Vec<u32>,
|
2022-04-27 00:55:36 -04:00
|
|
|
output: Box<dyn LiveWriter + 'a>,
|
2022-04-26 23:54:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> ZipGenerator<'a> {
|
|
|
|
pub fn new(files: Vec<UploadedFile>, output: Box<dyn LiveWriter + 'a>) -> Self {
|
|
|
|
let mut result = Self {
|
|
|
|
files,
|
|
|
|
file_index: 0,
|
|
|
|
byte_index: 0,
|
|
|
|
pending_metadata: vec![],
|
|
|
|
hasher: Hasher::new(),
|
|
|
|
hashes: vec![],
|
|
|
|
output,
|
|
|
|
};
|
|
|
|
result.start_new_file();
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn total_size(&self) -> usize {
|
|
|
|
zipfile_size(&self.files)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn finish_file(&mut self) {
|
|
|
|
let hash = std::mem::replace(&mut self.hasher, Hasher::new()).finalize();
|
|
|
|
self.hashes.push(hash);
|
2022-04-27 00:55:36 -04:00
|
|
|
self.pending_metadata
|
|
|
|
.append(&mut self.files[self.file_index].data_descriptor(hash));
|
|
|
|
debug!(
|
|
|
|
"Finishing file entry in zipfile: {}, hash {}",
|
|
|
|
self.files[self.file_index].name, hash
|
|
|
|
);
|
2022-04-26 23:54:29 -04:00
|
|
|
self.file_index += 1;
|
|
|
|
self.start_new_file();
|
|
|
|
}
|
|
|
|
|
|
|
|
fn start_new_file(&mut self) {
|
|
|
|
let mut offset = file_entries_size(&self.files[..self.file_index]);
|
|
|
|
while self.file_index < self.files.len() && self.files[self.file_index].size == 0 {
|
2022-04-27 00:55:36 -04:00
|
|
|
debug!(
|
|
|
|
"Empty file entry in zipfile: {}",
|
|
|
|
self.files[self.file_index].name
|
|
|
|
);
|
2022-04-26 23:54:29 -04:00
|
|
|
self.hashes.push(EMPTY_STRING_CRC32);
|
|
|
|
let mut local_header = self.files[self.file_index].local_header(offset);
|
2022-04-27 00:55:36 -04:00
|
|
|
let mut data_descriptor =
|
|
|
|
self.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32);
|
2022-04-26 23:54:29 -04:00
|
|
|
offset += local_header.len() + data_descriptor.len();
|
|
|
|
self.file_index += 1;
|
|
|
|
self.pending_metadata.append(&mut local_header);
|
|
|
|
self.pending_metadata.append(&mut data_descriptor);
|
|
|
|
}
|
|
|
|
if self.file_index < self.files.len() {
|
2022-04-27 00:55:36 -04:00
|
|
|
debug!(
|
|
|
|
"Starting file entry in zipfile: {}",
|
|
|
|
self.files[self.file_index].name
|
|
|
|
);
|
2022-04-26 23:54:29 -04:00
|
|
|
self.byte_index = 0;
|
2022-04-27 00:55:36 -04:00
|
|
|
self.pending_metadata
|
|
|
|
.append(&mut self.files[self.file_index].local_header(offset));
|
2022-04-26 23:54:29 -04:00
|
|
|
} else {
|
|
|
|
self.finish_zipfile();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn finish_zipfile(&mut self) {
|
|
|
|
debug!("Writing zipfile central directory");
|
|
|
|
let mut offset = 0;
|
|
|
|
for (i, file) in self.files.iter().enumerate() {
|
2022-04-27 00:55:36 -04:00
|
|
|
debug!(
|
|
|
|
"Writing central directory entry: {}, hash {}",
|
|
|
|
file.name, self.hashes[i]
|
|
|
|
);
|
|
|
|
self.pending_metadata
|
|
|
|
.append(&mut file.central_directory_header(offset, self.hashes[i]));
|
2022-04-26 23:54:29 -04:00
|
|
|
offset += file_entry_size(file);
|
|
|
|
}
|
|
|
|
debug!("Writing end of central directory");
|
2022-04-27 00:55:36 -04:00
|
|
|
self.pending_metadata
|
|
|
|
.append(&mut end_of_central_directory(&self.files));
|
2022-04-26 23:54:29 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> LiveWriter for ZipGenerator<'a> {
|
|
|
|
fn add_waker(&mut self, waker: Waker) {
|
|
|
|
self.output.add_waker(waker);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Write for ZipGenerator<'a> {
|
|
|
|
fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
|
|
|
|
while !self.pending_metadata.is_empty() {
|
|
|
|
let result = self.output.write(self.pending_metadata.as_slice());
|
|
|
|
match result {
|
2022-04-27 00:55:36 -04:00
|
|
|
Ok(0) | Err(_) => {
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
Ok(n) => {
|
|
|
|
self.pending_metadata.drain(..n);
|
|
|
|
}
|
2022-04-26 23:54:29 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if self.file_index >= self.files.len() {
|
|
|
|
return Ok(0);
|
|
|
|
}
|
|
|
|
let bytes_remaining = self.files[self.file_index].size - self.byte_index;
|
|
|
|
if bytes_remaining < buf.len() {
|
|
|
|
buf = &buf[..bytes_remaining];
|
|
|
|
}
|
|
|
|
let result = self.output.write(buf);
|
|
|
|
match result {
|
|
|
|
Ok(0) | Err(_) => (),
|
|
|
|
Ok(n) => {
|
|
|
|
self.hasher.update(&buf[..n]);
|
|
|
|
self.byte_index += n;
|
|
|
|
if n == bytes_remaining {
|
|
|
|
self.finish_file();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
fn flush(&mut self) -> std::io::Result<()> {
|
|
|
|
debug!("Flushing zipfile writer");
|
|
|
|
if !self.pending_metadata.is_empty() {
|
|
|
|
self.output.write_all(self.pending_metadata.as_slice())?;
|
|
|
|
self.pending_metadata.clear();
|
|
|
|
}
|
|
|
|
self.output.flush()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Generating an archive from an empty file list must still yield
    /// output that the `zip` crate reads as a valid, empty archive,
    /// both as a stream and through the central directory.
    #[test]
    fn test_no_files() {
        let mut output: Vec<u8> = vec![];
        {
            let mut zipgen = ZipGenerator::new(vec![], Box::new(std::io::Cursor::new(&mut output)));
            zipgen.write_all(&[]).unwrap();
            zipgen.flush().unwrap();
        }

        eprintln!("{:?}", &output);
        {
            let mut reader = std::io::BufReader::new(output.as_slice());
            let zipfile = zip::read::read_zipfile_from_stream(&mut reader).unwrap();
            assert!(zipfile.is_none());
        }
        let archive = zip::ZipArchive::new(std::io::Cursor::new(output)).unwrap();
        assert!(archive.is_empty());
    }
}
|