use std::io::Write; use std::task::Waker; use crc32fast::Hasher; use log::debug; use time::OffsetDateTime; use crate::UploadedFile; use crate::file::LiveWriter; const SIGNATURE_SIZE: usize = 4; const SHARED_FIELDS_SIZE: usize = 26; const EXTRA_FIELD_SIZE: usize = 41; const LOCAL_HEADER_SIZE_MINUS_FILENAME: usize = SIGNATURE_SIZE + SHARED_FIELDS_SIZE + EXTRA_FIELD_SIZE; const DATA_DESCRIPTOR_SIZE: usize = 24; const FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE: usize = LOCAL_HEADER_SIZE_MINUS_FILENAME + DATA_DESCRIPTOR_SIZE; const CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME: usize = SIGNATURE_SIZE + 2 + SHARED_FIELDS_SIZE + 14 + EXTRA_FIELD_SIZE; const EOCD64_RECORD_SIZE: usize = 56; const EOCD64_LOCATOR_SIZE: usize = 20; const EOCD_RECORD_SIZE: usize = 22; const EOCD_TOTAL_SIZE: usize = EOCD64_RECORD_SIZE + EOCD64_LOCATOR_SIZE + EOCD_RECORD_SIZE; const EMPTY_STRING_CRC32: u32 = 0; fn file_entry_size(file: &UploadedFile) -> usize { FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + file.name.len() + file.size } fn file_entries_size(files: &[UploadedFile]) -> usize { let mut total = 0; for file in files.iter() { total += file_entry_size(file) } total } fn central_directory_size(files: &[UploadedFile]) -> usize { let mut total = 0; for file in files.iter() { total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + file.name.len(); } total } fn zipfile_size(files: &[UploadedFile]) -> usize { file_entries_size(files) + central_directory_size(files) + EOCD_TOTAL_SIZE } fn fat_timestamp(time: OffsetDateTime) -> u32 { (((time.year() - 1980) as u32) << 25) | ((time.month() as u32) << 21) | ((time.day() as u32) << 16) | ((time.hour() as u32) << 11) | ((time.minute() as u32) << 5) | ((time.second() as u32) >> 1) } /// Append a value to a byte vector as little-endian bytes fn append_value(data: &mut Vec, mut value: u64, len: usize) { data.resize_with(data.len() + len, || { let byte = value as u8; value >>= 8; byte }); } fn append_repeated_byte(data: &mut Vec, byte: u8, count: usize) { data.resize(data.len() + count, byte); } fn append_0(data: &mut Vec, count: usize) { append_repeated_byte(data, 0, count); } fn append_ff(data: &mut Vec, count: usize) { append_repeated_byte(data, 0xff, count); } impl UploadedFile { /// Returns the fields shared by the ZIP local file header and /// central directory file header - "Version needed to extract" /// through "Extra field length". fn shared_header_fields(&self, hash: Option) -> Vec { let mut fields = vec![ 45, 0, // Minimum version required to extract: 4.5 for ZIP64 extensions 0b00001000, 0, // General purpose bit flag: size and CRC-32 in data descriptor 0, 0, // Compression method: none ]; append_value(&mut fields, fat_timestamp(self.modtime) as u64, 4); // Use 0s as a placeholder if the CRC-32 hash isn't known yet append_value(&mut fields, hash.unwrap_or(0) as u64, 4); // Placeholders for compressed and uncompressed size in ZIP64 record, 4 bytes each append_ff(&mut fields, 8); append_value(&mut fields, self.name.len() as u64, 2); // Extra field length: 32 bytes for zip64, 9 bytes for timestamp fields.append(&mut vec![41, 0]); fields } fn extra_field(&self, local_header_offset: usize) -> Vec { let mut field = vec![ 0x01, 0x00, // Zip64 extended information 28, 0, // 28 bytes of data ]; // Original size and compressed size - if this is in the local // header, we're supposed to leave these blank and point to // the data descriptor, but I'm assuming it won't hurt to fill // them in regardless append_value(&mut field, self.size as u64, 8); append_value(&mut field, self.size as u64, 8); append_value(&mut field, local_header_offset as u64, 8); append_0(&mut field, 4); // File starts on disk 0, there's no other disk field.append(&mut vec![ 0x55, 0x54, // Extended timestamp 5, 0, // 5 bytes of data 0b00000001, // Flags: Only modification time is present ]); append_value(&mut field, self.modtime.unix_timestamp() as u64, 4); field } fn local_header(&self, local_header_offset: usize) -> Vec { let mut header = vec![0x50, 0x4b, 0x03, 0x04]; // Local file header signature header.append(&mut self.shared_header_fields(None)); header.append(&mut self.name.clone().into_bytes()); header.append(&mut self.extra_field(local_header_offset)); header } fn central_directory_header(&self, local_header_offset: usize, hash: u32) -> Vec { let mut header = vec![ 0x50, 0x4b, 0x01, 0x02, // Central directory file header signature 45, 3, // Made by a Unix system supporting version 4.5 ]; header.append(&mut self.shared_header_fields(Some(hash))); header.append(&mut vec![ 0, 0, // File comment length: 0 0, 0, // Disk number where file starts: 0 0, 0, // Internal file attributes: nothing 0, 0, 0, 0, // External file attributes: nothing 0xff, 0xff, 0xff, 0xff, // Relative offset of local file header: placeholder, see ZIP64 data ]); header.append(&mut self.name.clone().into_bytes()); header.append(&mut self.extra_field(local_header_offset)); header } fn data_descriptor(&self, hash: u32) -> Vec { let mut descriptor = vec![0x50, 0x4b, 0x07, 0x08]; // Data descriptor signature append_value(&mut descriptor, hash as u64, 4); // Compressed and uncompressed sizes append_value(&mut descriptor, self.size as u64, 8); append_value(&mut descriptor, self.size as u64, 8); descriptor } } fn end_of_central_directory(files: &[UploadedFile]) -> Vec { let entries_size = file_entries_size(files) as u64; let directory_size = central_directory_size(files) as u64; let mut eocd = vec![ 0x50, 0x4b, 0x06, 0x06, // EOCD64 record signature 44, // Size of remaining EOCD64 record ]; append_0(&mut eocd, 7); // pad out the rest of the size field eocd.append(&mut vec![ 45, 3, // Made by a Unix system supporting version 4.5 45, 0, // Minimum version 4.5 to extract ]); append_0(&mut eocd, 8); // Two 4-byte disk numbers, both 0 // Number of central directory records, on this disk and in total append_value(&mut eocd, files.len() as u64, 8); append_value(&mut eocd, files.len() as u64, 8); append_value(&mut eocd, directory_size, 8); append_value(&mut eocd, entries_size, 8); // Offset of start of central directory eocd.append(&mut vec![0x50, 0x4b, 0x06, 0x07]); // EOCD64 locator signature append_0(&mut eocd, 4); // disk number append_value(&mut eocd, entries_size + directory_size, 8); // EOCD64 record offset append_0(&mut eocd, 4); // total number of disks; eocd.append(&mut vec![0x50, 0x4b, 0x05, 0x06]); // EOCD record signature append_ff(&mut eocd, 16); // Zip64 placeholders for disk numbers, record counts, and offsets append_0(&mut eocd, 2); // Comment length: 0 eocd } pub struct ZipGenerator<'a> { files: Vec, file_index: usize, byte_index: usize, pending_metadata: Vec, hasher: Hasher, hashes: Vec, output: Box } impl<'a> ZipGenerator<'a> { pub fn new(files: Vec, output: Box) -> Self { let mut result = Self { files, file_index: 0, byte_index: 0, pending_metadata: vec![], hasher: Hasher::new(), hashes: vec![], output, }; result.start_new_file(); result } pub fn total_size(&self) -> usize { zipfile_size(&self.files) } fn finish_file(&mut self) { let hash = std::mem::replace(&mut self.hasher, Hasher::new()).finalize(); self.hashes.push(hash); self.pending_metadata.append(&mut self.files[self.file_index].data_descriptor(hash)); debug!("Finishing file entry in zipfile: {}, hash {}", self.files[self.file_index].name, hash); self.file_index += 1; self.start_new_file(); } fn start_new_file(&mut self) { let mut offset = file_entries_size(&self.files[..self.file_index]); while self.file_index < self.files.len() && self.files[self.file_index].size == 0 { debug!("Empty file entry in zipfile: {}", self.files[self.file_index].name); self.hashes.push(EMPTY_STRING_CRC32); let mut local_header = self.files[self.file_index].local_header(offset); let mut data_descriptor = self.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32); offset += local_header.len() + data_descriptor.len(); self.file_index += 1; self.pending_metadata.append(&mut local_header); self.pending_metadata.append(&mut data_descriptor); } if self.file_index < self.files.len() { debug!("Starting file entry in zipfile: {}", self.files[self.file_index].name); self.byte_index = 0; self.pending_metadata.append(&mut self.files[self.file_index].local_header(offset)); } else { self.finish_zipfile(); } } fn finish_zipfile(&mut self) { debug!("Writing zipfile central directory"); let mut offset = 0; for (i, file) in self.files.iter().enumerate() { debug!("Writing central directory entry: {}, hash {}", file.name, self.hashes[i]); self.pending_metadata.append(&mut file.central_directory_header(offset, self.hashes[i])); offset += file_entry_size(file); } debug!("Writing end of central directory"); self.pending_metadata.append(&mut end_of_central_directory(&self.files)); } } impl<'a> LiveWriter for ZipGenerator<'a> { fn add_waker(&mut self, waker: Waker) { self.output.add_waker(waker); } } impl<'a> Write for ZipGenerator<'a> { fn write(&mut self, mut buf: &[u8]) -> std::io::Result { while !self.pending_metadata.is_empty() { let result = self.output.write(self.pending_metadata.as_slice()); match result { Ok(0) | Err(_) => { return result; } Ok(n) => { self.pending_metadata.drain(..n); } } } if self.file_index >= self.files.len() { return Ok(0); } let bytes_remaining = self.files[self.file_index].size - self.byte_index; if bytes_remaining < buf.len() { buf = &buf[..bytes_remaining]; } let result = self.output.write(buf); match result { Ok(0) | Err(_) => (), Ok(n) => { self.hasher.update(&buf[..n]); self.byte_index += n; if n == bytes_remaining { self.finish_file(); } } } result } fn flush(&mut self) -> std::io::Result<()> { debug!("Flushing zipfile writer"); if !self.pending_metadata.is_empty() { self.output.write_all(self.pending_metadata.as_slice())?; self.pending_metadata.clear(); } self.output.flush() } } #[cfg(test)] mod tests { use super::*; fn test_no_files() { let mut output: Vec = vec![]; { let mut zipgen = ZipGenerator::new(vec![], Box::new(std::io::Cursor::new(&mut output))); zipgen.write_all(&[]).unwrap(); zipgen.flush().unwrap(); } eprintln!("{:?}", &output); { let mut reader = std::io::BufReader::new(output.as_slice()); let zipfile = zip::read::read_zipfile_from_stream(&mut reader).unwrap(); assert!(zipfile.is_none()); } let archive = zip::ZipArchive::new(std::io::Cursor::new(output)).unwrap(); assert!(archive.is_empty()); } }