use std::io::Write; use crc32fast::Hasher; use log::debug; use time::OffsetDateTime; use crate::upload::UploadedFile; const SLASH: u8 = 0x2f; const SIGNATURE_SIZE: u64 = 4; const SHARED_FIELDS_SIZE: u64 = 26; const EXTRA_FIELD_SIZE: u64 = 41; const LOCAL_HEADER_SIZE_MINUS_FILENAME: u64 = SIGNATURE_SIZE + SHARED_FIELDS_SIZE + EXTRA_FIELD_SIZE; const DATA_DESCRIPTOR_SIZE: u64 = 24; const FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE: u64 = LOCAL_HEADER_SIZE_MINUS_FILENAME + DATA_DESCRIPTOR_SIZE; const CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME: u64 = SIGNATURE_SIZE + 2 + SHARED_FIELDS_SIZE + 14 + EXTRA_FIELD_SIZE; const EOCD64_RECORD_SIZE: u64 = 56; const EOCD64_LOCATOR_SIZE: u64 = 20; const EOCD_RECORD_SIZE: u64 = 22; const EOCD_TOTAL_SIZE: u64 = EOCD64_RECORD_SIZE + EOCD64_LOCATOR_SIZE + EOCD_RECORD_SIZE; const EMPTY_STRING_CRC32: u32 = 0; pub use crate::state::v1::FileSet; fn full_file_name_len(file: &UploadedFile, directory_name: &Option) -> u64 { file.name.len() as u64 + if let Some(d) = directory_name { d.len() as u64 + 1 } else { 0 } } fn file_entry_size(file: &UploadedFile, directory_name: &Option) -> u64 { FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + full_file_name_len(file, directory_name) + file.size } fn file_entries_size(files: &FileSet, bound: Option) -> u64 { let mut total = 0; let fs = if let Some(n) = bound { &files.files[..n] } else { &files.files }; for file in fs.iter() { total += file_entry_size(file, &files.directory_name) } total } pub fn file_data_offset(files: &FileSet, idx: usize) -> u64 { file_entries_size(files, Some(idx)) + LOCAL_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(&files.files[idx], &files.directory_name) } pub fn file_data_offsets(files: &FileSet) -> Vec { let mut offsets = Vec::new(); let mut offset: u64 = 0; for file in files.files.iter() { offset += LOCAL_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name); offsets.push(offset); offset += file.size + DATA_DESCRIPTOR_SIZE; } offsets } fn central_directory_size(files: &FileSet) -> u64 { let mut total = 0; for file in files.files.iter() { total += CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name); } total } fn zipfile_size(files: &FileSet) -> u64 { file_entries_size(files, None) + central_directory_size(files) + EOCD_TOTAL_SIZE } fn fat_timestamp(time: OffsetDateTime) -> u32 { (((time.year() - 1980) as u32) << 25) | ((time.month() as u32) << 21) | ((time.day() as u32) << 16) | ((time.hour() as u32) << 11) | ((time.minute() as u32) << 5) | ((time.second() as u32) >> 1) } /// Append a value to a byte vector as little-endian bytes fn append_value(data: &mut Vec, mut value: u64, len: usize) { data.resize_with(data.len() + len, || { let byte = value as u8; value >>= 8; byte }); } fn append_repeated_byte(data: &mut Vec, byte: u8, count: usize) { data.resize(data.len() + count, byte); } fn append_0(data: &mut Vec, count: usize) { append_repeated_byte(data, 0, count); } fn append_ff(data: &mut Vec, count: usize) { append_repeated_byte(data, 0xff, count); } impl UploadedFile { /// Returns the fields shared by the ZIP local file header and /// central directory file header - "Version needed to extract" /// through "Extra field length". fn shared_header_fields(&self, directory_name: &Option, hash: Option) -> Vec { let mut fields = vec![ 45, 0, // Minimum version required to extract: 4.5 for ZIP64 0b00001000, // General purpose bit flag: bit 3 - size and CRC-32 in data descriptor 0b00001000, // General purpose bit flag: bit 11 - UTF-8 filenames 0, 0, // Compression method: none ]; append_value(&mut fields, fat_timestamp(self.modtime) as u64, 4); // Use 0s as a placeholder if the CRC-32 hash isn't known yet append_value(&mut fields, hash.unwrap_or(0) as u64, 4); // Placeholders for compressed and uncompressed size in ZIP64 record, 4 bytes each append_ff(&mut fields, 8); append_value(&mut fields, full_file_name_len(self, directory_name), 2); // Extra field length: 32 bytes for zip64, 9 bytes for timestamp fields.append(&mut vec![41, 0]); fields } fn extra_field(&self, local_header_offset: u64) -> Vec { let mut field = vec![ 0x01, 0x00, // Zip64 extended information 28, 0, // 28 bytes of data ]; // Original size and compressed size - if this is in the local // header, we're supposed to leave these blank and point to // the data descriptor, but I'm assuming it won't hurt to fill // them in regardless append_value(&mut field, self.size, 8); append_value(&mut field, self.size, 8); append_value(&mut field, local_header_offset, 8); append_0(&mut field, 4); // File starts on disk 0, there's no other disk field.append(&mut vec![ 0x55, 0x54, // Extended timestamp 5, 0, // 5 bytes of data 0b00000001, // Flags: Only modification time is present ]); append_value(&mut field, self.modtime.unix_timestamp() as u64, 4); field } fn full_name_bytes(&self, directory_name: &Option) -> Vec { let mut b = vec![]; if let Some(d) = directory_name { b.append(&mut d.to_owned().into_bytes()); b.push(SLASH); } b.append(&mut self.name.clone().into_bytes()); b } fn local_header(&self, directory_name: &Option, local_header_offset: u64) -> Vec { let mut header = vec![0x50, 0x4b, 0x03, 0x04]; // Local file header signature header.append(&mut self.shared_header_fields(directory_name, None)); header.append(&mut self.full_name_bytes(directory_name)); header.append(&mut self.extra_field(local_header_offset)); header } fn central_directory_header( &self, directory_name: &Option, local_header_offset: u64, hash: u32, ) -> Vec { let mut header = vec![ 0x50, 0x4b, 0x01, 0x02, // Central directory file header signature 45, 3, // Made by a Unix system supporting version 4.5 ]; header.append(&mut self.shared_header_fields(directory_name, Some(hash))); append_0(&mut header, 8); // Comment length, disk number, internal attributes, DOS external attributes append_value(&mut header, 0o100644, 2); // Unix external file attributes: -rw-r--r-- append_ff(&mut header, 4); // Relative offset of local file header: placeholder, see ZIP64 data header.append(&mut self.full_name_bytes(directory_name)); header.append(&mut self.extra_field(local_header_offset)); header } fn data_descriptor(&self, hash: u32) -> Vec { let mut descriptor = vec![0x50, 0x4b, 0x07, 0x08]; // Data descriptor signature append_value(&mut descriptor, hash as u64, 4); // Compressed and uncompressed sizes append_value(&mut descriptor, self.size, 8); append_value(&mut descriptor, self.size, 8); descriptor } } fn end_of_central_directory(files: &FileSet) -> Vec { let entries_size = file_entries_size(files, None); let directory_size = central_directory_size(files); let mut eocd = vec![ 0x50, 0x4b, 0x06, 0x06, // EOCD64 record signature 44, // Size of remaining EOCD64 record ]; append_0(&mut eocd, 7); // pad out the rest of the size field eocd.append(&mut vec![ 45, 3, // Made by a Unix system supporting version 4.5 45, 0, // Minimum version 4.5 to extract ]); append_0(&mut eocd, 8); // Two 4-byte disk numbers, both 0 // Number of central directory records, on this disk and in total append_value(&mut eocd, files.files.len() as u64, 8); append_value(&mut eocd, files.files.len() as u64, 8); append_value(&mut eocd, directory_size, 8); append_value(&mut eocd, entries_size, 8); // Offset of start of central directory eocd.append(&mut vec![0x50, 0x4b, 0x06, 0x07]); // EOCD64 locator signature append_0(&mut eocd, 4); // disk number append_value(&mut eocd, entries_size + directory_size, 8); // EOCD64 record offset append_0(&mut eocd, 4); // total number of disks; eocd.append(&mut vec![0x50, 0x4b, 0x05, 0x06]); // EOCD record signature append_ff(&mut eocd, 16); // Zip64 placeholders for disk numbers, record counts, and offsets append_0(&mut eocd, 2); // Comment length: 0 eocd } pub struct ZipGenerator { files: FileSet, file_index: usize, byte_index: u64, pending_metadata: Vec, hasher: Hasher, hashes: Vec, output: W, } impl ZipGenerator { pub fn new(files: FileSet, output: W) -> Self { let mut result = Self { files, file_index: 0, byte_index: 0, pending_metadata: vec![], hasher: Hasher::new(), hashes: vec![], output, }; result.start_new_file(); result } pub fn total_size(&self) -> u64 { zipfile_size(&self.files) } fn finish_file(&mut self) { let hash = std::mem::replace(&mut self.hasher, Hasher::new()).finalize(); self.hashes.push(hash); self.pending_metadata .append(&mut self.files.files[self.file_index].data_descriptor(hash)); debug!( "Finishing file entry in zipfile: {}, hash {:x}", self.files.files[self.file_index].name, hash ); self.file_index += 1; self.start_new_file(); } fn start_new_file(&mut self) { let mut offset = file_entries_size(&self.files, Some(self.file_index)); while self.file_index < self.files.files.len() && self.files.files[self.file_index].size == 0 { debug!( "Empty file entry in zipfile: {}", self.files.files[self.file_index].name ); self.hashes.push(EMPTY_STRING_CRC32); let mut local_header = self.files.files[self.file_index].local_header(&self.files.directory_name, offset); let mut data_descriptor = self.files.files[self.file_index].data_descriptor(EMPTY_STRING_CRC32); offset += local_header.len() as u64 + data_descriptor.len() as u64; self.file_index += 1; self.pending_metadata.append(&mut local_header); self.pending_metadata.append(&mut data_descriptor); } if self.file_index < self.files.files.len() { debug!( "Starting file entry in zipfile: {}", self.files.files[self.file_index].name ); self.byte_index = 0; self.pending_metadata.append( &mut self.files.files[self.file_index] .local_header(&self.files.directory_name, offset), ); } else { self.finish_zipfile(); } } fn finish_zipfile(&mut self) { debug!("Writing zipfile central directory"); let mut offset = 0; for (i, file) in self.files.files.iter().enumerate() { debug!( "Writing central directory entry: {}, hash {}", file.name, self.hashes[i] ); self.pending_metadata .append(&mut file.central_directory_header( &self.files.directory_name, offset, self.hashes[i], )); offset += file_entry_size(file, &self.files.directory_name); } debug!("Writing end of central directory"); self.pending_metadata .append(&mut end_of_central_directory(&self.files)); } } impl Write for ZipGenerator { fn write(&mut self, mut buf: &[u8]) -> std::io::Result { while !self.pending_metadata.is_empty() { let result = self.output.write(self.pending_metadata.as_slice()); match result { Ok(0) | Err(_) => { return result; } Ok(n) => { self.pending_metadata.drain(..n); } } } if self.file_index >= self.files.files.len() { return Ok(0); } let bytes_remaining = self.files.files[self.file_index].size - self.byte_index; if bytes_remaining < (buf.len() as u64) { buf = &buf[..bytes_remaining as usize]; } let result = self.output.write(buf); match result { Ok(0) | Err(_) => (), Ok(n) => { self.hasher.update(&buf[..n]); let n = n as u64; self.byte_index += n; if n == bytes_remaining { self.finish_file(); } } } result } fn flush(&mut self) -> std::io::Result<()> { debug!("Flushing zipfile writer"); if !self.pending_metadata.is_empty() { self.output.write_all(self.pending_metadata.as_slice())?; self.pending_metadata.clear(); } self.output.flush() } } #[cfg(test)] mod tests { use super::*; fn test_no_files() { let mut output: Vec = vec![]; { let mut zipgen = ZipGenerator::new( FileSet { files: vec![], directory_name: Some("test".to_owned()), }, Box::new(std::io::Cursor::new(&mut output)), ); zipgen.write_all(&[]).unwrap(); zipgen.flush().unwrap(); } eprintln!("{:?}", &output); { let mut reader = std::io::BufReader::new(output.as_slice()); let zipfile = zip::read::read_zipfile_from_stream(&mut reader).unwrap(); assert!(zipfile.is_none()); } let archive = zip::ZipArchive::new(std::io::Cursor::new(output)).unwrap(); assert!(archive.is_empty()); } }