transbeam/src/zip.rs

416 lines
14 KiB
Rust

use std::io::Write;
use crc32fast::Hasher;
use log::debug;
use time::OffsetDateTime;
use crate::upload::UploadedFile;
/// ASCII `/`, placed between the directory name and the file name.
const SLASH: u8 = 0x2f;
/// Size in bytes of each 4-byte record signature.
const SIGNATURE_SIZE: u64 = 4;
/// Size in bytes of the fields common to the local file header and the
/// central directory file header ("version needed to extract" through
/// "extra field length").
const SHARED_FIELDS_SIZE: u64 = 26;
/// Size in bytes of the extra field written for every entry: 32 bytes of
/// zip64 extended information plus 9 bytes of extended timestamp.
const EXTRA_FIELD_SIZE: u64 = 41;
/// Size in bytes of a local file header, not counting the file name.
const LOCAL_HEADER_SIZE_MINUS_FILENAME: u64 =
SIGNATURE_SIZE + SHARED_FIELDS_SIZE + EXTRA_FIELD_SIZE;
/// Size in bytes of a data descriptor: signature, CRC-32, and two 8-byte sizes.
const DATA_DESCRIPTOR_SIZE: u64 = 24;
/// Size in bytes of everything in a file entry (local header plus data
/// descriptor) other than the file name and the file data itself.
const FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE: u64 =
LOCAL_HEADER_SIZE_MINUS_FILENAME + DATA_DESCRIPTOR_SIZE;
/// Size in bytes of a central directory file header, not counting the file
/// name. The `2` is the "version made by" field; the `14` covers the fields
/// between the shared fields and the file name (comment length, disk number,
/// internal and external attributes, local header offset).
const CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME: u64 =
SIGNATURE_SIZE + 2 + SHARED_FIELDS_SIZE + 14 + EXTRA_FIELD_SIZE;
/// Size in bytes of the zip64 end-of-central-directory record.
const EOCD64_RECORD_SIZE: u64 = 56;
/// Size in bytes of the zip64 end-of-central-directory locator.
const EOCD64_LOCATOR_SIZE: u64 = 20;
/// Size in bytes of the classic end-of-central-directory record (no comment).
const EOCD_RECORD_SIZE: u64 = 22;
/// Combined size in bytes of the three trailing end-of-central-directory structures.
const EOCD_TOTAL_SIZE: u64 = EOCD64_RECORD_SIZE + EOCD64_LOCATOR_SIZE + EOCD_RECORD_SIZE;
/// CRC-32 of zero bytes of input, recorded for zero-length files.
const EMPTY_STRING_CRC32: u32 = 0;
pub use crate::state::v1::FileSet;
/// Length in bytes of the name stored in the archive for `file`: the file's
/// own name, preceded by `directory_name` and a separating slash when a
/// directory name is given.
fn full_file_name_len(file: &UploadedFile, directory_name: &Option<String>) -> u64 {
    let prefix_len = match directory_name {
        // One extra byte for the `/` separator.
        Some(dir) => dir.len() as u64 + 1,
        None => 0,
    };
    file.name.len() as u64 + prefix_len
}
/// Total number of bytes one file occupies in the entries section of the
/// archive: local header, file name, file data, and data descriptor.
fn file_entry_size(file: &UploadedFile, directory_name: &Option<String>) -> u64 {
    let name_len = full_file_name_len(file, directory_name);
    FILE_ENTRY_SIZE_MINUS_FILENAME_AND_FILE + name_len + file.size
}
/// Sums the entry sizes of the files in `files`.
///
/// With `bound = Some(n)` only the first `n` files are counted; with `None`
/// every file is counted.
///
/// # Panics
///
/// Panics if `bound` exceeds the number of files.
fn file_entries_size(files: &FileSet, bound: Option<usize>) -> u64 {
    let selected = match bound {
        Some(n) => &files.files[..n],
        None => &files.files[..],
    };
    selected
        .iter()
        .map(|file| file_entry_size(file, &files.directory_name))
        .sum()
}
/// Byte offset within the archive at which the data of the file at index
/// `idx` begins, i.e. just past that file's local header and name.
///
/// # Panics
///
/// Panics if `idx` is not a valid index into `files.files`.
pub fn file_data_offset(files: &FileSet, idx: usize) -> u64 {
    // Everything written before this file's own entry starts.
    let preceding_entries = file_entries_size(files, Some(idx));
    let name_len = full_file_name_len(&files.files[idx], &files.directory_name);
    preceding_entries + LOCAL_HEADER_SIZE_MINUS_FILENAME + name_len
}
/// Byte offsets within the archive at which each file's data begins, in the
/// same order as `files.files`.
pub fn file_data_offsets(files: &FileSet) -> Vec<u64> {
    // Position of the start of the entry currently being visited.
    let mut cursor: u64 = 0;
    files
        .files
        .iter()
        .map(|file| {
            cursor +=
                LOCAL_HEADER_SIZE_MINUS_FILENAME + full_file_name_len(file, &files.directory_name);
            let data_start = cursor;
            // Skip over the data and its trailing descriptor to reach the next entry.
            cursor += file.size + DATA_DESCRIPTOR_SIZE;
            data_start
        })
        .collect()
}
/// Total size in bytes of the central directory: one header (including the
/// stored file name) per file.
fn central_directory_size(files: &FileSet) -> u64 {
    files
        .files
        .iter()
        .map(|file| {
            CENTRAL_DIRECTORY_HEADER_SIZE_MINUS_FILENAME
                + full_file_name_len(file, &files.directory_name)
        })
        .sum()
}
/// Total size in bytes of the finished archive: all file entries, the
/// central directory, and the end-of-central-directory structures.
fn zipfile_size(files: &FileSet) -> u64 {
    let entries = file_entries_size(files, None);
    let directory = central_directory_size(files);
    entries + directory + EOCD_TOTAL_SIZE
}
/// Packs `time` into the 32-bit MS-DOS date/time value stored in ZIP headers.
///
/// Layout, from the most significant bit down: 7 bits of years since 1980,
/// 4 bits of month, 5 bits of day, 5 bits of hour, 6 bits of minute, and
/// 5 bits of seconds divided by two.
///
/// The format can only represent years 1980 through 2107. Timestamps outside
/// that range are clamped to the nearest representable instant rather than
/// being allowed to wrap around into an unrelated date.
fn fat_timestamp(time: OffsetDateTime) -> u32 {
    const FIRST_YEAR: i32 = 1980;
    // The year field is 7 bits wide.
    const LAST_YEAR: i32 = FIRST_YEAR + 0x7f;
    // 1980-01-01 00:00:00
    const EARLIEST: u32 = (1 << 21) | (1 << 16);
    // 2107-12-31 23:59:58
    const LATEST: u32 =
        (0x7f << 25) | (12 << 21) | (31 << 16) | (23 << 11) | (59 << 5) | 29;

    let year = time.year();
    if year < FIRST_YEAR {
        return EARLIEST;
    }
    if year > LAST_YEAR {
        return LATEST;
    }
    (((year - FIRST_YEAR) as u32) << 25)
        | ((time.month() as u32) << 21)
        | ((time.day() as u32) << 16)
        | ((time.hour() as u32) << 11)
        | ((time.minute() as u32) << 5)
        | ((time.second() as u32) >> 1)
}
/// Appends the low `len` bytes of `value` to `data`, least significant byte
/// first. If `len` is greater than 8 the remaining bytes are zero.
fn append_value(data: &mut Vec<u8>, mut value: u64, len: usize) {
    data.reserve(len);
    for _ in 0..len {
        // Emit the current lowest byte, then shift the next one into place.
        data.push(value as u8);
        value >>= 8;
    }
}
/// Appends `count` copies of `byte` to the end of `data`.
fn append_repeated_byte(data: &mut Vec<u8>, byte: u8, count: usize) {
    data.extend(std::iter::repeat(byte).take(count));
}
/// Appends `count` zero bytes to `data`.
fn append_0(data: &mut Vec<u8>, count: usize) {
    const ZERO: u8 = 0x00;
    append_repeated_byte(data, ZERO, count);
}
/// Appends `count` bytes of `0xff` to `data`.
fn append_ff(data: &mut Vec<u8>, count: usize) {
    const ALL_ONES: u8 = 0xff;
    append_repeated_byte(data, ALL_ONES, count);
}
impl UploadedFile {
    /// Builds the run of fields that appears identically in both the local
    /// file header and the central directory file header, from "version
    /// needed to extract" through "extra field length".
    ///
    /// `hash` is the file's CRC-32 if it is already known; `None` writes
    /// zeros as a placeholder.
    fn shared_header_fields(&self, directory_name: &Option<String>, hash: Option<u32>) -> Vec<u8> {
        let mut fields: Vec<u8> = Vec::new();
        fields.extend_from_slice(&[
            45, 0, // Minimum version required to extract: 4.5 for ZIP64
            0b00001000, // General purpose bit flag: bit 3 - size and CRC-32 in data descriptor
            0b00001000, // General purpose bit flag: bit 11 - UTF-8 filenames
            0, 0, // Compression method: none
        ]);
        let dos_time = fat_timestamp(self.modtime);
        append_value(&mut fields, u64::from(dos_time), 4);
        // Zeros stand in for the CRC-32 when it isn't known yet
        let crc = hash.unwrap_or(0);
        append_value(&mut fields, u64::from(crc), 4);
        // Compressed and uncompressed size, 4 bytes each: all ones, deferring
        // to the ZIP64 record
        append_ff(&mut fields, 8);
        append_value(&mut fields, full_file_name_len(self, directory_name), 2);
        // Extra field length: 32 bytes for zip64, 9 bytes for timestamp
        fields.extend_from_slice(&[41, 0]);
        fields
    }

    /// Builds the extra field attached to both headers: a zip64 extended
    /// information record followed by an extended timestamp record.
    fn extra_field(&self, local_header_offset: u64) -> Vec<u8> {
        let mut field: Vec<u8> = Vec::new();
        field.extend_from_slice(&[
            0x01, 0x00, // Zip64 extended information
            28, 0, // 28 bytes of data
        ]);
        // Original size, then compressed size. In a local header these are
        // supposed to be left blank in favour of the data descriptor, but
        // they are filled in regardless on the assumption that it is harmless.
        append_value(&mut field, self.size, 8);
        append_value(&mut field, self.size, 8);
        append_value(&mut field, local_header_offset, 8);
        // File starts on disk 0, there's no other disk
        append_0(&mut field, 4);
        field.extend_from_slice(&[
            0x55, 0x54, // Extended timestamp
            5, 0, // 5 bytes of data
            0b00000001, // Flags: Only modification time is present
        ]);
        let unix_seconds = self.modtime.unix_timestamp() as u64;
        append_value(&mut field, unix_seconds, 4);
        field
    }

    /// Returns the bytes of the name stored in the archive: the directory
    /// name and a slash, if a directory name is given, followed by the
    /// file's own name.
    fn full_name_bytes(&self, directory_name: &Option<String>) -> Vec<u8> {
        let mut bytes: Vec<u8> = Vec::new();
        if let Some(dir) = directory_name {
            bytes.extend_from_slice(dir.as_bytes());
            bytes.push(SLASH);
        }
        bytes.extend_from_slice(self.name.as_bytes());
        bytes
    }

    /// Builds the complete local file header for this file. The CRC-32 is
    /// left as a placeholder.
    fn local_header(&self, directory_name: &Option<String>, local_header_offset: u64) -> Vec<u8> {
        // Local file header signature
        let mut header: Vec<u8> = vec![0x50, 0x4b, 0x03, 0x04];
        header.extend(self.shared_header_fields(directory_name, None));
        header.extend(self.full_name_bytes(directory_name));
        header.extend(self.extra_field(local_header_offset));
        header
    }

    /// Builds the complete central directory file header for this file,
    /// recording its CRC-32 and the offset of its local header.
    fn central_directory_header(
        &self,
        directory_name: &Option<String>,
        local_header_offset: u64,
        hash: u32,
    ) -> Vec<u8> {
        let mut header: Vec<u8> = Vec::new();
        header.extend_from_slice(&[
            0x50, 0x4b, 0x01, 0x02, // Central directory file header signature
            45, 3, // Made by a Unix system supporting version 4.5
        ]);
        header.extend(self.shared_header_fields(directory_name, Some(hash)));
        // Comment length, disk number, internal attributes, DOS external attributes
        append_0(&mut header, 8);
        // Unix external file attributes: -rw-r--r--
        append_value(&mut header, 0o100644, 2);
        // Relative offset of local file header: placeholder, see ZIP64 data
        append_ff(&mut header, 4);
        header.extend(self.full_name_bytes(directory_name));
        header.extend(self.extra_field(local_header_offset));
        header
    }

    /// Builds the data descriptor that follows the file's data, carrying
    /// the CRC-32 and the 8-byte compressed and uncompressed sizes.
    fn data_descriptor(&self, hash: u32) -> Vec<u8> {
        // Data descriptor signature
        let mut descriptor: Vec<u8> = vec![0x50, 0x4b, 0x07, 0x08];
        append_value(&mut descriptor, u64::from(hash), 4);
        // Compressed size, then uncompressed size; identical because data is stored
        for _ in 0..2 {
            append_value(&mut descriptor, self.size, 8);
        }
        descriptor
    }
}
/// Builds the three structures that terminate the archive, in order: the
/// zip64 end-of-central-directory record, the zip64 end-of-central-directory
/// locator, and the classic end-of-central-directory record.
///
/// The classic record carries only all-ones placeholders, which direct
/// readers to the zip64 structures for the real counts and offsets.
fn end_of_central_directory(files: &FileSet) -> Vec<u8> {
    let entries_size = file_entries_size(files, None);
    let directory_size = central_directory_size(files);
    let entry_count = files.files.len() as u64;

    let mut eocd = vec![
        0x50, 0x4b, 0x06, 0x06, // EOCD64 record signature
        44, // Size of remaining EOCD64 record
    ];
    append_0(&mut eocd, 7); // pad out the rest of the size field
    eocd.extend_from_slice(&[
        45, 3, // Made by a Unix system supporting version 4.5
        45, 0, // Minimum version 4.5 to extract
    ]);
    append_0(&mut eocd, 8); // Two 4-byte disk numbers, both 0
    // Number of central directory records, on this disk and in total
    append_value(&mut eocd, entry_count, 8);
    append_value(&mut eocd, entry_count, 8);
    append_value(&mut eocd, directory_size, 8);
    // The central directory starts right after the last file entry
    append_value(&mut eocd, entries_size, 8);

    eocd.extend_from_slice(&[0x50, 0x4b, 0x06, 0x07]); // EOCD64 locator signature
    append_0(&mut eocd, 4); // Disk holding the EOCD64 record
    append_value(&mut eocd, entries_size + directory_size, 8); // EOCD64 record offset
    // Total number of disks: the archive is a single disk, so this is 1
    // (a count), unlike the zero-based disk numbers above.
    append_value(&mut eocd, 1, 4);

    eocd.extend_from_slice(&[0x50, 0x4b, 0x05, 0x06]); // EOCD record signature
    append_ff(&mut eocd, 16); // Zip64 placeholders for disk numbers, record counts, and offsets
    append_0(&mut eocd, 2); // Comment length: 0
    eocd
}
/// A writer that wraps `output` and turns the file data written to it into
/// a ZIP archive of `files`, inserting the headers, data descriptors,
/// central directory, and end-of-central-directory structures around the
/// data as it passes through.
pub struct ZipGenerator<W: Write> {
/// The files being archived, and the optional directory name they are stored under.
files: FileSet,
/// Index into `files.files` of the file whose data is currently expected;
/// equal to the number of files once every entry has been completed.
file_index: usize,
/// Number of bytes of the current file's data written so far.
byte_index: u64,
/// Archive metadata that has been generated but not yet written to `output`.
pending_metadata: Vec<u8>,
/// Running CRC-32 of the current file's data.
hasher: Hasher,
/// CRC-32 of each completed file, in the same order as `files.files`.
hashes: Vec<u32>,
/// The underlying writer that receives the archive bytes.
output: W,
}
impl<W: Write> ZipGenerator<W> {
    /// Creates a generator that will archive `files` into `output`.
    ///
    /// The metadata that precedes the first non-empty file's data (or the
    /// whole archive, if there is no such file) is queued immediately.
    pub fn new(files: FileSet, output: W) -> Self {
        let mut generator = Self {
            files,
            file_index: 0,
            byte_index: 0,
            pending_metadata: Vec::new(),
            hasher: Hasher::new(),
            hashes: Vec::new(),
            output,
        };
        generator.start_new_file();
        generator
    }

    /// Size in bytes of the complete archive that will be produced.
    pub fn total_size(&self) -> u64 {
        zipfile_size(&self.files)
    }

    /// Closes out the current file: records its CRC-32, queues its data
    /// descriptor, and moves on to the next file.
    fn finish_file(&mut self) {
        // Swap in a fresh hasher so the next file starts from a clean state.
        let hash = std::mem::replace(&mut self.hasher, Hasher::new()).finalize();
        self.hashes.push(hash);
        let finished = &self.files.files[self.file_index];
        self.pending_metadata.extend(finished.data_descriptor(hash));
        debug!(
            "Finishing file entry in zipfile: {}, hash {:x}",
            finished.name, hash
        );
        self.file_index += 1;
        self.start_new_file();
    }

    /// Queues the metadata needed before the next file's data can be
    /// accepted. Zero-length files are emitted in full here, since no data
    /// will ever be written for them. If no files remain, the central
    /// directory is queued instead.
    fn start_new_file(&mut self) {
        let mut offset = file_entries_size(&self.files, Some(self.file_index));
        while let Some(file) = self.files.files.get(self.file_index) {
            if file.size != 0 {
                break;
            }
            debug!("Empty file entry in zipfile: {}", file.name);
            self.hashes.push(EMPTY_STRING_CRC32);
            let local_header = file.local_header(&self.files.directory_name, offset);
            let data_descriptor = file.data_descriptor(EMPTY_STRING_CRC32);
            offset += local_header.len() as u64 + data_descriptor.len() as u64;
            self.file_index += 1;
            self.pending_metadata.extend(local_header);
            self.pending_metadata.extend(data_descriptor);
        }
        match self.files.files.get(self.file_index) {
            Some(file) => {
                debug!("Starting file entry in zipfile: {}", file.name);
                self.byte_index = 0;
                self.pending_metadata
                    .extend(file.local_header(&self.files.directory_name, offset));
            }
            None => self.finish_zipfile(),
        }
    }

    /// Queues the central directory (one header per file) followed by the
    /// end-of-central-directory structures.
    fn finish_zipfile(&mut self) {
        debug!("Writing zipfile central directory");
        let directory_name = &self.files.directory_name;
        // Offset of the local header belonging to the file being visited.
        let mut local_header_offset = 0;
        for (index, file) in self.files.files.iter().enumerate() {
            let hash = self.hashes[index];
            debug!(
                "Writing central directory entry: {}, hash {}",
                file.name, hash
            );
            let header = file.central_directory_header(directory_name, local_header_offset, hash);
            self.pending_metadata.extend(header);
            local_header_offset += file_entry_size(file, directory_name);
        }
        debug!("Writing end of central directory");
        let trailer = end_of_central_directory(&self.files);
        self.pending_metadata.extend(trailer);
    }
}
impl<W: Write> Write for ZipGenerator<W> {
    /// Accepts file data for the current file and forwards it to the
    /// underlying writer.
    ///
    /// Any queued metadata is written out first. At most the remaining
    /// bytes of the current file are consumed from `buf`, so a single call
    /// never spans two files. Returns `Ok(0)` once every file has been
    /// written.
    fn write(&mut self, mut buf: &[u8]) -> std::io::Result<usize> {
        // Metadata has to reach the output before any further file data.
        while !self.pending_metadata.is_empty() {
            match self.output.write(&self.pending_metadata) {
                Ok(written) if written > 0 => {
                    self.pending_metadata.drain(..written);
                }
                // Either an error or a zero-length write: hand it to the caller.
                stalled => return stalled,
            }
        }

        let bytes_remaining = match self.files.files.get(self.file_index) {
            Some(file) => file.size - self.byte_index,
            None => return Ok(0),
        };
        // Never take more than what is left of the current file.
        if (buf.len() as u64) > bytes_remaining {
            buf = &buf[..bytes_remaining as usize];
        }

        let outcome = self.output.write(buf);
        match outcome {
            Ok(written) if written > 0 => {
                self.hasher.update(&buf[..written]);
                let written = written as u64;
                self.byte_index += written;
                if written == bytes_remaining {
                    self.finish_file();
                }
            }
            _ => {}
        }
        outcome
    }

    /// Writes out any queued metadata in full, then flushes the underlying
    /// writer.
    fn flush(&mut self) -> std::io::Result<()> {
        debug!("Flushing zipfile writer");
        if !self.pending_metadata.is_empty() {
            self.output.write_all(&self.pending_metadata)?;
            self.pending_metadata.clear();
        }
        self.output.flush()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An archive generated from an empty file set must be readable both as
    /// a stream (yielding no entries) and as a seekable archive (empty).
    // The `#[test]` attribute is required for the harness to run this function.
    #[test]
    fn test_no_files() {
        let mut output: Vec<u8> = vec![];
        {
            let mut zipgen = ZipGenerator::new(
                FileSet {
                    files: vec![],
                    directory_name: Some("test".to_owned()),
                },
                Box::new(std::io::Cursor::new(&mut output)),
            );
            zipgen.write_all(&[]).unwrap();
            zipgen.flush().unwrap();
        }
        eprintln!("{:?}", &output);
        {
            let mut reader = std::io::BufReader::new(output.as_slice());
            let zipfile = zip::read::read_zipfile_from_stream(&mut reader).unwrap();
            assert!(zipfile.is_none());
        }
        let archive = zip::ZipArchive::new(std::io::Cursor::new(output)).unwrap();
        assert!(archive.is_empty());
    }
}