commit fa71aa0e143fee5f83c5d36ab6ab9157bb8165ce Author: alexmullins Date: Tue Oct 27 04:12:51 2015 -0500 Initial commit and README.txt diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..b12931b --- /dev/null +++ b/README.txt @@ -0,0 +1,54 @@ +This is a fork of the Go archive/zip package to add support +for reading password protected AES encrypted files. It only supports +WinZip's AES extension: http://www.winzip.com/aes_info.htm. This +package DOES NOT intend to implement the encryption methods +mentioned in the original PKWARE spec (sections 6.0 and 7.0): +https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + +WinZip AES specification +==================================================================== +1. Encryption-Decryption w/ AES-CTR (128, 192, or 256 bits) +2. Key generation with PBKDF2-HMAC-SHA1 (1000 iteration count) that +generates a master key broken into the following: + a. The first m bytes are the encryption key + b. The next n bytes are the authentication key + c. The last 2 bytes are the password verification value. +3. The following salt lengths are used w/ the password during keygen: + ------------------------------ + AES Key Size | Salt Size + ------------------------------ + 128bit(16bytes) | 8 bytes + 192bit(24bytes) | 12 bytes + 256bit(32bytes) | 16 bytes + ------------------------------ +4. Master key len = AESKeyLen + AuthKeyLen + PWVLen: + a. AES 128 = 16 + 16 + 2 = 34 bytes of key material + b. AES 192 = 24 + 24 + 2 = 50 bytes of key material + c. AES 256 = 32 + 32 + 2 = 66 bytes of key material +5. The authentication key is the same size as the AES key. +6. Authentication with HMAC-SHA1-80 (truncated to 80bits). +7. A new master key is generated for every file. +8. The file header and directory header compression method will +be 99 (decimal). The actual compression method will be in the +extra field's payload at the end of the directory header. +9. 
An extra field will be added to the file header and directory +header, identified by 0x9901 and containing the following info: + a. Header ID (2 bytes) + b. Data Size (2 bytes) + c. Vendor Version (2 bytes) + d. Vendor ID (2 bytes) + e. AES Strength (1 byte) + f. Compression Method (2 bytes) +10. The Data Size is always 7. +11. The Vendor Version can either be 0x0001 (AE-1) or +0x0002 (AE-2). +12. Vendor ID is ASCII "AE" +13. AES Strength: + a. 0x01 - AES-128 + b. 0x02 - AES-192 + c. 0x03 - AES-256 +14. Compression Method is the actual compression method +used that was replaced by the encryption process. +15. AE-1 keeps the CRC, which should be verified after decompression. +16. AE-2 removes the CRC, so it shouldn't be verified after decompression. +Refer to http://www.winzip.com/aes_info.htm#winzip11 for the reasoning. diff --git a/example_test.go b/example_test.go new file mode 100644 index 0000000..c2ed9e7 --- /dev/null +++ b/example_test.go @@ -0,0 +1,75 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip_test + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "log" + "os" +) + +func ExampleWriter() { + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new zip archive. + w := zip.NewWriter(buf) + + // Add some files to the archive. + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling licence.\nWrite more examples."}, + } + for _, file := range files { + f, err := w.Create(file.Name) + if err != nil { + log.Fatal(err) + } + _, err = f.Write([]byte(file.Body)) + if err != nil { + log.Fatal(err) + } + } + + // Make sure to check the error on Close. 
+ err := w.Close() + if err != nil { + log.Fatal(err) + } +} + +func ExampleReader() { + // Open a zip archive for reading. + r, err := zip.OpenReader("testdata/readme.zip") + if err != nil { + log.Fatal(err) + } + defer r.Close() + + // Iterate through the files in the archive, + // printing some of their contents. + for _, f := range r.File { + fmt.Printf("Contents of %s:\n", f.Name) + rc, err := f.Open() + if err != nil { + log.Fatal(err) + } + _, err = io.CopyN(os.Stdout, rc, 68) + if err != nil { + log.Fatal(err) + } + rc.Close() + fmt.Println() + } + // Output: + // Contents of README: + // This is the source code repository for the Go programming language. +} diff --git a/reader.go b/reader.go new file mode 100644 index 0000000..519748b --- /dev/null +++ b/reader.go @@ -0,0 +1,471 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "fmt" + "hash" + "hash/crc32" + "io" + "os" +) + +var ( + ErrFormat = errors.New("zip: not a valid zip file") + ErrAlgorithm = errors.New("zip: unsupported compression algorithm") + ErrChecksum = errors.New("zip: checksum error") +) + +type Reader struct { + r io.ReaderAt + File []*File + Comment string +} + +type ReadCloser struct { + f *os.File + Reader +} + +type File struct { + FileHeader + zipr io.ReaderAt + zipsize int64 + headerOffset int64 +} + +func (f *File) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 +} + +// OpenReader will open the Zip file specified by name and return a ReadCloser. 
+func OpenReader(name string) (*ReadCloser, error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + r := new(ReadCloser) + if err := r.init(f, fi.Size()); err != nil { + f.Close() + return nil, err + } + r.f = f + return r, nil +} + +// NewReader returns a new Reader reading from r, which is assumed to +// have the given size in bytes. +func NewReader(r io.ReaderAt, size int64) (*Reader, error) { + zr := new(Reader) + if err := zr.init(r, size); err != nil { + return nil, err + } + return zr, nil +} + +func (z *Reader) init(r io.ReaderAt, size int64) error { + end, err := readDirectoryEnd(r, size) + if err != nil { + return err + } + if end.directoryRecords > uint64(size)/fileHeaderLen { + return fmt.Errorf("archive/zip: TOC declares impossible %d files in %d byte zip", end.directoryRecords, size) + } + z.r = r + z.File = make([]*File, 0, end.directoryRecords) + z.Comment = end.comment + rs := io.NewSectionReader(r, 0, size) + if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { + return err + } + buf := bufio.NewReader(rs) + + // The count of files inside a zip is truncated to fit in a uint16. + // Gloss over this by reading headers until we encounter + // a bad one, and then only report a ErrFormat or UnexpectedEOF if + // the file count modulo 65536 is incorrect. + for { + f := &File{zipr: r, zipsize: size} + err = readDirectoryHeader(f, buf) + if err == ErrFormat || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return err + } + z.File = append(z.File, f) + } + if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err + } + return nil +} + +// Close closes the Zip file, rendering it unusable for I/O. 
func (rc *ReadCloser) Close() error {
	return rc.f.Close()
}

// DataOffset returns the offset of the file's possibly-compressed
// data, relative to the beginning of the zip file.
//
// Most callers should instead use Open, which transparently
// decompresses data and verifies checksums.
func (f *File) DataOffset() (offset int64, err error) {
	// bodyOffset is relative to the start of this entry's local header.
	bodyOffset, err := f.findBodyOffset()
	if err != nil {
		return
	}
	return f.headerOffset + bodyOffset, nil
}

// Open returns a ReadCloser that provides access to the File's contents.
// Multiple files may be read concurrently.
//
// It returns ErrAlgorithm when decompressor yields nil for f.Method.
// The returned reader is a checksumReader, which verifies the byte count
// and CRC32 once the underlying stream reports io.EOF.
func (f *File) Open() (rc io.ReadCloser, err error) {
	bodyOffset, err := f.findBodyOffset()
	if err != nil {
		return
	}
	size := int64(f.CompressedSize64)
	// Restrict reads to exactly this entry's compressed bytes.
	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
	dcomp := decompressor(f.Method)
	if dcomp == nil {
		err = ErrAlgorithm
		return
	}
	rc = dcomp(r)
	var desr io.Reader
	if f.hasDataDescriptor() {
		// The data descriptor is read from the bytes that immediately
		// follow the compressed data.
		desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
	}
	rc = &checksumReader{
		rc:   rc,
		hash: crc32.NewIEEE(),
		f:    f,
		desr: desr,
	}
	return
}

// checksumReader wraps the reader produced by a Decompressor. It hashes and
// counts every byte read so that the size and CRC32 recorded for the File
// can be checked when the stream ends.
type checksumReader struct {
	rc    io.ReadCloser
	hash  hash.Hash32
	nread uint64 // number of bytes read so far
	f     *File
	desr  io.Reader // if non-nil, where to read the data descriptor
	err   error     // sticky error
}

// Read reads from the wrapped stream, adding the bytes to the running CRC32
// and byte count. Once an error has been stored in r.err, that error is
// returned without touching the wrapped reader again.
//
// When the wrapped reader reports io.EOF:
//   - a byte count different from f.UncompressedSize64 yields
//     io.ErrUnexpectedEOF;
//   - with a data descriptor reader, the descriptor is read first (an io.EOF
//     while doing so becomes io.ErrUnexpectedEOF) and then the CRC32 is
//     compared;
//   - without one, the CRC32 is compared only when f.CRC32 is non-zero.
//
// A CRC mismatch is reported as ErrChecksum.
func (r *checksumReader) Read(b []byte) (n int, err error) {
	if r.err != nil {
		return 0, r.err
	}
	n, err = r.rc.Read(b)
	r.hash.Write(b[:n])
	r.nread += uint64(n)
	if err == nil {
		return
	}
	if err == io.EOF {
		if r.nread != r.f.UncompressedSize64 {
			// Note: this early return reports n as 0 and does not
			// record the error in r.err.
			return 0, io.ErrUnexpectedEOF
		}
		if r.desr != nil {
			if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
				if err1 == io.EOF {
					err = io.ErrUnexpectedEOF
				} else {
					err = err1
				}
			} else if r.hash.Sum32() != r.f.CRC32 {
				err = ErrChecksum
			}
		} else {
			// If there's not a data descriptor, we still compare
			// the CRC32 of what we've read against the file header
			// or TOC's CRC32, if it seems like it was set.
			if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
				err = ErrChecksum
			}
		}
	}
	// Remember the outcome (io.EOF included) for subsequent calls.
	r.err = err
	return
}

// Close closes the wrapped decompressing reader.
func (r *checksumReader) Close() error { return r.rc.Close() }

// findBodyOffset does the minimum work to verify the file has a header
// and returns the file body offset.
//
// The returned offset is relative to f.headerOffset. ErrFormat is returned
// when the bytes at f.headerOffset do not start with fileHeaderSignature.
func (f *File) findBodyOffset() (int64, error) {
	var buf [fileHeaderLen]byte
	if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
		return 0, err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != fileHeaderSignature {
		return 0, ErrFormat
	}
	b = b[22:] // skip over most of the header
	// Only the two variable-part lengths are needed to locate the body.
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	return int64(fileHeaderLen + filenameLen + extraLen), nil
}

// readDirectoryHeader attempts to read a directory header from r.
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
// and ErrFormat if it doesn't find a valid header signature.
+func readDirectoryHeader(f *File, r io.Reader) error { + var buf [directoryHeaderLen]byte + if _, err := io.ReadFull(r, buf[:]); err != nil { + return err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != directoryHeaderSignature { + return ErrFormat + } + f.CreatorVersion = b.uint16() + f.ReaderVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + f.CompressedSize64 = uint64(f.CompressedSize) + f.UncompressedSize64 = uint64(f.UncompressedSize) + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + commentLen := int(b.uint16()) + b = b[4:] // skipped start disk number and internal attributes (2x uint16) + f.ExternalAttrs = b.uint32() + f.headerOffset = int64(b.uint32()) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + + if len(f.Extra) > 0 { + b := readBuf(f.Extra) + for len(b) >= 4 { // need at least tag and size + tag := b.uint16() + size := b.uint16() + if int(size) > len(b) { + return ErrFormat + } + if tag == zip64ExtraId { + // update directory values from the zip64 extra block + eb := readBuf(b[:size]) + if len(eb) >= 8 { + f.UncompressedSize64 = eb.uint64() + } + if len(eb) >= 8 { + f.CompressedSize64 = eb.uint64() + } + if len(eb) >= 8 { + f.headerOffset = int64(eb.uint64()) + } + } + b = b[size:] + } + // Should have consumed the whole header. + // But popular zip & JAR creation tools are broken and + // may pad extra zeros at the end, so accept those + // too. See golang.org/issue/8186. 
+ for _, v := range b { + if v != 0 { + return ErrFormat + } + } + } + return nil +} + +func readDataDescriptor(r io.Reader, f *File) error { + var buf [dataDescriptorLen]byte + + // The spec says: "Although not originally assigned a + // signature, the value 0x08074b50 has commonly been adopted + // as a signature value for the data descriptor record. + // Implementers should be aware that ZIP files may be + // encountered with or without this signature marking data + // descriptors and should account for either case when reading + // ZIP files to ensure compatibility." + // + // dataDescriptorLen includes the size of the signature but + // first read just those 4 bytes to see if it exists. + if _, err := io.ReadFull(r, buf[:4]); err != nil { + return err + } + off := 0 + maybeSig := readBuf(buf[:4]) + if maybeSig.uint32() != dataDescriptorSignature { + // No data descriptor signature. Keep these four + // bytes. + off += 4 + } + if _, err := io.ReadFull(r, buf[off:12]); err != nil { + return err + } + b := readBuf(buf[:12]) + if b.uint32() != f.CRC32 { + return ErrChecksum + } + + // The two sizes that follow here can be either 32 bits or 64 bits + // but the spec is not very clear on this and different + // interpretations has been made causing incompatibilities. We + // already have the sizes from the central directory so we can + // just ignore these. 
+ + return nil +} + +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var buf []byte + var directoryEndOffset int64 + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + buf = make([]byte, int(bLen)) + if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { + return nil, err + } + if p := findSignatureInBlock(buf); p >= 0 { + buf = buf[p:] + directoryEndOffset = size - bLen + int64(p) + break + } + if i == 1 || bLen == size { + return nil, ErrFormat + } + } + + // read header into struct + b := readBuf(buf[4:]) // skip signature + d := &directoryEnd{ + diskNbr: uint32(b.uint16()), + dirDiskNbr: uint32(b.uint16()), + dirRecordsThisDisk: uint64(b.uint16()), + directoryRecords: uint64(b.uint16()), + directorySize: uint64(b.uint32()), + directoryOffset: uint64(b.uint32()), + commentLen: b.uint16(), + } + l := int(d.commentLen) + if l > len(b) { + return nil, errors.New("zip: invalid comment length") + } + d.comment = string(b[:l]) + + p, err := findDirectory64End(r, directoryEndOffset) + if err == nil && p >= 0 { + err = readDirectory64End(r, p, d) + } + if err != nil { + return nil, err + } + + // Make sure directoryOffset points to somewhere in our file. + if o := int64(d.directoryOffset); o < 0 || o >= size { + return nil, ErrFormat + } + return d, nil +} + +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. 
+func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + b = b[4:] // skip number of the disk with the start of the zip64 end of central directory + p := b.uint64() // relative offset of the zip64 end of central directory record + return int64(p), nil +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return ErrFormat + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.diskNbr = b.uint32() // number of this disk + d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.directoryRecords = b.uint64() // total number of entries in the central directory + d.directorySize = b.uint64() // size of the central directory + d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if 
n+directoryEndLen+i <= len(b) { + return i + } + } + } + return -1 +} + +type readBuf []byte + +func (b *readBuf) uint16() uint16 { + v := binary.LittleEndian.Uint16(*b) + *b = (*b)[2:] + return v +} + +func (b *readBuf) uint32() uint32 { + v := binary.LittleEndian.Uint32(*b) + *b = (*b)[4:] + return v +} + +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} diff --git a/reader_test.go b/reader_test.go new file mode 100644 index 0000000..547dd39 --- /dev/null +++ b/reader_test.go @@ -0,0 +1,607 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "io" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strings" + "testing" + "time" +) + +type ZipTest struct { + Name string + Source func() (r io.ReaderAt, size int64) // if non-nil, used instead of testdata/ file + Comment string + File []ZipTestFile + Error error // the error that Opening this file should return +} + +type ZipTestFile struct { + Name string + Content []byte // if blank, will attempt to compare against File + ContentErr error + File string // name of file to compare to (relative to testdata/) + Mtime string // modified time in format "mm-dd-yy hh:mm:ss" + Mode os.FileMode +} + +// Caution: The Mtime values found for the test files should correspond to +// the values listed with unzip -l . However, the values +// listed by unzip appear to be off by some hours. When creating +// fresh test files and testing them, this issue is not present. +// The test files were created in Sydney, so there might be a time +// zone issue. The time zone information does have to be encoded +// somewhere, because otherwise unzip -l could not provide a different +// time from what the archive/zip package provides, but there appears +// to be no documentation about this. 
+ +var tests = []ZipTest{ + { + Name: "test.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Mtime: "09-05-10 12:12:02", + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Mtime: "09-05-10 15:52:58", + Mode: 0644, + }, + }, + }, + { + Name: "test-trailing-junk.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Mtime: "09-05-10 12:12:02", + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Mtime: "09-05-10 15:52:58", + Mode: 0644, + }, + }, + }, + { + Name: "r.zip", + Source: returnRecursiveZip, + File: []ZipTestFile{ + { + Name: "r/r.zip", + Content: rZipBytes(), + Mtime: "03-04-10 00:24:16", + Mode: 0666, + }, + }, + }, + { + Name: "symlink.zip", + File: []ZipTestFile{ + { + Name: "symlink", + Content: []byte("../target"), + Mode: 0777 | os.ModeSymlink, + }, + }, + }, + { + Name: "readme.zip", + }, + { + Name: "readme.notzip", + Error: ErrFormat, + }, + { + Name: "dd.zip", + File: []ZipTestFile{ + { + Name: "filename", + Content: []byte("This is a test textfile.\n"), + Mtime: "02-02-11 13:06:20", + Mode: 0666, + }, + }, + }, + { + // created in windows XP file manager. 
+ Name: "winxp.zip", + File: crossPlatform, + }, + { + // created by Zip 3.0 under Linux + Name: "unix.zip", + File: crossPlatform, + }, + { + // created by Go, before we wrote the "optional" data + // descriptor signatures (which are required by OS X) + Name: "go-no-datadesc-sig.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Mtime: "03-08-12 16:59:10", + Mode: 0644, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Mtime: "03-08-12 16:59:12", + Mode: 0644, + }, + }, + }, + { + // created by Go, after we wrote the "optional" data + // descriptor signatures (which are required by OS X) + Name: "go-with-datadesc-sig.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Mode: 0666, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Mode: 0666, + }, + }, + }, + { + Name: "Bad-CRC32-in-data-descriptor", + Source: returnCorruptCRC32Zip, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Mode: 0666, + ContentErr: ErrChecksum, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Mode: 0666, + }, + }, + }, + // Tests that we verify (and accept valid) crc32s on files + // with crc32s in their file header (not in data descriptors) + { + Name: "crc32-not-streamed.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Mtime: "03-08-12 16:59:10", + Mode: 0644, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Mtime: "03-08-12 16:59:12", + Mode: 0644, + }, + }, + }, + // Tests that we verify (and reject invalid) crc32s on files + // with crc32s in their file header (not in data descriptors) + { + Name: "crc32-not-streamed.zip", + Source: returnCorruptNotStreamedZip, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Mtime: "03-08-12 16:59:10", + Mode: 0644, + ContentErr: ErrChecksum, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Mtime: "03-08-12 16:59:12", + Mode: 0644, + }, + }, + }, + { + Name: 
"zip64.zip", + File: []ZipTestFile{ + { + Name: "README", + Content: []byte("This small file is in ZIP64 format.\n"), + Mtime: "08-10-12 14:33:32", + Mode: 0644, + }, + }, + }, + // Another zip64 file with different Extras fields. (golang.org/issue/7069) + { + Name: "zip64-2.zip", + File: []ZipTestFile{ + { + Name: "README", + Content: []byte("This small file is in ZIP64 format.\n"), + Mtime: "08-10-12 14:33:32", + Mode: 0644, + }, + }, + }, +} + +var crossPlatform = []ZipTestFile{ + { + Name: "hello", + Content: []byte("world \r\n"), + Mode: 0666, + }, + { + Name: "dir/bar", + Content: []byte("foo \r\n"), + Mode: 0666, + }, + { + Name: "dir/empty/", + Content: []byte{}, + Mode: os.ModeDir | 0777, + }, + { + Name: "readonly", + Content: []byte("important \r\n"), + Mode: 0444, + }, +} + +func TestReader(t *testing.T) { + for _, zt := range tests { + readTestZip(t, zt) + } +} + +func readTestZip(t *testing.T, zt ZipTest) { + var z *Reader + var err error + if zt.Source != nil { + rat, size := zt.Source() + z, err = NewReader(rat, size) + } else { + var rc *ReadCloser + rc, err = OpenReader(filepath.Join("testdata", zt.Name)) + if err == nil { + defer rc.Close() + z = &rc.Reader + } + } + if err != zt.Error { + t.Errorf("%s: error=%v, want %v", zt.Name, err, zt.Error) + return + } + + // bail if file is not zip + if err == ErrFormat { + return + } + + // bail here if no Files expected to be tested + // (there may actually be files in the zip, but we don't care) + if zt.File == nil { + return + } + + if z.Comment != zt.Comment { + t.Errorf("%s: comment=%q, want %q", zt.Name, z.Comment, zt.Comment) + } + if len(z.File) != len(zt.File) { + t.Fatalf("%s: file count=%d, want %d", zt.Name, len(z.File), len(zt.File)) + } + + // test read of each file + for i, ft := range zt.File { + readTestFile(t, zt, ft, z.File[i]) + } + + // test simultaneous reads + n := 0 + done := make(chan bool) + for i := 0; i < 5; i++ { + for j, ft := range zt.File { + go func(j int, ft ZipTestFile) 
{ + readTestFile(t, zt, ft, z.File[j]) + done <- true + }(j, ft) + n++ + } + } + for ; n > 0; n-- { + <-done + } +} + +func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) { + if f.Name != ft.Name { + t.Errorf("%s: name=%q, want %q", zt.Name, f.Name, ft.Name) + } + + if ft.Mtime != "" { + mtime, err := time.Parse("01-02-06 15:04:05", ft.Mtime) + if err != nil { + t.Error(err) + return + } + if ft := f.ModTime(); !ft.Equal(mtime) { + t.Errorf("%s: %s: mtime=%s, want %s", zt.Name, f.Name, ft, mtime) + } + } + + testFileMode(t, zt.Name, f, ft.Mode) + + var b bytes.Buffer + r, err := f.Open() + if err != nil { + t.Errorf("%s: %v", zt.Name, err) + return + } + + _, err = io.Copy(&b, r) + if err != ft.ContentErr { + t.Errorf("%s: copying contents: %v (want %v)", zt.Name, err, ft.ContentErr) + } + if err != nil { + return + } + r.Close() + + size := uint64(f.UncompressedSize) + if size == uint32max { + size = f.UncompressedSize64 + } + if g := uint64(b.Len()); g != size { + t.Errorf("%v: read %v bytes but f.UncompressedSize == %v", f.Name, g, size) + } + + var c []byte + if ft.Content != nil { + c = ft.Content + } else if c, err = ioutil.ReadFile("testdata/" + ft.File); err != nil { + t.Error(err) + return + } + + if b.Len() != len(c) { + t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c)) + return + } + + for i, b := range b.Bytes() { + if b != c[i] { + t.Errorf("%s: content[%d]=%q want %q", f.Name, i, b, c[i]) + return + } + } +} + +func testFileMode(t *testing.T, zipName string, f *File, want os.FileMode) { + mode := f.Mode() + if want == 0 { + t.Errorf("%s: %s mode: got %v, want none", zipName, f.Name, mode) + } else if mode != want { + t.Errorf("%s: %s mode: want %v, got %v", zipName, f.Name, want, mode) + } +} + +func TestInvalidFiles(t *testing.T) { + const size = 1024 * 70 // 70kb + b := make([]byte, size) + + // zeroes + _, err := NewReader(bytes.NewReader(b), size) + if err != ErrFormat { + t.Errorf("zeroes: error=%v, want %v", err, 
ErrFormat) + } + + // repeated directoryEndSignatures + sig := make([]byte, 4) + binary.LittleEndian.PutUint32(sig, directoryEndSignature) + for i := 0; i < size-4; i += 4 { + copy(b[i:i+4], sig) + } + _, err = NewReader(bytes.NewReader(b), size) + if err != ErrFormat { + t.Errorf("sigs: error=%v, want %v", err, ErrFormat) + } +} + +func messWith(fileName string, corrupter func(b []byte)) (r io.ReaderAt, size int64) { + data, err := ioutil.ReadFile(filepath.Join("testdata", fileName)) + if err != nil { + panic("Error reading " + fileName + ": " + err.Error()) + } + corrupter(data) + return bytes.NewReader(data), int64(len(data)) +} + +func returnCorruptCRC32Zip() (r io.ReaderAt, size int64) { + return messWith("go-with-datadesc-sig.zip", func(b []byte) { + // Corrupt one of the CRC32s in the data descriptor: + b[0x2d]++ + }) +} + +func returnCorruptNotStreamedZip() (r io.ReaderAt, size int64) { + return messWith("crc32-not-streamed.zip", func(b []byte) { + // Corrupt foo.txt's final crc32 byte, in both + // the file header and TOC. (0x7e -> 0x7f) + b[0x11]++ + b[0x9d]++ + + // TODO(bradfitz): add a new test that only corrupts + // one of these values, and verify that that's also an + // error. Currently, the reader code doesn't verify the + // fileheader and TOC's crc32 match if they're both + // non-zero and only the second line above, the TOC, + // is what matters. + }) +} + +// rZipBytes returns the bytes of a recursive zip file, without +// putting it on disk and triggering certain virus scanners. 
+func rZipBytes() []byte { + s := ` +0000000 50 4b 03 04 14 00 00 00 08 00 08 03 64 3c f9 f4 +0000010 89 64 48 01 00 00 b8 01 00 00 07 00 00 00 72 2f +0000020 72 2e 7a 69 70 00 25 00 da ff 50 4b 03 04 14 00 +0000030 00 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 +0000040 b8 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 +0000050 2f 00 d0 ff 00 25 00 da ff 50 4b 03 04 14 00 00 +0000060 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 b8 +0000070 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 2f +0000080 00 d0 ff c2 54 8e 57 39 00 05 00 fa ff c2 54 8e +0000090 57 39 00 05 00 fa ff 00 05 00 fa ff 00 14 00 eb +00000a0 ff c2 54 8e 57 39 00 05 00 fa ff 00 05 00 fa ff +00000b0 00 14 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 +00000c0 88 21 c4 00 00 14 00 eb ff 42 88 21 c4 00 00 14 +00000d0 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 88 21 +00000e0 c4 00 00 00 00 ff ff 00 00 00 ff ff 00 34 00 cb +00000f0 ff 42 88 21 c4 00 00 00 00 ff ff 00 00 00 ff ff +0000100 00 34 00 cb ff 42 e8 21 5e 0f 00 00 00 ff ff 0a +0000110 f0 66 64 12 61 c0 15 dc e8 a0 48 bf 48 af 2a b3 +0000120 20 c0 9b 95 0d c4 67 04 42 53 06 06 06 40 00 06 +0000130 00 f9 ff 6d 01 00 00 00 00 42 e8 21 5e 0f 00 00 +0000140 00 ff ff 0a f0 66 64 12 61 c0 15 dc e8 a0 48 bf +0000150 48 af 2a b3 20 c0 9b 95 0d c4 67 04 42 53 06 06 +0000160 06 40 00 06 00 f9 ff 6d 01 00 00 00 00 50 4b 01 +0000170 02 14 00 14 00 00 00 08 00 08 03 64 3c f9 f4 89 +0000180 64 48 01 00 00 b8 01 00 00 07 00 00 00 00 00 00 +0000190 00 00 00 00 00 00 00 00 00 00 00 72 2f 72 2e 7a +00001a0 69 70 50 4b 05 06 00 00 00 00 01 00 01 00 35 00 +00001b0 00 00 6d 01 00 00 00 00` + s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") + s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func returnRecursiveZip() (r io.ReaderAt, size int64) { + b := rZipBytes() + return bytes.NewReader(b), int64(len(b)) +} + +func TestIssue8186(t *testing.T) { + // Directory headers & 
data found in the TOC of a JAR file. + dirEnts := []string{ + "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\xaa\x1b\x06\xf0\x81\x02\x00\x00\x81\x02\x00\x00-\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00res/drawable-xhdpi-v4/ic_actionbar_accept.png\xfe\xca\x00\x00\x00", + "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\x90K\x89\xc7t\n\x00\x00t\n\x00\x00\x0e\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd1\x02\x00\x00resources.arsc\x00\x00\x00", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xff$\x18\xed3\x03\x00\x00\xb4\b\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00t\r\x00\x00AndroidManifest.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\x14\xc5K\xab\x192\x02\x00\xc8\xcd\x04\x00\v\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe8\x10\x00\x00classes.dex", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?E\x96\nD\xac\x01\x00\x00P\x03\x00\x00&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:C\x02\x00res/layout/actionbar_set_wallpaper.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?Ļ\x14\xe3\xd8\x01\x00\x00\xd8\x03\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:E\x02\x00res/layout/wallpaper_cropper.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?}\xc1\x15\x9eZ\x01\x00\x00!\x02\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`G\x02\x00META-INF/MANIFEST.MF", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xe6\x98Ьo\x01\x00\x00\x84\x02\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfcH\x02\x00META-INF/CERT.SF", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xbfP\x96b\x86\x04\x00\x00\xb2\x06\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa9J\x02\x00META-INF/CERT.RSA", + } + for i, s := range dirEnts { + var f File + err := readDirectoryHeader(&f, strings.NewReader(s)) + if err != nil { + t.Errorf("error reading #%d: %v", i, err) + } + } +} + +// Verify we return ErrUnexpectedEOF when length is short. 
+func TestIssue10957(t *testing.T) { + data := []byte("PK\x03\x040000000PK\x01\x0200000" + + "0000000000000000000\x00" + + "\x00\x00\x00\x00\x00000000000000PK\x01" + + "\x020000000000000000000" + + "00000\v\x00\x00\x00\x00\x00000000000" + + "00000000000000PK\x01\x0200" + + "00000000000000000000" + + "00\v\x00\x00\x00\x00\x00000000000000" + + "00000000000PK\x01\x020000<" + + "0\x00\x0000000000000000\v\x00\v" + + "\x00\x00\x00\x00\x0000000000\x00\x00\x00\x00000" + + "00000000PK\x01\x0200000000" + + "0000000000000000\v\x00\x00\x00" + + "\x00\x0000PK\x05\x06000000\x05\x000000" + + "\v\x00\x00\x00\x00\x00") + z, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + for i, f := range z.File { + r, err := f.Open() + if err != nil { + continue + } + if f.UncompressedSize64 < 1e6 { + n, err := io.Copy(ioutil.Discard, r) + if i == 3 && err != io.ErrUnexpectedEOF { + t.Errorf("File[3] error = %v; want io.ErrUnexpectedEOF", err) + } + if err == nil && uint64(n) != f.UncompressedSize64 { + t.Errorf("file %d: bad size: copied=%d; want=%d", i, n, f.UncompressedSize64) + } + } + r.Close() + } +} + +// Verify the number of files is sane. +func TestIssue10956(t *testing.T) { + data := []byte("PK\x06\x06PK\x06\a0000\x00\x00\x00\x00\x00\x00\x00\x00" + + "0000PK\x05\x06000000000000" + + "0000\v\x00000\x00\x00\x00\x00\x00\x00\x000") + _, err := NewReader(bytes.NewReader(data), int64(len(data))) + const want = "TOC declares impossible 3472328296227680304 files in 57 byte" + if err == nil && !strings.Contains(err.Error(), want) { + t.Errorf("error = %v; want %q", err, want) + } +} + +// Verify we return ErrUnexpectedEOF when reading truncated data descriptor. 
+func TestIssue11146(t *testing.T) { + data := []byte("PK\x03\x040000000000000000" + + "000000\x01\x00\x00\x000\x01\x00\x00\xff\xff0000" + + "0000000000000000PK\x01\x02" + + "0000\b0\b\x00000000000000" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000000PK\x05\x06\x00\x00" + + "\x00\x0000\x01\x0000008\x00\x00\x00\x00\x00") + z, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + r, err := z.File[0].Open() + if err != nil { + t.Fatal(err) + } + _, err = ioutil.ReadAll(r) + if err != io.ErrUnexpectedEOF { + t.Errorf("File[0] error = %v; want io.ErrUnexpectedEOF", err) + } + r.Close() +} diff --git a/register.go b/register.go new file mode 100644 index 0000000..4211ec7 --- /dev/null +++ b/register.go @@ -0,0 +1,110 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "compress/flate" + "errors" + "io" + "io/ioutil" + "sync" +) + +// A Compressor returns a compressing writer, writing to the +// provided writer. On Close, any pending data should be flushed. +type Compressor func(io.Writer) (io.WriteCloser, error) + +// Decompressor is a function that wraps a Reader with a decompressing Reader. +// The decompressed ReadCloser is returned to callers who open files from +// within the archive. These callers are responsible for closing this reader +// when they're finished reading. 
+type Decompressor func(io.Reader) io.ReadCloser + +var flateWriterPool sync.Pool + +func newFlateWriter(w io.Writer) io.WriteCloser { + fw, ok := flateWriterPool.Get().(*flate.Writer) + if ok { + fw.Reset(w) + } else { + fw, _ = flate.NewWriter(w, 5) + } + return &pooledFlateWriter{fw: fw} +} + +type pooledFlateWriter struct { + mu sync.Mutex // guards Close and Write + fw *flate.Writer +} + +func (w *pooledFlateWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.fw == nil { + return 0, errors.New("Write after Close") + } + return w.fw.Write(p) +} + +func (w *pooledFlateWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.fw != nil { + err = w.fw.Close() + flateWriterPool.Put(w.fw) + w.fw = nil + } + return err +} + +var ( + mu sync.RWMutex // guards compressor and decompressor maps + + compressors = map[uint16]Compressor{ + Store: func(w io.Writer) (io.WriteCloser, error) { return &nopCloser{w}, nil }, + Deflate: func(w io.Writer) (io.WriteCloser, error) { return newFlateWriter(w), nil }, + } + + decompressors = map[uint16]Decompressor{ + Store: ioutil.NopCloser, + Deflate: flate.NewReader, + } +) + +// RegisterDecompressor allows custom decompressors for a specified method ID. +func RegisterDecompressor(method uint16, d Decompressor) { + mu.Lock() + defer mu.Unlock() + + if _, ok := decompressors[method]; ok { + panic("decompressor already registered") + } + decompressors[method] = d +} + +// RegisterCompressor registers custom compressors for a specified method ID. +// The common methods Store and Deflate are built in. 
+func RegisterCompressor(method uint16, comp Compressor) { + mu.Lock() + defer mu.Unlock() + + if _, ok := compressors[method]; ok { + panic("compressor already registered") + } + compressors[method] = comp +} + +func compressor(method uint16) Compressor { + mu.RLock() + defer mu.RUnlock() + return compressors[method] +} + +func decompressor(method uint16) Decompressor { + mu.RLock() + defer mu.RUnlock() + return decompressors[method] +} diff --git a/struct.go b/struct.go new file mode 100644 index 0000000..137d049 --- /dev/null +++ b/struct.go @@ -0,0 +1,313 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zip provides support for reading and writing ZIP archives. + +See: http://www.pkware.com/documents/casestudies/APPNOTE.TXT + +This package does not support disk spanning. + +A note about ZIP64: + +To be backwards compatible the FileHeader has both 32 and 64 bit Size +fields. The 64 bit fields will always contain the correct value and +for normal archives both fields will be the same. For files requiring +the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit +fields must be used instead. +*/ +package zip + +import ( + "os" + "path" + "time" +) + +// Compression methods. 
const (
	Store   uint16 = 0 // method ID 0
	Deflate uint16 = 8 // method ID 8
)

// Record signatures.
const (
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
)

// Fixed record lengths in bytes; variable-length trailers are noted.
const (
	fileHeaderLen       = 30 // + filename + extra
	directoryHeaderLen  = 46 // + filename + extra + comment
	directoryEndLen     = 22 // + comment
	dataDescriptorLen   = 16 // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len = 24 // descriptor with 8 byte sizes
	directory64LocLen   = 20 //
	directory64EndLen   = 56 // + extra
)

// Constants for the first byte in CreatorVersion
const (
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19
)

// Version numbers.
const (
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)
)

// Limits for non zip64 files.
const (
	uint16max = 1<<16 - 1
	uint32max = 1<<32 - 1
)

// Extra header IDs.
const (
	zip64ExtraId = 0x0001 // zip64 Extended Information Extra Field
)

// FileHeader holds the metadata of a single file within a zip file.
// See the zip spec for details.
type FileHeader struct {
	// Name is the name of the file.
	// It must be a relative path: it must not start with a drive
	// letter (e.g. C:) or leading slash, and only forward slashes
	// are allowed.
	Name string

	CreatorVersion uint16
	ReaderVersion  uint16
	Flags          uint16
	Method         uint16

	ModifiedTime uint16 // MS-DOS time
	ModifiedDate uint16 // MS-DOS date

	CRC32 uint32

	CompressedSize   uint32 // Deprecated: Use CompressedSize64 instead.
	UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead.

	CompressedSize64   uint64
	UncompressedSize64 uint64

	Extra         []byte
	ExternalAttrs uint32 // Meaning depends on CreatorVersion
	Comment       string
}

// FileInfo returns an os.FileInfo for the FileHeader.
+func (h *FileHeader) FileInfo() os.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements os.FileInfo. +type headerFileInfo struct { + fh *FileHeader +} + +func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) } +func (fi headerFileInfo) Size() int64 { + if fi.fh.UncompressedSize64 > 0 { + return int64(fi.fh.UncompressedSize64) + } + return int64(fi.fh.UncompressedSize) +} +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { return fi.fh.ModTime() } +func (fi headerFileInfo) Mode() os.FileMode { return fi.fh.Mode() } +func (fi headerFileInfo) Sys() interface{} { return fi.fh } + +// FileInfoHeader creates a partially-populated FileHeader from an +// os.FileInfo. +// Because os.FileInfo's Name method returns only the base name of +// the file it describes, it may be necessary to modify the Name field +// of the returned header to provide the full path name of the file. +func FileInfoHeader(fi os.FileInfo) (*FileHeader, error) { + size := fi.Size() + fh := &FileHeader{ + Name: fi.Name(), + UncompressedSize64: uint64(size), + } + fh.SetModTime(fi.ModTime()) + fh.SetMode(fi.Mode()) + if fh.UncompressedSize64 > uint32max { + fh.UncompressedSize = uint32max + } else { + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + return fh, nil +} + +type directoryEnd struct { + diskNbr uint32 // unused + dirDiskNbr uint32 // unused + dirRecordsThisDisk uint64 // unused + directoryRecords uint64 + directorySize uint64 + directoryOffset uint64 // relative to file + commentLen uint16 + comment string +} + +// msDosTimeToTime converts an MS-DOS date and time into a time.Time. +// The resolution is 2s. 
+// See: http://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx +func msDosTimeToTime(dosDate, dosTime uint16) time.Time { + return time.Date( + // date bits 0-4: day of month; 5-8: month; 9-15: years since 1980 + int(dosDate>>9+1980), + time.Month(dosDate>>5&0xf), + int(dosDate&0x1f), + + // time bits 0-4: second/2; 5-10: minute; 11-15: hour + int(dosTime>>11), + int(dosTime>>5&0x3f), + int(dosTime&0x1f*2), + 0, // nanoseconds + + time.UTC, + ) +} + +// timeToMsDosTime converts a time.Time to an MS-DOS date and time. +// The resolution is 2s. +// See: http://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx +func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) { + t = t.In(time.UTC) + fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9) + fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11) + return +} + +// ModTime returns the modification time in UTC. +// The resolution is 2s. +func (h *FileHeader) ModTime() time.Time { + return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime) +} + +// SetModTime sets the ModifiedTime and ModifiedDate fields to the given time in UTC. +// The resolution is 2s. +func (h *FileHeader) SetModTime(t time.Time) { + h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t) +} + +const ( + // Unix constants. The specification doesn't mention them, + // but these seem to be the values agreed on by tools. + s_IFMT = 0xf000 + s_IFSOCK = 0xc000 + s_IFLNK = 0xa000 + s_IFREG = 0x8000 + s_IFBLK = 0x6000 + s_IFDIR = 0x4000 + s_IFCHR = 0x2000 + s_IFIFO = 0x1000 + s_ISUID = 0x800 + s_ISGID = 0x400 + s_ISVTX = 0x200 + + msdosDir = 0x10 + msdosReadOnly = 0x01 +) + +// Mode returns the permission and mode bits for the FileHeader. 
+func (h *FileHeader) Mode() (mode os.FileMode) { + switch h.CreatorVersion >> 8 { + case creatorUnix, creatorMacOSX: + mode = unixModeToFileMode(h.ExternalAttrs >> 16) + case creatorNTFS, creatorVFAT, creatorFAT: + mode = msdosModeToFileMode(h.ExternalAttrs) + } + if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' { + mode |= os.ModeDir + } + return mode +} + +// SetMode changes the permission and mode bits for the FileHeader. +func (h *FileHeader) SetMode(mode os.FileMode) { + h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8 + h.ExternalAttrs = fileModeToUnixMode(mode) << 16 + + // set MSDOS attributes too, as the original zip does. + if mode&os.ModeDir != 0 { + h.ExternalAttrs |= msdosDir + } + if mode&0200 == 0 { + h.ExternalAttrs |= msdosReadOnly + } +} + +// isZip64 reports whether the file size exceeds the 32 bit limit +func (fh *FileHeader) isZip64() bool { + return fh.CompressedSize64 > uint32max || fh.UncompressedSize64 > uint32max +} + +func msdosModeToFileMode(m uint32) (mode os.FileMode) { + if m&msdosDir != 0 { + mode = os.ModeDir | 0777 + } else { + mode = 0666 + } + if m&msdosReadOnly != 0 { + mode &^= 0222 + } + return mode +} + +func fileModeToUnixMode(mode os.FileMode) uint32 { + var m uint32 + switch mode & os.ModeType { + default: + m = s_IFREG + case os.ModeDir: + m = s_IFDIR + case os.ModeSymlink: + m = s_IFLNK + case os.ModeNamedPipe: + m = s_IFIFO + case os.ModeSocket: + m = s_IFSOCK + case os.ModeDevice: + if mode&os.ModeCharDevice != 0 { + m = s_IFCHR + } else { + m = s_IFBLK + } + } + if mode&os.ModeSetuid != 0 { + m |= s_ISUID + } + if mode&os.ModeSetgid != 0 { + m |= s_ISGID + } + if mode&os.ModeSticky != 0 { + m |= s_ISVTX + } + return m | uint32(mode&0777) +} + +func unixModeToFileMode(m uint32) os.FileMode { + mode := os.FileMode(m & 0777) + switch m & s_IFMT { + case s_IFBLK: + mode |= os.ModeDevice + case s_IFCHR: + mode |= os.ModeDevice | os.ModeCharDevice + case s_IFDIR: + mode |= os.ModeDir + case s_IFIFO: + mode |= 
os.ModeNamedPipe + case s_IFLNK: + mode |= os.ModeSymlink + case s_IFREG: + // nothing to do + case s_IFSOCK: + mode |= os.ModeSocket + } + if m&s_ISGID != 0 { + mode |= os.ModeSetgid + } + if m&s_ISUID != 0 { + mode |= os.ModeSetuid + } + if m&s_ISVTX != 0 { + mode |= os.ModeSticky + } + return mode +} diff --git a/testdata/crc32-not-streamed.zip b/testdata/crc32-not-streamed.zip new file mode 100644 index 0000000..f268d88 Binary files /dev/null and b/testdata/crc32-not-streamed.zip differ diff --git a/testdata/dd.zip b/testdata/dd.zip new file mode 100644 index 0000000..e53378b Binary files /dev/null and b/testdata/dd.zip differ diff --git a/testdata/go-no-datadesc-sig.zip b/testdata/go-no-datadesc-sig.zip new file mode 100644 index 0000000..c3d593f Binary files /dev/null and b/testdata/go-no-datadesc-sig.zip differ diff --git a/testdata/go-with-datadesc-sig.zip b/testdata/go-with-datadesc-sig.zip new file mode 100644 index 0000000..bcfe121 Binary files /dev/null and b/testdata/go-with-datadesc-sig.zip differ diff --git a/testdata/gophercolor16x16.png b/testdata/gophercolor16x16.png new file mode 100644 index 0000000..48854ff Binary files /dev/null and b/testdata/gophercolor16x16.png differ diff --git a/testdata/readme.notzip b/testdata/readme.notzip new file mode 100644 index 0000000..8173727 Binary files /dev/null and b/testdata/readme.notzip differ diff --git a/testdata/readme.zip b/testdata/readme.zip new file mode 100644 index 0000000..5642a67 Binary files /dev/null and b/testdata/readme.zip differ diff --git a/testdata/symlink.zip b/testdata/symlink.zip new file mode 100644 index 0000000..af84693 Binary files /dev/null and b/testdata/symlink.zip differ diff --git a/testdata/test-trailing-junk.zip b/testdata/test-trailing-junk.zip new file mode 100644 index 0000000..42281b4 Binary files /dev/null and b/testdata/test-trailing-junk.zip differ diff --git a/testdata/test.zip b/testdata/test.zip new file mode 100644 index 0000000..03890c0 Binary files /dev/null 
and b/testdata/test.zip differ diff --git a/testdata/unix.zip b/testdata/unix.zip new file mode 100644 index 0000000..ce1a981 Binary files /dev/null and b/testdata/unix.zip differ diff --git a/testdata/winxp.zip b/testdata/winxp.zip new file mode 100644 index 0000000..3919322 Binary files /dev/null and b/testdata/winxp.zip differ diff --git a/testdata/zip64-2.zip b/testdata/zip64-2.zip new file mode 100644 index 0000000..f844e35 Binary files /dev/null and b/testdata/zip64-2.zip differ diff --git a/testdata/zip64.zip b/testdata/zip64.zip new file mode 100644 index 0000000..a2ee1fa Binary files /dev/null and b/testdata/zip64.zip differ diff --git a/writer.go b/writer.go new file mode 100644 index 0000000..3be2b5f --- /dev/null +++ b/writer.go @@ -0,0 +1,374 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" +) + +// TODO(adg): support zip file comments +// TODO(adg): support specifying deflate level + +// Writer implements a zip file writer. +type Writer struct { + cw *countWriter + dir []*header + last *fileWriter + closed bool +} + +type header struct { + *FileHeader + offset uint64 +} + +// NewWriter returns a new Writer writing a zip file to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} +} + +// SetOffset sets the offset of the beginning of the zip data within the +// underlying writer. It should be used when the zip data is appended to an +// existing file, such as a binary executable. +// It must be called before any data is written. +func (w *Writer) SetOffset(n int64) { + if w.cw.count != 0 { + panic("zip: SetOffset called after data was written") + } + w.cw.count = n +} + +// Flush flushes any buffered data to the underlying writer. 
+// Calling Flush is not normally necessary; calling Close is sufficient. +func (w *Writer) Flush() error { + return w.cw.w.(*bufio.Writer).Flush() +} + +// Close finishes writing the zip file by writing the central directory. +// It does not (and can not) close the underlying writer. +func (w *Writer) Close() error { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return err + } + w.last = nil + } + if w.closed { + return errors.New("zip: writer closed twice") + } + w.closed = true + + // write central directory + start := w.cw.count + for _, h := range w.dir { + var buf [directoryHeaderLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(directoryHeaderSignature)) + b.uint16(h.CreatorVersion) + b.uint16(h.ReaderVersion) + b.uint16(h.Flags) + b.uint16(h.Method) + b.uint16(h.ModifiedTime) + b.uint16(h.ModifiedDate) + b.uint32(h.CRC32) + if h.isZip64() || h.offset > uint32max { + // the file needs a zip64 header. store maxint in both + // 32 bit size fields (and offset later) to signal that the + // zip64 extra header should be used. + b.uint32(uint32max) // compressed size + b.uint32(uint32max) // uncompressed size + + // append a zip64 extra block to Extra + var buf [28]byte // 2x uint16 + 3x uint64 + eb := writeBuf(buf[:]) + eb.uint16(zip64ExtraId) + eb.uint16(24) // size = 3x uint64 + eb.uint64(h.UncompressedSize64) + eb.uint64(h.CompressedSize64) + eb.uint64(h.offset) + h.Extra = append(h.Extra, buf[:]...) 
+ } else { + b.uint32(h.CompressedSize) + b.uint32(h.UncompressedSize) + } + b.uint16(uint16(len(h.Name))) + b.uint16(uint16(len(h.Extra))) + b.uint16(uint16(len(h.Comment))) + b = b[4:] // skip disk number start and internal file attr (2x uint16) + b.uint32(h.ExternalAttrs) + if h.offset > uint32max { + b.uint32(uint32max) + } else { + b.uint32(uint32(h.offset)) + } + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w.cw, h.Name); err != nil { + return err + } + if _, err := w.cw.Write(h.Extra); err != nil { + return err + } + if _, err := io.WriteString(w.cw, h.Comment); err != nil { + return err + } + } + end := w.cw.count + + records := uint64(len(w.dir)) + size := uint64(end - start) + offset := uint64(start) + + if records > uint16max || size > uint32max || offset > uint32max { + var buf [directory64EndLen + directory64LocLen]byte + b := writeBuf(buf[:]) + + // zip64 end of central directory record + b.uint32(directory64EndSignature) + b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) + b.uint16(zipVersion45) // version made by + b.uint16(zipVersion45) // version needed to extract + b.uint32(0) // number of this disk + b.uint32(0) // number of the disk with the start of the central directory + b.uint64(records) // total number of entries in the central directory on this disk + b.uint64(records) // total number of entries in the central directory + b.uint64(size) // size of the central directory + b.uint64(offset) // offset of start of central directory with respect to the starting disk number + + // zip64 end of central directory locator + b.uint32(directory64LocSignature) + b.uint32(0) // number of the disk with the start of the zip64 end of central directory + b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record + b.uint32(1) // total number of disks + + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + + // store max 
values in the regular end record to signal that + // that the zip64 values should be used instead + records = uint16max + size = uint32max + offset = uint32max + } + + // write end record + var buf [directoryEndLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(directoryEndSignature)) + b = b[4:] // skip over disk number and first disk number (2x uint16) + b.uint16(uint16(records)) // number of entries this disk + b.uint16(uint16(records)) // number of entries total + b.uint32(uint32(size)) // size of directory + b.uint32(uint32(offset)) // start of directory + // skipped size of comment (always zero) + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + + return w.cw.w.(*bufio.Writer).Flush() +} + +// Create adds a file to the zip file using the provided name. +// It returns a Writer to which the file contents should be written. +// The name must be a relative path: it must not start with a drive +// letter (e.g. C:) or leading slash, and only forward slashes are +// allowed. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. +func (w *Writer) Create(name string) (io.Writer, error) { + header := &FileHeader{ + Name: name, + Method: Deflate, + } + return w.CreateHeader(header) +} + +// CreateHeader adds a file to the zip file using the provided FileHeader +// for the file metadata. +// It returns a Writer to which the file contents should be written. +// +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. The provided FileHeader fh +// must not be modified after a call to CreateHeader. +func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return nil, err + } + } + if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { + // See https://golang.org/issue/11144 confusion. 
+ return nil, errors.New("archive/zip: invalid duplicate FileHeader") + } + + fh.Flags |= 0x8 // we will write a data descriptor + + fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte + fh.ReaderVersion = zipVersion20 + + fw := &fileWriter{ + zipw: w.cw, + compCount: &countWriter{w: w.cw}, + crc32: crc32.NewIEEE(), + } + comp := compressor(fh.Method) + if comp == nil { + return nil, ErrAlgorithm + } + var err error + fw.comp, err = comp(fw.compCount) + if err != nil { + return nil, err + } + fw.rawCount = &countWriter{w: fw.comp} + + h := &header{ + FileHeader: fh, + offset: uint64(w.cw.count), + } + w.dir = append(w.dir, h) + fw.header = h + + if err := writeHeader(w.cw, fh); err != nil { + return nil, err + } + + w.last = fw + return fw, nil +} + +func writeHeader(w io.Writer, h *FileHeader) error { + var buf [fileHeaderLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(fileHeaderSignature)) + b.uint16(h.ReaderVersion) + b.uint16(h.Flags) + b.uint16(h.Method) + b.uint16(h.ModifiedTime) + b.uint16(h.ModifiedDate) + b.uint32(0) // since we are writing a data descriptor crc32, + b.uint32(0) // compressed size, + b.uint32(0) // and uncompressed size should be zero + b.uint16(uint16(len(h.Name))) + b.uint16(uint16(len(h.Extra))) + if _, err := w.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w, h.Name); err != nil { + return err + } + _, err := w.Write(h.Extra) + return err +} + +type fileWriter struct { + *header + zipw io.Writer + rawCount *countWriter + comp io.WriteCloser + compCount *countWriter + crc32 hash.Hash32 + closed bool +} + +func (w *fileWriter) Write(p []byte) (int, error) { + if w.closed { + return 0, errors.New("zip: write to closed file") + } + w.crc32.Write(p) + return w.rawCount.Write(p) +} + +func (w *fileWriter) close() error { + if w.closed { + return errors.New("zip: file closed twice") + } + w.closed = true + if err := w.comp.Close(); err != nil { + return err + } + + // 
update FileHeader + fh := w.header.FileHeader + fh.CRC32 = w.crc32.Sum32() + fh.CompressedSize64 = uint64(w.compCount.count) + fh.UncompressedSize64 = uint64(w.rawCount.count) + + if fh.isZip64() { + fh.CompressedSize = uint32max + fh.UncompressedSize = uint32max + fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions + } else { + fh.CompressedSize = uint32(fh.CompressedSize64) + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + + // Write data descriptor. This is more complicated than one would + // think, see e.g. comments in zipfile.c:putextended() and + // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. + // The approach here is to write 8 byte sizes if needed without + // adding a zip64 extra in the local header (too late anyway). + var buf []byte + if fh.isZip64() { + buf = make([]byte, dataDescriptor64Len) + } else { + buf = make([]byte, dataDescriptorLen) + } + b := writeBuf(buf) + b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X + b.uint32(fh.CRC32) + if fh.isZip64() { + b.uint64(fh.CompressedSize64) + b.uint64(fh.UncompressedSize64) + } else { + b.uint32(fh.CompressedSize) + b.uint32(fh.UncompressedSize) + } + _, err := w.zipw.Write(buf) + return err +} + +type countWriter struct { + w io.Writer + count int64 +} + +func (w *countWriter) Write(p []byte) (int, error) { + n, err := w.w.Write(p) + w.count += int64(n) + return n, err +} + +type nopCloser struct { + io.Writer +} + +func (w nopCloser) Close() error { + return nil +} + +type writeBuf []byte + +func (b *writeBuf) uint16(v uint16) { + binary.LittleEndian.PutUint16(*b, v) + *b = (*b)[2:] +} + +func (b *writeBuf) uint32(v uint32) { + binary.LittleEndian.PutUint32(*b, v) + *b = (*b)[4:] +} + +func (b *writeBuf) uint64(v uint64) { + binary.LittleEndian.PutUint64(*b, v) + *b = (*b)[8:] +} diff --git a/writer_test.go b/writer_test.go new file mode 100644 index 0000000..01b63f2 --- /dev/null +++ b/writer_test.go @@ -0,0 +1,199 @@ 
+// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "io" + "io/ioutil" + "math/rand" + "os" + "testing" +) + +// TODO(adg): a more sophisticated test suite + +type WriteTest struct { + Name string + Data []byte + Method uint16 + Mode os.FileMode +} + +var writeTests = []WriteTest{ + { + Name: "foo", + Data: []byte("Rabbits, guinea pigs, gophers, marsupial rats, and quolls."), + Method: Store, + Mode: 0666, + }, + { + Name: "bar", + Data: nil, // large data set in the test + Method: Deflate, + Mode: 0644, + }, + { + Name: "setuid", + Data: []byte("setuid file"), + Method: Deflate, + Mode: 0755 | os.ModeSetuid, + }, + { + Name: "setgid", + Data: []byte("setgid file"), + Method: Deflate, + Mode: 0755 | os.ModeSetgid, + }, + { + Name: "symlink", + Data: []byte("../link/target"), + Method: Deflate, + Mode: 0755 | os.ModeSymlink, + }, +} + +func TestWriter(t *testing.T) { + largeData := make([]byte, 1<<17) + for i := range largeData { + largeData[i] = byte(rand.Int()) + } + writeTests[1].Data = largeData + defer func() { + writeTests[1].Data = nil + }() + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +func TestWriterOffset(t *testing.T) { + largeData := make([]byte, 1<<17) + for i := range largeData { + largeData[i] = byte(rand.Int()) + } + writeTests[1].Data = largeData + defer func() { + writeTests[1].Data = nil + }() + + // write a zip file + buf := new(bytes.Buffer) + existingData := []byte{1, 2, 3, 1, 2, 3, 1, 2, 3} + n, _ := buf.Write(existingData) + w := NewWriter(buf) + 
w.SetOffset(int64(n)) + + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +func TestWriterFlush(t *testing.T) { + var buf bytes.Buffer + w := NewWriter(struct{ io.Writer }{&buf}) + _, err := w.Create("foo") + if err != nil { + t.Fatal(err) + } + if buf.Len() > 0 { + t.Fatalf("Unexpected %d bytes already in buffer", buf.Len()) + } + if err := w.Flush(); err != nil { + t.Fatal(err) + } + if buf.Len() == 0 { + t.Fatal("No bytes written after Flush") + } +} + +func testCreate(t *testing.T, w *Writer, wt *WriteTest) { + header := &FileHeader{ + Name: wt.Name, + Method: wt.Method, + } + if wt.Mode != 0 { + header.SetMode(wt.Mode) + } + f, err := w.CreateHeader(header) + if err != nil { + t.Fatal(err) + } + _, err = f.Write(wt.Data) + if err != nil { + t.Fatal(err) + } +} + +func testReadFile(t *testing.T, f *File, wt *WriteTest) { + if f.Name != wt.Name { + t.Fatalf("File name: got %q, want %q", f.Name, wt.Name) + } + testFileMode(t, wt.Name, f, wt.Mode) + rc, err := f.Open() + if err != nil { + t.Fatal("opening:", err) + } + b, err := ioutil.ReadAll(rc) + if err != nil { + t.Fatal("reading:", err) + } + err = rc.Close() + if err != nil { + t.Fatal("closing:", err) + } + if !bytes.Equal(b, wt.Data) { + t.Errorf("File contents %q, want %q", b, wt.Data) + } +} + +func BenchmarkCompressedZipGarbage(b *testing.B) { + b.ReportAllocs() + var buf bytes.Buffer + bigBuf := bytes.Repeat([]byte("a"), 1<<20) + for i := 0; i < b.N; i++ { + buf.Reset() + zw := NewWriter(&buf) + for j := 0; j < 3; j++ { + w, _ := zw.CreateHeader(&FileHeader{ + Name: "foo", + Method: Deflate, + }) + w.Write(bigBuf) + } + zw.Close() + } +} diff --git a/zip_test.go b/zip_test.go new file mode 100644 index 0000000..f00ff47 --- /dev/null 
+++ b/zip_test.go @@ -0,0 +1,427 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests that involve both reading and writing. + +package zip + +import ( + "bytes" + "fmt" + "hash" + "io" + "io/ioutil" + "sort" + "strings" + "testing" + "time" +) + +func TestOver65kFiles(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriter(buf) + const nFiles = (1 << 16) + 42 + for i := 0; i < nFiles; i++ { + _, err := w.CreateHeader(&FileHeader{ + Name: fmt.Sprintf("%d.dat", i), + Method: Store, // avoid Issue 6136 and Issue 6138 + }) + if err != nil { + t.Fatalf("creating file %d: %v", i, err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + s := buf.String() + zr, err := NewReader(strings.NewReader(s), int64(len(s))) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + if got := len(zr.File); got != nFiles { + t.Fatalf("File contains %d files, want %d", got, nFiles) + } + for i := 0; i < nFiles; i++ { + want := fmt.Sprintf("%d.dat", i) + if zr.File[i].Name != want { + t.Fatalf("File(%d) = %q, want %q", i, zr.File[i].Name, want) + } + } +} + +func TestModTime(t *testing.T) { + var testTime = time.Date(2009, time.November, 10, 23, 45, 58, 0, time.UTC) + fh := new(FileHeader) + fh.SetModTime(testTime) + outTime := fh.ModTime() + if !outTime.Equal(testTime) { + t.Errorf("times don't match: got %s, want %s", outTime, testTime) + } +} + +func testHeaderRoundTrip(fh *FileHeader, wantUncompressedSize uint32, wantUncompressedSize64 uint64, t *testing.T) { + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.Name, fh.Name; got != want { + t.Errorf("Name: got %s, want %s\n", got, want) + } + if got, want := fh2.UncompressedSize, wantUncompressedSize; got != want { + t.Errorf("UncompressedSize: got %d, want %d\n", got, want) + } + if got, want := 
fh2.UncompressedSize64, wantUncompressedSize64; got != want { + t.Errorf("UncompressedSize64: got %d, want %d\n", got, want) + } + if got, want := fh2.ModifiedTime, fh.ModifiedTime; got != want { + t.Errorf("ModifiedTime: got %d, want %d\n", got, want) + } + if got, want := fh2.ModifiedDate, fh.ModifiedDate; got != want { + t.Errorf("ModifiedDate: got %d, want %d\n", got, want) + } + + if sysfh, ok := fi.Sys().(*FileHeader); !ok && sysfh != fh { + t.Errorf("Sys didn't return original *FileHeader") + } +} + +func TestFileHeaderRoundTrip(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + testHeaderRoundTrip(fh, fh.UncompressedSize, uint64(fh.UncompressedSize), t) +} + +func TestFileHeaderRoundTrip64(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize64: 9876543210, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t) +} + +type repeatedByte struct { + off int64 + b byte + n int64 +} + +// rleBuffer is a run-length-encoded byte buffer. +// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt, +// allowing random-access reads. +type rleBuffer struct { + buf []repeatedByte +} + +func (r *rleBuffer) Size() int64 { + if len(r.buf) == 0 { + return 0 + } + last := &r.buf[len(r.buf)-1] + return last.off + last.n +} + +func (r *rleBuffer) Write(p []byte) (n int, err error) { + var rp *repeatedByte + if len(r.buf) > 0 { + rp = &r.buf[len(r.buf)-1] + // Fast path, if p is entirely the same byte repeated. 
+ if lastByte := rp.b; len(p) > 0 && p[0] == lastByte { + all := true + for _, b := range p { + if b != lastByte { + all = false + break + } + } + if all { + rp.n += int64(len(p)) + return len(p), nil + } + } + } + + for _, b := range p { + if rp == nil || rp.b != b { + r.buf = append(r.buf, repeatedByte{r.Size(), b, 1}) + rp = &r.buf[len(r.buf)-1] + } else { + rp.n++ + } + } + return len(p), nil +} + +func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) { + if len(p) == 0 { + return + } + skipParts := sort.Search(len(r.buf), func(i int) bool { + part := &r.buf[i] + return part.off+part.n > off + }) + parts := r.buf[skipParts:] + if len(parts) > 0 { + skipBytes := off - parts[0].off + for len(parts) > 0 { + part := parts[0] + for i := skipBytes; i < part.n; i++ { + if n == len(p) { + return + } + p[n] = part.b + n++ + } + parts = parts[1:] + skipBytes = 0 + } + } + if n != len(p) { + err = io.ErrUnexpectedEOF + } + return +} + +// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code. +func TestRLEBuffer(t *testing.T) { + b := new(rleBuffer) + var all []byte + writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"} + for _, w := range writes { + b.Write([]byte(w)) + all = append(all, w...) + } + if len(b.buf) != 10 { + t.Fatalf("len(b.buf) = %d; want 10", len(b.buf)) + } + + for i := 0; i < len(all); i++ { + for j := 0; j < len(all)-i; j++ { + buf := make([]byte, j) + n, err := b.ReadAt(buf, int64(i)) + if err != nil || n != len(buf) { + t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf)) + } + if !bytes.Equal(buf, all[i:i+j]) { + t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j]) + } + } + } +} + +// fakeHash32 is a dummy Hash32 that always returns 0. 
+type fakeHash32 struct { + hash.Hash32 +} + +func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil } +func (fakeHash32) Sum32() uint32 { return 0 } + +func TestZip64(t *testing.T) { + if testing.Short() { + t.Skip("slow test; skipping") + } + const size = 1 << 32 // before the "END\n" part + buf := testZip64(t, size) + testZip64DirectoryRecordLength(buf, t) +} + +func testZip64(t testing.TB, size int64) *rleBuffer { + const chunkSize = 1024 + chunks := int(size / chunkSize) + // write 2^32 bytes plus "END\n" to a zip file + buf := new(rleBuffer) + w := NewWriter(buf) + f, err := w.CreateHeader(&FileHeader{ + Name: "huge.txt", + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + f.(*fileWriter).crc32 = fakeHash32{} + chunk := make([]byte, chunkSize) + for i := range chunk { + chunk[i] = '.' + } + for i := 0; i < chunks; i++ { + _, err := f.Write(chunk) + if err != nil { + t.Fatal("write chunk:", err) + } + } + end := []byte("END\n") + _, err = f.Write(end) + if err != nil { + t.Fatal("write end:", err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read back zip file and check that we get to the end of it + r, err := NewReader(buf, int64(buf.Size())) + if err != nil { + t.Fatal("reader:", err) + } + f0 := r.File[0] + rc, err := f0.Open() + if err != nil { + t.Fatal("opening:", err) + } + rc.(*checksumReader).hash = fakeHash32{} + for i := 0; i < chunks; i++ { + _, err := io.ReadFull(rc, chunk) + if err != nil { + t.Fatal("read:", err) + } + } + gotEnd, err := ioutil.ReadAll(rc) + if err != nil { + t.Fatal("read end:", err) + } + if !bytes.Equal(gotEnd, end) { + t.Errorf("End of zip64 archive %q, want %q", gotEnd, end) + } + err = rc.Close() + if err != nil { + t.Fatal("closing:", err) + } + if size == 1<<32 { + if got, want := f0.UncompressedSize, uint32(uint32max); got != want { + t.Errorf("UncompressedSize %d, want %d", got, want) + } + } + + if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want 
{ + t.Errorf("UncompressedSize64 %d, want %d", got, want) + } + + return buf +} + +// Issue 9857 +func testZip64DirectoryRecordLength(buf *rleBuffer, t *testing.T) { + d := make([]byte, 1024) + if _, err := buf.ReadAt(d, buf.Size()-int64(len(d))); err != nil { + t.Fatal("read:", err) + } + + sigOff := findSignatureInBlock(d) + dirOff, err := findDirectory64End(buf, buf.Size()-int64(len(d))+int64(sigOff)) + if err != nil { + t.Fatal("findDirectory64End:", err) + } + + d = make([]byte, directory64EndLen) + if _, err := buf.ReadAt(d, dirOff); err != nil { + t.Fatal("read:", err) + } + + b := readBuf(d) + if sig := b.uint32(); sig != directory64EndSignature { + t.Fatalf("Expected directory64EndSignature (%d), got %d", directory64EndSignature, sig) + } + + size := b.uint64() + if size != directory64EndLen-12 { + t.Fatalf("Expected length of %d, got %d", directory64EndLen-12, size) + } +} + +func testInvalidHeader(h *FileHeader, t *testing.T) { + var buf bytes.Buffer + z := NewWriter(&buf) + + f, err := z.CreateHeader(h) + if err != nil { + t.Fatalf("error creating header: %v", err) + } + if _, err := f.Write([]byte("hi")); err != nil { + t.Fatalf("error writing content: %v", err) + } + if err := z.Close(); err != nil { + t.Fatalf("error closing zip writer: %v", err) + } + + b := buf.Bytes() + if _, err = NewReader(bytes.NewReader(b), int64(len(b))); err != ErrFormat { + t.Fatalf("got %v, expected ErrFormat", err) + } +} + +func testValidHeader(h *FileHeader, t *testing.T) { + var buf bytes.Buffer + z := NewWriter(&buf) + + f, err := z.CreateHeader(h) + if err != nil { + t.Fatalf("error creating header: %v", err) + } + if _, err := f.Write([]byte("hi")); err != nil { + t.Fatalf("error writing content: %v", err) + } + if err := z.Close(); err != nil { + t.Fatalf("error closing zip writer: %v", err) + } + + b := buf.Bytes() + if _, err = NewReader(bytes.NewReader(b), int64(len(b))); err != nil { + t.Fatalf("got %v, expected nil", err) + } +} + +// Issue 4302. 
+func TestHeaderInvalidTagAndSize(t *testing.T) { + const timeFormat = "20060102T150405.000.txt" + + ts := time.Now() + filename := ts.Format(timeFormat) + + h := FileHeader{ + Name: filename, + Method: Deflate, + Extra: []byte(ts.Format(time.RFC3339Nano)), // missing tag and len + } + h.SetModTime(ts) + + testInvalidHeader(&h, t) +} + +func TestHeaderTooShort(t *testing.T) { + h := FileHeader{ + Name: "foo.txt", + Method: Deflate, + Extra: []byte{zip64ExtraId}, // missing size + } + testInvalidHeader(&h, t) +} + +// Issue 4393. It is valid to have an extra data header +// which contains no body. +func TestZeroLengthHeader(t *testing.T) { + h := FileHeader{ + Name: "extadata.txt", + Method: Deflate, + Extra: []byte{ + 85, 84, 5, 0, 3, 154, 144, 195, 77, // tag 21589 size 5 + 85, 120, 0, 0, // tag 30805 size 0 + }, + } + testValidHeader(&h, t) +} + +// Just benchmarking how fast the Zip64 test above is. Not related to +// our zip performance, since the test above disabled CRC32 and flate. +func BenchmarkZip64Test(b *testing.B) { + for i := 0; i < b.N; i++ { + testZip64(b, 1<<26) + } +}