diff options
| -rw-r--r-- | decompress.go | 79 | ||||
| -rw-r--r-- | go.mod | 5 | ||||
| -rw-r--r-- | go.sum | 2 | ||||
| -rw-r--r-- | main.go | 304 | ||||
| -rw-r--r-- | reader.go | 898 | ||||
| -rw-r--r-- | struct.go | 392 | ||||
| -rw-r--r-- | unicode.go | 28 |
7 files changed, 1635 insertions, 73 deletions
diff --git a/decompress.go b/decompress.go new file mode 100644 index 0000000..0d12659 --- /dev/null +++ b/decompress.go @@ -0,0 +1,79 @@ +package main + +import ( + "errors" + "io" + "sync" + + "github.com/klauspost/compress/flate" +) + +// A Decompressor returns a new decompressing reader, reading from r. +// The ReadCloser's Close method must be used to release associated resources. +// The Decompressor itself must be safe to invoke from multiple goroutines +// simultaneously, but each returned reader will be used only by +// one goroutine at a time. +type Decompressor func(r io.Reader) io.ReadCloser + +var flateReaderPool sync.Pool + +func newFlateReader(r io.Reader) io.ReadCloser { + fr, ok := flateReaderPool.Get().(io.ReadCloser) + if ok { + fr.(flate.Resetter).Reset(r, nil) + } else { + fr = flate.NewReader(r) + } + return &pooledFlateReader{fr: fr} +} + +type pooledFlateReader struct { + mu sync.Mutex // guards Close and Read + fr io.ReadCloser +} + +func (r *pooledFlateReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.fr == nil { + return 0, errors.New("Read after Close") + } + return r.fr.Read(p) +} + +func (r *pooledFlateReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.fr != nil { + err = r.fr.Close() + flateReaderPool.Put(r.fr) + r.fr = nil + } + return err +} + +var ( + decompressors sync.Map // map[uint16]Decompressor +) + +func init() { + decompressors.Store(Store, Decompressor(io.NopCloser)) + decompressors.Store(Deflate, Decompressor(newFlateReader)) +} + +// RegisterDecompressor allows custom decompressors for a specified method ID. +// The common methods Store and Deflate are built in. +func RegisterDecompressor(method uint16, dcomp Decompressor) { + if _, dup := decompressors.LoadOrStore(method, dcomp); dup { + panic("decompressor already registered") + } +} + +func decompressor(method uint16) Decompressor { + di, ok := decompressors.Load(method) + if !ok { + return nil + } + return di.(Decompressor) +} @@ -2,4 +2,7 @@ module go.metala.org/zipreads go 1.21.5 -require github.com/spf13/pflag v1.0.5 +require ( + github.com/klauspost/compress v1.17.7 + github.com/spf13/pflag v1.0.5 +) @@ -1,2 +1,4 @@ +github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= +github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -1,6 +1,10 @@ package main import ( + "bytes" + "encoding/json" + "errors" + "io" "log" "os" "time" @@ -8,84 +12,240 @@ import ( flag "github.com/spf13/pflag" ) -// Compression methods. -const ( - Store uint16 = 0 // no compression - Deflate uint16 = 8 // DEFLATE compressed -) - -const ( - fileHeaderSignature = 0x04034b50 - fileHeaderLen = 30 // + filename + extra -) - -type FileHeader struct { - // Name is the name of the file. - // - // It must be a relative path, not start with a drive letter (such as "C:"), - // and must use forward slashes instead of back slashes. A trailing slash - // indicates that this file is a directory and should have no data. - // - // When reading zip files, the Name field is populated from - // the zip file directly and is not validated for correctness. - // It is the caller's responsibility to sanitize it as - // appropriate, including canonicalizing slash directions, - // validating that paths are relative, and preventing path - // traversal through filenames ("../../../"). - Name string - - // Comment is any arbitrary user-defined string shorter than 64KiB. - Comment string - - // NonUTF8 indicates that Name and Comment are not encoded in UTF-8. - // - // By specification, the only other encoding permitted should be CP-437, - // but historically many ZIP readers interpret Name and Comment as whatever - // the system's local character encoding happens to be. - // - // This flag should only be set if the user intends to encode a non-portable - // ZIP file for a specific localized region. Otherwise, the Writer - // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings. - NonUTF8 bool - - CreatorVersion uint16 - ReaderVersion uint16 - Flags uint16 - - // Method is the compression method. If zero, Store is used. - Method uint16 - - // Modified is the modified time of the file. - // - // When reading, an extended timestamp is preferred over the legacy MS-DOS - // date field, and the offset between the times is used as the timezone. - // If only the MS-DOS date is present, the timezone is assumed to be UTC. - // - // When writing, an extended timestamp (which is timezone-agnostic) is - // always emitted. The legacy MS-DOS date field is encoded according to the - // location of the Modified time. - Modified time.Time - ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead. - ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead. - - CRC32 uint32 - CompressedSize uint32 // Deprecated: Use CompressedSize64 instead. - UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead. - CompressedSize64 uint64 - UncompressedSize64 uint64 - Extra []byte - ExternalAttrs uint32 // Meaning depends on CreatorVersion -} - func main() { - var filename string - flag.StringVarP(&filename, "filename", "f", "", "filename") + var zipFilename string + flag.StringVarP(&zipFilename, "filename", "f", "", "filename") flag.Parse() - f, err := os.Open(filename) + f, err := os.Open(zipFilename) if err != nil { log.Fatal(err) } defer f.Close() + for { + var sig [4]byte + if _, err = io.ReadFull(f, sig[:]); err != nil { + log.Fatal("failed to read signature: ", err) + } + + sigb := readBuf(sig[:]) + signature := sigb.uint32() + + switch signature { + case fileHeaderSignature: + var hdr File + if err := parseFileHeader(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read file header at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("File: %s", string(j)) + if pos, err := f.Seek(int64(hdr.CompressedSize), io.SeekCurrent); err != nil { + log.Fatalf("failed to seek to next header, stopped at %d: %s", pos, err) + } + case directoryHeaderSignature: + var hdr File + if err = readDirectoryHeader(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read directory header at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("Directory: %s", string(j)) + case directoryEndSignature: + var hdr directoryEnd + if err := parseDirectoryEnd(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read directory end at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("Directory End: %s", string(j)) + default: + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("invalid header signature at %d: got %08x", pos, signature) + } + } +} + +func parseFileHeader(f *File, r io.Reader) error { + var hdr [fileHeaderLen]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return err + } + b := readBuf(hdr[:]) + f.CreatorVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + filename := make([]byte, filenameLen) + if _, err := io.ReadFull(r, filename); err != nil { + return err + } + f.Name = string(filename) + f.Extra = make([]byte, extraLen) + if _, err := io.ReadFull(r, f.Extra); err != nil { + return err + } + + needUSize := f.UncompressedSize == ^uint32(0) + needCSize := f.CompressedSize == ^uint32(0) + needHeaderOffset := f.headerOffset == int64(^uint32(0)) + + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + f.zip64 = true + + // update directory values from the zip64 extra block. + // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() + } + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras + } + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) + } + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) + } + } + + return nil +} + +func parseDirectoryEnd(d *directoryEnd, r io.Reader) error { + var hdr [directoryEndLen]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return err + } + b := readBuf(hdr[:]) + d.DiskNbr = uint32(b.uint16()) + d.DirDiskNbr = uint32(b.uint16()) + d.DirRecordsThisDisk = uint64(b.uint16()) + d.DirectoryRecords = uint64(b.uint16()) + d.DirectorySize = uint64(b.uint32()) + d.DirectoryOffset = uint64(b.uint32()) + d.CommentLen = b.uint16() + l := int(d.CommentLen) + if l > len(b) { + return errors.New("zip: invalid comment length") + } + comment := make([]byte, d.CommentLen) + if _, err := io.ReadFull(r, comment); err != nil { + return err + } + d.Comment = string(comment) + + return nil +} + +const searchChunkSize = 4096 + +func find(r io.Reader, search []byte) (int64, error) { + var offset int64 + tailLen := len(search) - 1 + chunk := make([]byte, searchChunkSize+tailLen) + n, err := r.Read(chunk[tailLen:]) + idx := bytes.Index(chunk[tailLen:n+tailLen], search) + for { + if idx >= 0 { + return offset + int64(idx), nil + } + if err == io.EOF { + return -1, nil + } else if err != nil { + return -1, err + } + copy(chunk, chunk[searchChunkSize:]) + offset += searchChunkSize + n, err = r.Read(chunk[tailLen:]) + idx = bytes.Index(chunk[:n+tailLen], search) + } } diff --git a/reader.go b/reader.go new file mode 100644 index 0000000..5ee2d3d --- /dev/null +++ b/reader.go @@ -0,0 +1,898 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" + "io/fs" + "os" + "path" + "sort" + "strings" + "sync" + "time" +) + +var ( + ErrFormat = errors.New("zip: not a valid zip file") + ErrAlgorithm = errors.New("zip: unsupported compression algorithm") + ErrChecksum = errors.New("zip: checksum error") +) + +// A Reader serves content from a ZIP archive. +type Reader struct { + r io.ReaderAt + File []*File + Comment string + decompressors map[uint16]Decompressor + + // Some JAR files are zip files with a prefix that is a bash script. + // The baseOffset field is the start of the zip file proper. + baseOffset int64 + + // fileList is a list of files sorted by ename, + // for use by the Open method. + fileListOnce sync.Once + fileList []fileListEntry +} + +// A ReadCloser is a Reader that must be closed when no longer needed. +type ReadCloser struct { + f *os.File + Reader +} + +// A File is a single file in a ZIP archive. +// The file information is in the embedded FileHeader. +// The file content can be accessed by calling Open. +type File struct { + FileHeader + zip *Reader + zipr io.ReaderAt + headerOffset int64 // includes overall ZIP archive baseOffset + zip64 bool // zip64 extended information extra field presence +} + +// OpenReader will open the Zip file specified by name and return a ReadCloser. +func OpenReader(name string) (*ReadCloser, error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + r := new(ReadCloser) + if err := r.init(f, fi.Size()); err != nil { + f.Close() + return nil, err + } + r.f = f + return r, nil +} + +// NewReader returns a new Reader reading from r, which is assumed to +// have the given size in bytes. +func NewReader(r io.ReaderAt, size int64) (*Reader, error) { + if size < 0 { + return nil, errors.New("zip: size cannot be negative") + } + zr := new(Reader) + if err := zr.init(r, size); err != nil { + return nil, err + } + return zr, nil +} + +func (z *Reader) init(r io.ReaderAt, size int64) error { + end, baseOffset, err := readDirectoryEnd(r, size) + if err != nil { + return err + } + z.r = r + z.baseOffset = baseOffset + // Since the number of directory records is not validated, it is not + // safe to preallocate z.File without first checking that the specified + // number of files is reasonable, since a malformed archive may + // indicate it contains up to 1 << 128 - 1 files. Since each file has a + // header which will be _at least_ 30 bytes we can safely preallocate + // if (data size / 30) >= end.directoryRecords. + if end.DirectorySize < uint64(size) && (uint64(size)-end.DirectorySize)/30 >= end.DirectoryRecords { + z.File = make([]*File, 0, end.DirectoryRecords) + } + z.Comment = end.Comment + rs := io.NewSectionReader(r, 0, size) + if _, err = rs.Seek(z.baseOffset+int64(end.DirectoryOffset), io.SeekStart); err != nil { + return err + } + buf := bufio.NewReader(rs) + + // The count of files inside a zip is truncated to fit in a uint16. + // Gloss over this by reading headers until we encounter + // a bad one, and then only report an ErrFormat or UnexpectedEOF if + // the file count modulo 65536 is incorrect. + for { + f := &File{zip: z, zipr: r} + err = readDirectoryHeader(f, buf) + + // For compatibility with other zip programs, + // if we have a non-zero base offset and can't read + // the first directory header, try again with a zero + // base offset. + if err == ErrFormat && z.baseOffset != 0 && len(z.File) == 0 { + z.baseOffset = 0 + if _, err = rs.Seek(int64(end.DirectoryOffset), io.SeekStart); err != nil { + return err + } + buf.Reset(rs) + continue + } + + if err == ErrFormat || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return err + } + f.headerOffset += z.baseOffset + z.File = append(z.File, f) + } + if uint16(len(z.File)) != uint16(end.DirectoryRecords) { // only compare 16 bits here + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err + } + return nil +} + +// RegisterDecompressor registers or overrides a custom decompressor for a +// specific method ID. If a decompressor for a given method is not found, +// Reader will default to looking up the decompressor at the package level. +func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { + if z.decompressors == nil { + z.decompressors = make(map[uint16]Decompressor) + } + z.decompressors[method] = dcomp +} + +func (z *Reader) decompressor(method uint16) Decompressor { + dcomp := z.decompressors[method] + if dcomp == nil { + dcomp = decompressor(method) + } + return dcomp +} + +// Close closes the Zip file, rendering it unusable for I/O. +func (rc *ReadCloser) Close() error { + return rc.f.Close() +} + +// DataOffset returns the offset of the file's possibly-compressed +// data, relative to the beginning of the zip file. +// +// Most callers should instead use Open, which transparently +// decompresses data and verifies checksums. +func (f *File) DataOffset() (offset int64, err error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return + } + return f.headerOffset + bodyOffset, nil +} + +// Open returns a ReadCloser that provides access to the File's contents. +// Multiple files may be read concurrently. +func (f *File) Open() (io.ReadCloser, error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return nil, err + } + size := int64(f.CompressedSize64) + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) + dcomp := f.zip.decompressor(f.Method) + if dcomp == nil { + return nil, ErrAlgorithm + } + var rc io.ReadCloser = dcomp(r) + var desr io.Reader + if f.hasDataDescriptor() { + desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) + } + rc = &checksumReader{ + rc: rc, + hash: crc32.NewIEEE(), + f: f, + desr: desr, + } + return rc, nil +} + +// OpenRaw returns a Reader that provides access to the File's contents without +// decompression. +func (f *File) OpenRaw() (io.Reader, error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return nil, err + } + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) + return r, nil +} + +type checksumReader struct { + rc io.ReadCloser + hash hash.Hash32 + nread uint64 // number of bytes read so far + f *File + desr io.Reader // if non-nil, where to read the data descriptor + err error // sticky error +} + +func (r *checksumReader) Stat() (fs.FileInfo, error) { + return headerFileInfo{&r.f.FileHeader}, nil +} + +func (r *checksumReader) Read(b []byte) (n int, err error) { + if r.err != nil { + return 0, r.err + } + n, err = r.rc.Read(b) + r.hash.Write(b[:n]) + r.nread += uint64(n) + if r.nread > r.f.UncompressedSize64 { + return 0, ErrFormat + } + if err == nil { + return + } + if err == io.EOF { + if r.nread != r.f.UncompressedSize64 { + return 0, io.ErrUnexpectedEOF + } + if r.desr != nil { + if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { + if err1 == io.EOF { + err = io.ErrUnexpectedEOF + } else { + err = err1 + } + } else if r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } else { + // If there's not a data descriptor, we still compare + // the CRC32 of what we've read against the file header + // or TOC's CRC32, if it seems like it was set. + if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } + } + r.err = err + return +} + +func (r *checksumReader) Close() error { return r.rc.Close() } + +// findBodyOffset does the minimum work to verify the file has a header +// and returns the file body offset. +func (f *File) findBodyOffset() (int64, error) { + var buf [fileHeaderLen]byte + if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { + return 0, err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != fileHeaderSignature { + return 0, ErrFormat + } + b = b[22:] // skip over most of the header + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + return int64(fileHeaderLen + filenameLen + extraLen), nil +} + +// readDirectoryHeader attempts to read a directory header from r. +// It returns io.ErrUnexpectedEOF if it cannot read a complete header, +// and ErrFormat if it doesn't find a valid header signature. +func readDirectoryHeader(f *File, r io.Reader) error { + var buf [directoryHeaderLen]byte + if _, err := io.ReadFull(r, buf[:]); err != nil { + return err + } + b := readBuf(buf[:]) + f.CreatorVersion = b.uint16() + f.ReaderVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + f.CompressedSize64 = uint64(f.CompressedSize) + f.UncompressedSize64 = uint64(f.UncompressedSize) + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + commentLen := int(b.uint16()) + b = b[4:] // skipped start disk number and internal attributes (2x uint16) + f.ExternalAttrs = b.uint32() + f.headerOffset = int64(b.uint32()) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + + // Determine the character encoding. + utf8Valid1, utf8Require1 := detectUTF8(f.Name) + utf8Valid2, utf8Require2 := detectUTF8(f.Comment) + switch { + case !utf8Valid1 || !utf8Valid2: + // Name and Comment definitely not UTF-8. + f.NonUTF8 = true + case !utf8Require1 && !utf8Require2: + // Name and Comment use only single-byte runes that overlap with UTF-8. + f.NonUTF8 = false + default: + // Might be UTF-8, might be some other encoding; preserve existing flag. + // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. + // Since it is impossible to always distinguish valid UTF-8 from some + // other encoding (e.g., GBK or Shift-JIS), we trust the flag. + f.NonUTF8 = f.Flags&0x800 == 0 + } + + needUSize := f.UncompressedSize == ^uint32(0) + needCSize := f.CompressedSize == ^uint32(0) + needHeaderOffset := f.headerOffset == int64(^uint32(0)) + + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + f.zip64 = true + + // update directory values from the zip64 extra block. + // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() + } + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras + } + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) + } + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) + } + } + + // Assume that uncompressed size 2³²-1 could plausibly happen in + // an old zip32 file that was sharding inputs into the largest chunks + // possible (or is just malicious; search the web for 42.zip). + // If needUSize is true still, it means we didn't see a zip64 extension. + // As long as the compressed size is not also 2³²-1 (implausible) + // and the header is not also 2³²-1 (equally implausible), + // accept the uncompressed size 2³²-1 as valid. + // If nothing else, this keeps archive/zip working with 42.zip. + _ = needUSize + + if needCSize || needHeaderOffset { + return ErrFormat + } + + return nil +} + +func readDataDescriptor(r io.Reader, f *File) error { + var buf [dataDescriptorLen]byte + // The spec says: "Although not originally assigned a + // signature, the value 0x08074b50 has commonly been adopted + // as a signature value for the data descriptor record. + // Implementers should be aware that ZIP files may be + // encountered with or without this signature marking data + // descriptors and should account for either case when reading + // ZIP files to ensure compatibility." + // + // dataDescriptorLen includes the size of the signature but + // first read just those 4 bytes to see if it exists. + if _, err := io.ReadFull(r, buf[:4]); err != nil { + return err + } + off := 0 + maybeSig := readBuf(buf[:4]) + if maybeSig.uint32() != dataDescriptorSignature { + // No data descriptor signature. Keep these four + // bytes. + off += 4 + } + if _, err := io.ReadFull(r, buf[off:12]); err != nil { + return err + } + b := readBuf(buf[:12]) + if b.uint32() != f.CRC32 { + return ErrChecksum + } + + // The two sizes that follow here can be either 32 bits or 64 bits + // but the spec is not very clear on this and different + // interpretations has been made causing incompatibilities. We + // already have the sizes from the central directory so we can + // just ignore these. + + return nil +} + +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var buf []byte + var directoryEndOffset int64 + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + buf = make([]byte, int(bLen)) + if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { + return nil, 0, err + } + if p := findSignatureInBlock(buf); p >= 0 { + buf = buf[p:] + directoryEndOffset = size - bLen + int64(p) + break + } + if i == 1 || bLen == size { + return nil, 0, ErrFormat + } + } + + // read header into struct + b := readBuf(buf[4:]) // skip signature + d := &directoryEnd{ + DiskNbr: uint32(b.uint16()), + DirDiskNbr: uint32(b.uint16()), + DirRecordsThisDisk: uint64(b.uint16()), + DirectoryRecords: uint64(b.uint16()), + DirectorySize: uint64(b.uint32()), + DirectoryOffset: uint64(b.uint32()), + CommentLen: b.uint16(), + } + l := int(d.CommentLen) + if l > len(b) { + return nil, 0, errors.New("zip: invalid comment length") + } + d.Comment = string(b[:l]) + + // These values mean that the file can be a zip64 file + if d.DirectoryRecords == 0xffff || d.DirectorySize == 0xffff || d.DirectoryOffset == 0xffffffff { + p, err := findDirectory64End(r, directoryEndOffset) + if err == nil && p >= 0 { + directoryEndOffset = p + err = readDirectory64End(r, p, d) + } + if err != nil { + return nil, 0, err + } + } + + baseOffset = directoryEndOffset - int64(d.DirectorySize) - int64(d.DirectoryOffset) + + // Make sure directoryOffset points to somewhere in our file. + if o := baseOffset + int64(d.DirectoryOffset); o < 0 || o >= size { + return nil, 0, ErrFormat + } + return d, baseOffset, nil +} + +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. +func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory + return -1, nil // the file is not a valid zip64-file + } + p := b.uint64() // relative offset of the zip64 end of central directory record + if b.uint32() != 1 { // total number of disks + return -1, nil // the file is not a valid zip64-file + } + return int64(p), nil +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return ErrFormat + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.DiskNbr = b.uint32() // number of this disk + d.DirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.DirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.DirectoryRecords = b.uint64() // total number of entries in the central directory + d.DirectorySize = b.uint64() // size of the central directory + d.DirectoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i <= len(b) { + return i + } + } + } + return -1 +} + +type readBuf []byte + +func (b *readBuf) uint8() uint8 { + v := (*b)[0] + *b = (*b)[1:] + return v +} + +func (b *readBuf) uint16() uint16 { + v := binary.LittleEndian.Uint16(*b) + *b = (*b)[2:] + return v +} + +func (b *readBuf) uint32() uint32 { + v := binary.LittleEndian.Uint32(*b) + *b = (*b)[4:] + return v +} + +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} + +func (b *readBuf) sub(n int) readBuf { + b2 := (*b)[:n] + *b = (*b)[n:] + return b2 +} + +// A fileListEntry is a File and its ename. +// If file == nil, the fileListEntry describes a directory without metadata. +type fileListEntry struct { + name string + file *File + isDir bool + isDup bool +} + +type fileInfoDirEntry interface { + fs.FileInfo + fs.DirEntry +} + +func (e *fileListEntry) stat() (fileInfoDirEntry, error) { + if e.isDup { + return nil, errors.New(e.name + ": duplicate entries in zip file") + } + if !e.isDir { + return headerFileInfo{&e.file.FileHeader}, nil + } + return e, nil +} + +// Only used for directories. +func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } +func (f *fileListEntry) Size() int64 { return 0 } +func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } +func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } +func (f *fileListEntry) IsDir() bool { return true } +func (f *fileListEntry) Sys() interface{} { return nil } + +func (f *fileListEntry) ModTime() time.Time { + if f.file == nil { + return time.Time{} + } + return f.file.FileHeader.Modified.UTC() +} + +func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } + +// toValidName coerces name to be a valid name for fs.FS.Open. +func toValidName(name string) string { + name = strings.ReplaceAll(name, `\`, `/`) + p := path.Clean(name) + p = strings.TrimPrefix(p, "/") + p = strings.TrimPrefix(p, "../") + return p +} + +func (r *Reader) initFileList() { + r.fileListOnce.Do(func() { + // files and knownDirs map from a file/directory name + // to an index into the r.fileList entry that we are + // building. They are used to mark duplicate entries. + files := make(map[string]int) + knownDirs := make(map[string]int) + + // dirs[name] is true if name is known to be a directory, + // because it appears as a prefix in a path. + dirs := make(map[string]bool) + + for _, file := range r.File { + isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' + name := toValidName(file.Name) + if name == "" { + continue + } + + if idx, ok := files[name]; ok { + r.fileList[idx].isDup = true + continue + } + if idx, ok := knownDirs[name]; ok { + r.fileList[idx].isDup = true + continue + } + + for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { + dirs[dir] = true + } + + idx := len(r.fileList) + entry := fileListEntry{ + name: name, + file: file, + isDir: isDir, + } + r.fileList = append(r.fileList, entry) + if isDir { + knownDirs[name] = idx + } else { + files[name] = idx + } + } + for dir := range dirs { + if _, ok := knownDirs[dir]; !ok { + if idx, ok := files[dir]; ok { + r.fileList[idx].isDup = true + } else { + entry := fileListEntry{ + name: dir, + file: nil, + isDir: true, + } + r.fileList = append(r.fileList, entry) + } + } + } + + sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) + }) +} + +func fileEntryLess(x, y string) bool { + xdir, xelem, _ := split(x) + ydir, yelem, _ := split(y) + return xdir < ydir || xdir == ydir && xelem < yelem +} + +// Open opens the named file in the ZIP archive, +// using the semantics of fs.FS.Open: +// paths are always slash separated, with no +// leading / or ../ elements. +func (r *Reader) Open(name string) (fs.File, error) { + r.initFileList() + + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + } + e := r.openLookup(name) + if e == nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} + } + if e.isDir { + return &openDir{e, r.openReadDir(name), 0}, nil + } + rc, err := e.file.Open() + if err != nil { + return nil, err + } + return rc.(fs.File), nil +} + +func split(name string) (dir, elem string, isDir bool) { + if len(name) > 0 && name[len(name)-1] == '/' { + isDir = true + name = name[:len(name)-1] + } + i := len(name) - 1 + for i >= 0 && name[i] != '/' { + i-- + } + if i < 0 { + return ".", name, isDir + } + return name[:i], name[i+1:], isDir +} + +var dotFile = &fileListEntry{name: "./", isDir: true} + +func (r *Reader) openLookup(name string) *fileListEntry { + if name == "." { + return dotFile + } + + dir, elem, _ := split(name) + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, ielem, _ := split(files[i].name) + return idir > dir || idir == dir && ielem >= elem + }) + if i < len(files) { + fname := files[i].name + if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { + return &files[i] + } + } + return nil +} + +func (r *Reader) openReadDir(dir string) []fileListEntry { + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, _, _ := split(files[i].name) + return idir >= dir + }) + j := sort.Search(len(files), func(j int) bool { + jdir, _, _ := split(files[j].name) + return jdir > dir + }) + return files[i:j] +} + +type openDir struct { + e *fileListEntry + files []fileListEntry + offset int +} + +func (d *openDir) Close() error { return nil } +func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() } + +func (d *openDir) Read([]byte) (int, error) { + return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} +} + +func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { + n := len(d.files) - d.offset + if count > 0 && n > count { + n = count + } + if n == 0 { + if count <= 0 { + return nil, nil + } + return nil, io.EOF + } + list := make([]fs.DirEntry, n) + for i := range list { + s, err := d.files[d.offset+i].stat() + if err != nil { + return nil, err + } + list[i] = s + } + d.offset += n + return list, nil +} diff --git a/struct.go b/struct.go new file mode 100644 index 0000000..eef0b91 --- /dev/null +++ b/struct.go @@ -0,0 +1,392 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zip provides support for reading and writing ZIP archives. + +See: https://www.pkware.com/appnote + +This package does not support disk spanning. + +A note about ZIP64: + +To be backwards compatible the FileHeader has both 32 and 64 bit Size +fields. The 64 bit fields will always contain the correct value and +for normal archives both fields will be the same. For files requiring +the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit +fields must be used instead. +*/ +package main + +import ( + "io/fs" + "path" + "time" +) + +// Compression methods. +const ( + Store uint16 = 0 // no compression + Deflate uint16 = 8 // DEFLATE compressed +) + +const ( + fileHeaderSignature = 0x04034b50 + directoryHeaderSignature = 0x02014b50 + directoryEndSignature = 0x06054b50 + directory64LocSignature = 0x07064b50 + directory64EndSignature = 0x06064b50 + dataDescriptorSignature = 0x08074b50 // de-facto standard; required by OS X Finder + fileHeaderLen = 26 // + filename + extra + directoryHeaderLen = 42 // + filename + extra + comment + directoryEndLen = 18 // + comment + dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size + dataDescriptor64Len = 24 // two uint32: signature, crc32 | two uint64: compressed size, size + directory64LocLen = 20 // + directory64EndLen = 56 // + extra + + // Constants for the first byte in CreatorVersion. + creatorFAT = 0 + creatorUnix = 3 + creatorNTFS = 11 + creatorVFAT = 14 + creatorMacOSX = 19 + + // Version numbers. + zipVersion20 = 20 // 2.0 + zipVersion45 = 45 // 4.5 (reads and writes zip64 archives) + + // Limits for non zip64 files. + uint16max = (1 << 16) - 1 + uint32max = (1 << 32) - 1 + + // Extra header IDs. + // + // IDs 0..31 are reserved for official use by PKWARE. + // IDs above that range are defined by third-party vendors. + // Since ZIP lacked high precision timestamps (nor a official specification + // of the timezone used for the date fields), many competing extra fields + // have been invented. Pervasive use effectively makes them "official". + // + // See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField + zip64ExtraID = 0x0001 // Zip64 extended information + ntfsExtraID = 0x000a // NTFS + unixExtraID = 0x000d // UNIX + extTimeExtraID = 0x5455 // Extended timestamp + infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension +) + +// FileHeader describes a file within a zip file. +// See the zip spec for details. +type FileHeader struct { + // Name is the name of the file. + // + // It must be a relative path, not start with a drive letter (such as "C:"), + // and must use forward slashes instead of back slashes. A trailing slash + // indicates that this file is a directory and should have no data. + // + // When reading zip files, the Name field is populated from + // the zip file directly and is not validated for correctness. + // It is the caller's responsibility to sanitize it as + // appropriate, including canonicalizing slash directions, + // validating that paths are relative, and preventing path + // traversal through filenames ("../../../"). + Name string + + // Comment is any arbitrary user-defined string shorter than 64KiB. + Comment string + + // NonUTF8 indicates that Name and Comment are not encoded in UTF-8. + // + // By specification, the only other encoding permitted should be CP-437, + // but historically many ZIP readers interpret Name and Comment as whatever + // the system's local character encoding happens to be. + // + // This flag should only be set if the user intends to encode a non-portable + // ZIP file for a specific localized region. Otherwise, the Writer + // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings. + NonUTF8 bool + + CreatorVersion uint16 + ReaderVersion uint16 + Flags uint16 + + // Method is the compression method. If zero, Store is used. + Method uint16 + + // Modified is the modified time of the file. + // + // When reading, an extended timestamp is preferred over the legacy MS-DOS + // date field, and the offset between the times is used as the timezone. + // If only the MS-DOS date is present, the timezone is assumed to be UTC. + // + // When writing, an extended timestamp (which is timezone-agnostic) is + // always emitted. The legacy MS-DOS date field is encoded according to the + // location of the Modified time. + Modified time.Time + ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead. + ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead. + + CRC32 uint32 + CompressedSize uint32 // Deprecated: Use CompressedSize64 instead. + UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead. + CompressedSize64 uint64 + UncompressedSize64 uint64 + Extra []byte + ExternalAttrs uint32 // Meaning depends on CreatorVersion +} + +// FileInfo returns an fs.FileInfo for the FileHeader. +func (h *FileHeader) FileInfo() fs.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements fs.FileInfo. +type headerFileInfo struct { + fh *FileHeader +} + +func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) } +func (fi headerFileInfo) Size() int64 { + if fi.fh.UncompressedSize64 > 0 { + return int64(fi.fh.UncompressedSize64) + } + return int64(fi.fh.UncompressedSize) +} +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { + if fi.fh.Modified.IsZero() { + return fi.fh.ModTime() + } + return fi.fh.Modified.UTC() +} +func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() } +func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() } +func (fi headerFileInfo) Sys() interface{} { return fi.fh } + +func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil } + +// FileInfoHeader creates a partially-populated FileHeader from an +// fs.FileInfo. +// Because fs.FileInfo's Name method returns only the base name of +// the file it describes, it may be necessary to modify the Name field +// of the returned header to provide the full path name of the file. +// If compression is desired, callers should set the FileHeader.Method +// field; it is unset by default. +func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) { + size := fi.Size() + fh := &FileHeader{ + Name: fi.Name(), + UncompressedSize64: uint64(size), + } + fh.SetModTime(fi.ModTime()) + fh.SetMode(fi.Mode()) + if fh.UncompressedSize64 > uint32max { + fh.UncompressedSize = uint32max + } else { + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + return fh, nil +} + +type directoryEnd struct { + DiskNbr uint32 // unused + DirDiskNbr uint32 // unused + DirRecordsThisDisk uint64 // unused + DirectoryRecords uint64 + DirectorySize uint64 + DirectoryOffset uint64 // relative to file + CommentLen uint16 + Comment string +} + +// timeZone returns a *time.Location based on the provided offset. +// If the offset is non-sensible, then this uses an offset of zero. +func timeZone(offset time.Duration) *time.Location { + const ( + minOffset = -12 * time.Hour // E.g., Baker island at -12:00 + maxOffset = +14 * time.Hour // E.g., Line island at +14:00 + offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45 + ) + offset = offset.Round(offsetAlias) + if offset < minOffset || maxOffset < offset { + offset = 0 + } + return time.FixedZone("", int(offset/time.Second)) +} + +// msDosTimeToTime converts an MS-DOS date and time into a time.Time. +// The resolution is 2s. +// See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx +func msDosTimeToTime(dosDate, dosTime uint16) time.Time { + return time.Date( + // date bits 0-4: day of month; 5-8: month; 9-15: years since 1980 + int(dosDate>>9+1980), + time.Month(dosDate>>5&0xf), + int(dosDate&0x1f), + + // time bits 0-4: second/2; 5-10: minute; 11-15: hour + int(dosTime>>11), + int(dosTime>>5&0x3f), + int(dosTime&0x1f*2), + 0, // nanoseconds + + time.UTC, + ) +} + +// timeToMsDosTime converts a time.Time to an MS-DOS date and time. +// The resolution is 2s. +// See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx +func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) { + fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9) + fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11) + return +} + +// ModTime returns the modification time in UTC using the legacy +// ModifiedDate and ModifiedTime fields. +// +// Deprecated: Use Modified instead. +func (h *FileHeader) ModTime() time.Time { + return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime) +} + +// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields +// to the given time in UTC. +// +// Deprecated: Use Modified instead. +func (h *FileHeader) SetModTime(t time.Time) { + t = t.UTC() // Convert to UTC for compatibility + h.Modified = t + h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t) +} + +const ( + // Unix constants. The specification doesn't mention them, + // but these seem to be the values agreed on by tools. + s_IFMT = 0xf000 + s_IFSOCK = 0xc000 + s_IFLNK = 0xa000 + s_IFREG = 0x8000 + s_IFBLK = 0x6000 + s_IFDIR = 0x4000 + s_IFCHR = 0x2000 + s_IFIFO = 0x1000 + s_ISUID = 0x800 + s_ISGID = 0x400 + s_ISVTX = 0x200 + + msdosDir = 0x10 + msdosReadOnly = 0x01 +) + +// Mode returns the permission and mode bits for the FileHeader. +func (h *FileHeader) Mode() (mode fs.FileMode) { + switch h.CreatorVersion >> 8 { + case creatorUnix, creatorMacOSX: + mode = unixModeToFileMode(h.ExternalAttrs >> 16) + case creatorNTFS, creatorVFAT, creatorFAT: + mode = msdosModeToFileMode(h.ExternalAttrs) + } + if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' { + mode |= fs.ModeDir + } + return mode +} + +// SetMode changes the permission and mode bits for the FileHeader. +func (h *FileHeader) SetMode(mode fs.FileMode) { + h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8 + h.ExternalAttrs = fileModeToUnixMode(mode) << 16 + + // set MSDOS attributes too, as the original zip does. + if mode&fs.ModeDir != 0 { + h.ExternalAttrs |= msdosDir + } + if mode&0200 == 0 { + h.ExternalAttrs |= msdosReadOnly + } +} + +// isZip64 reports whether the file size exceeds the 32 bit limit +func (h *FileHeader) isZip64() bool { + return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max +} + +func (f *FileHeader) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 +} + +func msdosModeToFileMode(m uint32) (mode fs.FileMode) { + if m&msdosDir != 0 { + mode = fs.ModeDir | 0777 + } else { + mode = 0666 + } + if m&msdosReadOnly != 0 { + mode &^= 0222 + } + return mode +} + +func fileModeToUnixMode(mode fs.FileMode) uint32 { + var m uint32 + switch mode & fs.ModeType { + default: + m = s_IFREG + case fs.ModeDir: + m = s_IFDIR + case fs.ModeSymlink: + m = s_IFLNK + case fs.ModeNamedPipe: + m = s_IFIFO + case fs.ModeSocket: + m = s_IFSOCK + case fs.ModeDevice: + m = s_IFBLK + case fs.ModeDevice | fs.ModeCharDevice: + m = s_IFCHR + } + if mode&fs.ModeSetuid != 0 { + m |= s_ISUID + } + if mode&fs.ModeSetgid != 0 { + m |= s_ISGID + } + if mode&fs.ModeSticky != 0 { + m |= s_ISVTX + } + return m | uint32(mode&0777) +} + +func unixModeToFileMode(m uint32) fs.FileMode { + mode := fs.FileMode(m & 0777) + switch m & s_IFMT { + case s_IFBLK: + mode |= fs.ModeDevice + case s_IFCHR: + mode |= fs.ModeDevice | fs.ModeCharDevice + case s_IFDIR: + mode |= fs.ModeDir + case s_IFIFO: + mode |= fs.ModeNamedPipe + case s_IFLNK: + mode |= fs.ModeSymlink + case s_IFREG: + // nothing to do + case s_IFSOCK: + mode |= fs.ModeSocket + } + if m&s_ISGID != 0 { + mode |= fs.ModeSetgid + } + if m&s_ISUID != 0 { + mode |= fs.ModeSetuid + } + if m&s_ISVTX != 0 { + mode |= fs.ModeSticky + } + return mode +} diff --git a/unicode.go b/unicode.go new file mode 100644 index 0000000..e145d28 --- /dev/null +++ b/unicode.go @@ -0,0 +1,28 @@ +package main + +import ( + "unicode/utf8" +) + +// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string +// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, +// or any other common encoding). +func detectUTF8(s string) (valid, require bool) { + for i := 0; i < len(s); { + r, size := utf8.DecodeRuneInString(s[i:]) + i += size + // Officially, ZIP uses CP-437, but many readers use the system's + // local character encoding. Most encoding are compatible with a large + // subset of CP-437, which itself is ASCII-like. + // + // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those + // characters with localized currency and overline characters. + if r < 0x20 || r > 0x7d || r == 0x5c { + if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { + return false, false + } + require = true + } + } + return true, require +} |
