diff options
Diffstat (limited to 'main.go')
| -rw-r--r-- | main.go | 304 |
1 file changed, 232 insertions, 72 deletions
@@ -1,6 +1,10 @@ package main import ( + "bytes" + "encoding/json" + "errors" + "io" "log" "os" "time" @@ -8,84 +12,240 @@ import ( flag "github.com/spf13/pflag" ) -// Compression methods. -const ( - Store uint16 = 0 // no compression - Deflate uint16 = 8 // DEFLATE compressed -) - -const ( - fileHeaderSignature = 0x04034b50 - fileHeaderLen = 30 // + filename + extra -) - -type FileHeader struct { - // Name is the name of the file. - // - // It must be a relative path, not start with a drive letter (such as "C:"), - // and must use forward slashes instead of back slashes. A trailing slash - // indicates that this file is a directory and should have no data. - // - // When reading zip files, the Name field is populated from - // the zip file directly and is not validated for correctness. - // It is the caller's responsibility to sanitize it as - // appropriate, including canonicalizing slash directions, - // validating that paths are relative, and preventing path - // traversal through filenames ("../../../"). - Name string - - // Comment is any arbitrary user-defined string shorter than 64KiB. - Comment string - - // NonUTF8 indicates that Name and Comment are not encoded in UTF-8. - // - // By specification, the only other encoding permitted should be CP-437, - // but historically many ZIP readers interpret Name and Comment as whatever - // the system's local character encoding happens to be. - // - // This flag should only be set if the user intends to encode a non-portable - // ZIP file for a specific localized region. Otherwise, the Writer - // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings. - NonUTF8 bool - - CreatorVersion uint16 - ReaderVersion uint16 - Flags uint16 - - // Method is the compression method. If zero, Store is used. - Method uint16 - - // Modified is the modified time of the file. 
- // - // When reading, an extended timestamp is preferred over the legacy MS-DOS - // date field, and the offset between the times is used as the timezone. - // If only the MS-DOS date is present, the timezone is assumed to be UTC. - // - // When writing, an extended timestamp (which is timezone-agnostic) is - // always emitted. The legacy MS-DOS date field is encoded according to the - // location of the Modified time. - Modified time.Time - ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead. - ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead. - - CRC32 uint32 - CompressedSize uint32 // Deprecated: Use CompressedSize64 instead. - UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead. - CompressedSize64 uint64 - UncompressedSize64 uint64 - Extra []byte - ExternalAttrs uint32 // Meaning depends on CreatorVersion -} - func main() { - var filename string - flag.StringVarP(&filename, "filename", "f", "", "filename") + var zipFilename string + flag.StringVarP(&zipFilename, "filename", "f", "", "filename") flag.Parse() - f, err := os.Open(filename) + f, err := os.Open(zipFilename) if err != nil { log.Fatal(err) } defer f.Close() + for { + var sig [4]byte + if _, err = io.ReadFull(f, sig[:]); err != nil { + log.Fatal("failed to read signature: ", err) + } + + sigb := readBuf(sig[:]) + signature := sigb.uint32() + + switch signature { + case fileHeaderSignature: + var hdr File + if err := parseFileHeader(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read file header at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("File: %s", string(j)) + if pos, err := f.Seek(int64(hdr.CompressedSize), io.SeekCurrent); err != nil { + log.Fatalf("failed to seek to next header, stopped at %d: %s", pos, err) + } + case directoryHeaderSignature: + var hdr File + if err = readDirectoryHeader(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + 
log.Fatalf("failed to read directory header at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("Directory: %s", string(j)) + case directoryEndSignature: + var hdr directoryEnd + if err := parseDirectoryEnd(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read directory end at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("Directory End: %s", string(j)) + default: + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("invalid header signature at %d: got %08x", pos, signature) + } + } +} + +func parseFileHeader(f *File, r io.Reader) error { + var hdr [fileHeaderLen]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return err + } + b := readBuf(hdr[:]) + f.CreatorVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + filename := make([]byte, filenameLen) + if _, err := io.ReadFull(r, filename); err != nil { + return err + } + f.Name = string(filename) + f.Extra = make([]byte, extraLen) + if _, err := io.ReadFull(r, f.Extra); err != nil { + return err + } + + needUSize := f.UncompressedSize == ^uint32(0) + needCSize := f.CompressedSize == ^uint32(0) + needHeaderOffset := f.headerOffset == int64(^uint32(0)) + + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + f.zip64 = true + + // update directory values from the zip64 extra block. 
+ // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() + } + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras + } + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) + } + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone 
offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) + } + } + + return nil +} + +func parseDirectoryEnd(d *directoryEnd, r io.Reader) error { + var hdr [directoryEndLen]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return err + } + b := readBuf(hdr[:]) + d.DiskNbr = uint32(b.uint16()) + d.DirDiskNbr = uint32(b.uint16()) + d.DirRecordsThisDisk = uint64(b.uint16()) + d.DirectoryRecords = uint64(b.uint16()) + d.DirectorySize = uint64(b.uint32()) + d.DirectoryOffset = uint64(b.uint32()) + d.CommentLen = b.uint16() + l := int(d.CommentLen) + if l > len(b) { + return errors.New("zip: invalid comment length") + } + comment := make([]byte, d.CommentLen) + if _, err := io.ReadFull(r, comment); err != nil { + return err + } + d.Comment = string(comment) + + return nil +} + +const searchChunkSize = 4096 + +func find(r io.Reader, search []byte) (int64, error) { + var offset int64 + tailLen := len(search) - 1 + chunk := make([]byte, searchChunkSize+tailLen) + n, err := r.Read(chunk[tailLen:]) + idx := bytes.Index(chunk[tailLen:n+tailLen], search) + for { + if idx >= 0 { + return offset + int64(idx), nil + } + if err == io.EOF { + return -1, nil + } else if err != nil { + return -1, err + } + copy(chunk, chunk[searchChunkSize:]) + offset += searchChunkSize + n, err = r.Read(chunk[tailLen:]) + idx = bytes.Index(chunk[:n+tailLen], search) + } } |
