Diffstat (limited to 'main.go')
-rw-r--r--  main.go  304
1 file changed, 232 insertions, 72 deletions
diff --git a/main.go b/main.go
index 0167f9f..cab47d5 100644
--- a/main.go
+++ b/main.go
@@ -1,6 +1,10 @@
package main
import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "io"
"log"
"os"
"time"
@@ -8,84 +12,240 @@ import (
flag "github.com/spf13/pflag"
)
-// Compression methods.
-const (
- Store uint16 = 0 // no compression
- Deflate uint16 = 8 // DEFLATE compressed
-)
-
-const (
- fileHeaderSignature = 0x04034b50
- fileHeaderLen = 30 // + filename + extra
-)
-
-type FileHeader struct {
- // Name is the name of the file.
- //
- // It must be a relative path, not start with a drive letter (such as "C:"),
- // and must use forward slashes instead of back slashes. A trailing slash
- // indicates that this file is a directory and should have no data.
- //
- // When reading zip files, the Name field is populated from
- // the zip file directly and is not validated for correctness.
- // It is the caller's responsibility to sanitize it as
- // appropriate, including canonicalizing slash directions,
- // validating that paths are relative, and preventing path
- // traversal through filenames ("../../../").
- Name string
-
- // Comment is any arbitrary user-defined string shorter than 64KiB.
- Comment string
-
- // NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
- //
- // By specification, the only other encoding permitted should be CP-437,
- // but historically many ZIP readers interpret Name and Comment as whatever
- // the system's local character encoding happens to be.
- //
- // This flag should only be set if the user intends to encode a non-portable
- // ZIP file for a specific localized region. Otherwise, the Writer
- // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
- NonUTF8 bool
-
- CreatorVersion uint16
- ReaderVersion uint16
- Flags uint16
-
- // Method is the compression method. If zero, Store is used.
- Method uint16
-
- // Modified is the modified time of the file.
- //
- // When reading, an extended timestamp is preferred over the legacy MS-DOS
- // date field, and the offset between the times is used as the timezone.
- // If only the MS-DOS date is present, the timezone is assumed to be UTC.
- //
- // When writing, an extended timestamp (which is timezone-agnostic) is
- // always emitted. The legacy MS-DOS date field is encoded according to the
- // location of the Modified time.
- Modified time.Time
- ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead.
- ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead.
-
- CRC32 uint32
- CompressedSize uint32 // Deprecated: Use CompressedSize64 instead.
- UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead.
- CompressedSize64 uint64
- UncompressedSize64 uint64
- Extra []byte
- ExternalAttrs uint32 // Meaning depends on CreatorVersion
-}
-
func main() {
- var filename string
- flag.StringVarP(&filename, "filename", "f", "", "filename")
+ var zipFilename string
+ flag.StringVarP(&zipFilename, "filename", "f", "", "path of the ZIP file to inspect")
flag.Parse()
- f, err := os.Open(filename)
+ f, err := os.Open(zipFilename)
if err != nil {
log.Fatal(err)
}
defer f.Close()
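+
+ // Walk the archive from the start, reading the 4-byte little-endian
+ // signature that introduces each record and dispatching on the record
+ // types this tool understands: local file headers, central directory
+ // headers, and the end-of-central-directory record.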
+ for {
+ var sig [4]byte
+ if _, err = io.ReadFull(f, sig[:]); err != nil {
+ if errors.Is(err, io.EOF) {
+ break // end of the archive
+ }
+ log.Fatal("failed to read signature: ", err)
+ }
+
+ sigb := readBuf(sig[:])
+ signature := sigb.uint32()
+
+ switch signature {
+ case fileHeaderSignature:
+ var hdr File
+ if err := parseFileHeader(&hdr, f); err != nil {
+ pos, _ := f.Seek(0, io.SeekCurrent)
+ log.Fatalf("failed to read file header at %d: %s", pos, err)
+ }
+ j, _ := json.MarshalIndent(hdr, "", " ")
+ log.Printf("File: %s", string(j))
+ if pos, err := f.Seek(int64(hdr.CompressedSize64), io.SeekCurrent); err != nil {
+ log.Fatalf("failed to seek to next header, stopped at %d: %s", pos, err)
+ }
+ case directoryHeaderSignature:
+ var hdr File
+ if err = readDirectoryHeader(&hdr, f); err != nil {
+ pos, _ := f.Seek(0, io.SeekCurrent)
+ log.Fatalf("failed to read directory header at %d: %s", pos, err)
+ }
+ j, _ := json.MarshalIndent(hdr, "", " ")
+ log.Printf("Directory: %s", string(j))
+ case directoryEndSignature:
+ var hdr directoryEnd
+ if err := parseDirectoryEnd(&hdr, f); err != nil {
+ pos, _ := f.Seek(0, io.SeekCurrent)
+ log.Fatalf("failed to read directory end at %d: %s", pos, err)
+ }
+ j, _ := json.MarshalIndent(hdr, "", " ")
+ log.Printf("Directory End: %s", string(j))
+ default:
+ pos, _ := f.Seek(0, io.SeekCurrent)
+ log.Fatalf("invalid header signature at %d: got %08x", pos, signature)
+ }
+ }
+}
+
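+// parseFileHeader decodes a local file header into f. The 4-byte signature has
+// already been consumed by the caller; the remaining fixed fields are, in order
+// (all little-endian): version needed (2), flags (2), method (2), modification
+// time (2), modification date (2), CRC-32 (4), compressed size (4),
+// uncompressed size (4), name length (2), and extra length (2), followed by the
+// variable-length name and extra data.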
+func parseFileHeader(f *File, r io.Reader) error {
+ // fileHeaderLen includes the 4-byte signature, which the caller already read.
+ var hdr [fileHeaderLen - 4]byte
+ if _, err := io.ReadFull(r, hdr[:]); err != nil {
+ return err
+ }
+ b := readBuf(hdr[:])
+ f.CreatorVersion = b.uint16()
+ f.Flags = b.uint16()
+ f.Method = b.uint16()
+ f.ModifiedTime = b.uint16()
+ f.ModifiedDate = b.uint16()
+ f.CRC32 = b.uint32()
+ f.CompressedSize = b.uint32()
+ f.UncompressedSize = b.uint32()
+ // Mirror into the 64-bit fields; a zip64 extra field overrides them below
+ // when the 32-bit values are maxed out.
+ f.CompressedSize64 = uint64(f.CompressedSize)
+ f.UncompressedSize64 = uint64(f.UncompressedSize)
+
+ filenameLen := int(b.uint16())
+ extraLen := int(b.uint16())
+ filename := make([]byte, filenameLen)
+ if _, err := io.ReadFull(r, filename); err != nil {
+ return err
+ }
+ f.Name = string(filename)
+ f.Extra = make([]byte, extraLen)
+ if _, err := io.ReadFull(r, f.Extra); err != nil {
+ return err
+ }
+
+ needUSize := f.UncompressedSize == ^uint32(0)
+ needCSize := f.CompressedSize == ^uint32(0)
+ needHeaderOffset := f.headerOffset == int64(^uint32(0))
+
+ // Best effort to find what we need.
+ // Other zip authors might not even follow the basic format,
+ // and we'll just ignore the Extra content in that case.
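+ // Each extra field is a (tag, size, data) record: a 2-byte tag, a 2-byte
+ // size, then size bytes of payload, all little-endian.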
+ var modified time.Time
+parseExtras:
+ for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
+ fieldTag := extra.uint16()
+ fieldSize := int(extra.uint16())
+ if len(extra) < fieldSize {
+ break
+ }
+ fieldBuf := extra.sub(fieldSize)
+
+ switch fieldTag {
+ case zip64ExtraID:
+ f.zip64 = true
+
+ // update directory values from the zip64 extra block.
+ // They should only be consulted if the sizes read earlier
+ // are maxed out.
+ // See golang.org/issue/13367.
+ if needUSize {
+ needUSize = false
+ if len(fieldBuf) < 8 {
+ return ErrFormat
+ }
+ f.UncompressedSize64 = fieldBuf.uint64()
+ }
+ if needCSize {
+ needCSize = false
+ if len(fieldBuf) < 8 {
+ return ErrFormat
+ }
+ f.CompressedSize64 = fieldBuf.uint64()
+ }
+ if needHeaderOffset {
+ needHeaderOffset = false
+ if len(fieldBuf) < 8 {
+ return ErrFormat
+ }
+ f.headerOffset = int64(fieldBuf.uint64())
+ }
+ case ntfsExtraID:
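+ // NTFS extra field: a reserved uint32 followed by (tag, size) attributes.
+ // Attribute tag 1 is 24 bytes and holds the modification, access, and
+ // creation times as 64-bit Windows FILETIMEs (100ns ticks since
+ // 1601-01-01 UTC); only the modification time is used here.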
+ if len(fieldBuf) < 4 {
+ continue parseExtras
+ }
+ fieldBuf.uint32() // reserved (ignored)
+ for len(fieldBuf) >= 4 { // need at least tag and size
+ attrTag := fieldBuf.uint16()
+ attrSize := int(fieldBuf.uint16())
+ if len(fieldBuf) < attrSize {
+ continue parseExtras
+ }
+ attrBuf := fieldBuf.sub(attrSize)
+ if attrTag != 1 || attrSize != 24 {
+ continue // Ignore irrelevant attributes
+ }
+
+ const ticksPerSecond = 1e7 // Windows timestamp resolution
+ ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
+ secs := int64(ts / ticksPerSecond)
+ nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
+ epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
+ modified = time.Unix(epoch.Unix()+secs, nsecs)
+ }
+ case unixExtraID, infoZipUnixExtraID:
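+ // Both Unix-style extra fields store the access time followed by the
+ // modification time as 32-bit Unix timestamps.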
+ if len(fieldBuf) < 8 {
+ continue parseExtras
+ }
+ fieldBuf.uint32() // AcTime (ignored)
+ ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
+ modified = time.Unix(ts, 0)
+ case extTimeExtraID:
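+ // Extended timestamp field: a flags byte whose lowest bit indicates that
+ // a 32-bit Unix modification time follows.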
+ if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
+ continue parseExtras
+ }
+ ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
+ modified = time.Unix(ts, 0)
+ }
+ }
+
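+ // Fall back to the legacy MS-DOS date/time fields, which have two-second
+ // resolution and carry no timezone, preferring any extended timestamp
+ // found above.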
+ msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
+ f.Modified = msdosModified
+ if !modified.IsZero() {
+ f.Modified = modified.UTC()
+
+ // If legacy MS-DOS timestamps are set, we can use the delta between
+ // the legacy and extended versions to estimate timezone offset.
+ //
+ // A non-UTC timezone is always used (even if offset is zero).
+ // Thus, FileHeader.Modified.Location() == time.UTC is useful for
+ // determining whether extended timestamps are present.
+ // This is necessary for users that need to do additional time
+ // calculations when dealing with legacy ZIP formats.
+ if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
+ f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
+ }
+ }
+
+ return nil
+}
+
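+// parseDirectoryEnd decodes the end-of-central-directory record (the 4-byte
+// signature has already been consumed by the caller): disk numbers, entry
+// counts, the size and offset of the central directory, and the trailing
+// archive comment.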
+func parseDirectoryEnd(d *directoryEnd, r io.Reader) error {
+ // directoryEndLen includes the 4-byte signature, which the caller already read.
+ var hdr [directoryEndLen - 4]byte
+ if _, err := io.ReadFull(r, hdr[:]); err != nil {
+ return err
+ }
+ b := readBuf(hdr[:])
+ d.DiskNbr = uint32(b.uint16())
+ d.DirDiskNbr = uint32(b.uint16())
+ d.DirRecordsThisDisk = uint64(b.uint16())
+ d.DirectoryRecords = uint64(b.uint16())
+ d.DirectorySize = uint64(b.uint32())
+ d.DirectoryOffset = uint64(b.uint32())
+ d.CommentLen = b.uint16()
+ // The archive comment follows the fixed fields; ReadFull reports an error
+ // if the archive is truncated before CommentLen bytes arrive.
+ comment := make([]byte, d.CommentLen)
+ if _, err := io.ReadFull(r, comment); err != nil {
+ return err
+ }
+ d.Comment = string(comment)
+
+ return nil
+}
+
+const searchChunkSize = 4096
+
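+// find returns the offset in r of the first occurrence of search, scanning
+// searchChunkSize bytes at a time and carrying the last len(search)-1 bytes
+// between reads so a match that straddles two chunks is still found. It
+// returns -1 with a nil error when search is not present.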
+func find(r io.Reader, search []byte) (int64, error) {
+ var offset int64
+ tailLen := len(search) - 1
+ chunk := make([]byte, searchChunkSize+tailLen)
+ n, err := r.Read(chunk[tailLen:])
+ if idx := bytes.Index(chunk[tailLen:tailLen+n], search); idx >= 0 {
+ return int64(idx), nil
+ }
+ for {
+ if err == io.EOF {
+ return -1, nil
+ } else if err != nil {
+ return -1, err
+ }
+ // Keep the last tailLen bytes seen so far so a match that straddles
+ // two reads is still found, then advance past the bytes just searched.
+ copy(chunk, chunk[n:n+tailLen])
+ offset += int64(n)
+ n, err = r.Read(chunk[tailLen:])
+ // chunk[0] now corresponds to stream position offset-tailLen.
+ if idx := bytes.Index(chunk[:tailLen+n], search); idx >= 0 {
+ return offset - int64(tailLen) + int64(idx), nil
+ }
+ }
}