diff options
Diffstat (limited to 'main.go')
| -rw-r--r-- | main.go | 304 |
1 file changed, 232 insertions, 72 deletions
@@ -1,6 +1,10 @@ package main import ( + "bytes" + "encoding/json" + "errors" + "io" "log" "os" "time" @@ -8,84 +12,240 @@ import ( flag "github.com/spf13/pflag" ) -// Compression methods. -const ( - Store uint16 = 0 // no compression - Deflate uint16 = 8 // DEFLATE compressed -) - -const ( - fileHeaderSignature = 0x04034b50 - fileHeaderLen = 30 // + filename + extra -) - -type FileHeader struct { - // Name is the name of the file. - // - // It must be a relative path, not start with a drive letter (such as "C:"), - // and must use forward slashes instead of back slashes. A trailing slash - // indicates that this file is a directory and should have no data. - // - // When reading zip files, the Name field is populated from - // the zip file directly and is not validated for correctness. - // It is the caller's responsibility to sanitize it as - // appropriate, including canonicalizing slash directions, - // validating that paths are relative, and preventing path - // traversal through filenames ("../../../"). - Name string - - // Comment is any arbitrary user-defined string shorter than 64KiB. - Comment string - - // NonUTF8 indicates that Name and Comment are not encoded in UTF-8. - // - // By specification, the only other encoding permitted should be CP-437, - // but historically many ZIP readers interpret Name and Comment as whatever - // the system's local character encoding happens to be. - // - // This flag should only be set if the user intends to encode a non-portable - // ZIP file for a specific localized region. Otherwise, the Writer - // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings. - NonUTF8 bool - - CreatorVersion uint16 - ReaderVersion uint16 - Flags uint16 - - // Method is the compression method. If zero, Store is used. - Method uint16 - - // Modified is the modified time of the file. 
- // - // When reading, an extended timestamp is preferred over the legacy MS-DOS - // date field, and the offset between the times is used as the timezone. - // If only the MS-DOS date is present, the timezone is assumed to be UTC. - // - // When writing, an extended timestamp (which is timezone-agnostic) is - // always emitted. The legacy MS-DOS date field is encoded according to the - // location of the Modified time. - Modified time.Time - ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead. - ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead. - - CRC32 uint32 - CompressedSize uint32 // Deprecated: Use CompressedSize64 instead. - UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead. - CompressedSize64 uint64 - UncompressedSize64 uint64 - Extra []byte - ExternalAttrs uint32 // Meaning depends on CreatorVersion -} - func main() { - var filename string - flag.StringVarP(&filename, "filename", "f", "", "filename") + var zipFilename string + flag.StringVarP(&zipFilename, "filename", "f", "", "filename") flag.Parse() - f, err := os.Open(filename) + f, err := os.Open(zipFilename) if err != nil { log.Fatal(err) } defer f.Close() + for { + var sig [4]byte + if _, err = io.ReadFull(f, sig[:]); err != nil { + log.Fatal("failed to read signature: ", err) + } + + sigb := readBuf(sig[:]) + signature := sigb.uint32() + + switch signature { + case fileHeaderSignature: + var hdr File + if err := parseFileHeader(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read file header at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("File: %s", string(j)) + if pos, err := f.Seek(int64(hdr.CompressedSize), io.SeekCurrent); err != nil { + log.Fatalf("failed to seek to next header, stopped at %d: %s", pos, err) + } + case directoryHeaderSignature: + var hdr File + if err = readDirectoryHeader(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + 
log.Fatalf("failed to read directory header at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("Directory: %s", string(j)) + case directoryEndSignature: + var hdr directoryEnd + if err := parseDirectoryEnd(&hdr, f); err != nil { + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("failed to read directory end at %d: %s", pos, err) + } + j, _ := json.MarshalIndent(hdr, "", " ") + log.Printf("Directory End: %s", string(j)) + default: + pos, _ := f.Seek(0, io.SeekCurrent) + log.Fatalf("invalid header signature at %d: got %08x", pos, signature) + } + } +} + +func parseFileHeader(f *File, r io.Reader) error { + var hdr [fileHeaderLen]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return err + } + b := readBuf(hdr[:]) + f.CreatorVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + filename := make([]byte, filenameLen) + if _, err := io.ReadFull(r, filename); err != nil { + return err + } + f.Name = string(filename) + f.Extra = make([]byte, extraLen) + if _, err := io.ReadFull(r, f.Extra); err != nil { + return err + } + + needUSize := f.UncompressedSize == ^uint32(0) + needCSize := f.CompressedSize == ^uint32(0) + needHeaderOffset := f.headerOffset == int64(^uint32(0)) + + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + f.zip64 = true + + // update directory values from the zip64 extra block. 
+ // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() + } + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras + } + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) + } + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone 
offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) + } + } + + return nil +} + +func parseDirectoryEnd(d *directoryEnd, r io.Reader) error { + var hdr [directoryEndLen]byte + if _, err := io.ReadFull(r, hdr[:]); err != nil { + return err + } + b := readBuf(hdr[:]) + d.DiskNbr = uint32(b.uint16()) + d.DirDiskNbr = uint32(b.uint16()) + d.DirRecordsThisDisk = uint64(b.uint16()) + d.DirectoryRecords = uint64(b.uint16()) + d.DirectorySize = uint64(b.uint32()) + d.DirectoryOffset = uint64(b.uint32()) + d.CommentLen = b.uint16() + l := int(d.CommentLen) + if l > len(b) { + return errors.New("zip: invalid comment length") + } + comment := make([]byte, d.CommentLen) + if _, err := io.ReadFull(r, comment); err != nil { + return err + } + d.Comment = string(comment) + + return nil +} + +const searchChunkSize = 4096 + +func find(r io.Reader, search []byte) (int64, error) { + var offset int64 + tailLen := len(search) - 1 + chunk := make([]byte, searchChunkSize+tailLen) + n, err := r.Read(chunk[tailLen:]) + idx := bytes.Index(chunk[tailLen:n+tailLen], search) + for { + if idx >= 0 { + return offset + int64(idx), nil + } + if err == io.EOF { + return -1, nil + } else if err != nil { + return -1, err + } + copy(chunk, chunk[searchChunkSize:]) + offset += searchChunkSize + n, err = r.Read(chunk[tailLen:]) + idx = bytes.Index(chunk[:n+tailLen], search) + } } |
