// Command main walks a ZIP archive sequentially from its first byte, record
// by record (local file headers, central directory headers, end of central
// directory), instead of starting from the central directory. It can print
// each record and optionally extract the file entries it encounters.
package main

import (
	"bytes"
	"compress/flate"
	"encoding/json"
	"errors"
	"fmt"
	"hash/crc32"
	"io"
	"log"
	"os"
	"path"
	"time"

	flag "github.com/spf13/pflag"
)

// main parses the command line, opens the archive and dispatches on each
// 4-byte record signature until end of file. Any malformed record is fatal.
func main() {
	var zipFilename string
	var extract string
	var verbose int
	flag.StringVarP(&zipFilename, "filename", "f", "", "filename")
	flag.StringVarP(&extract, "extract", "e", "", "extraction output directory")
	flag.CountVarP(&verbose, "verbose", "v", "-v verbose, -vv more verbose")
	flag.Parse()

	f, err := os.Open(zipFilename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	for {
		var sig [4]byte
		_, err = io.ReadFull(f, sig[:])
		if err == io.EOF {
			// Clean end of archive: no more records.
			break
		} else if err != nil {
			log.Fatal("failed to read signature: ", err)
		}
		sigb := readBuf(sig[:])
		signature := sigb.uint32()

		switch signature {
		case fileHeaderSignature:
			var hdr File
			if err := parseFileHeader(&hdr, f); err != nil {
				// Position is only used for the diagnostic; its error is irrelevant here.
				pos, _ := f.Seek(0, io.SeekCurrent)
				log.Fatalf("failed to read file header at 0x%x: %s", pos, err)
			}
			// The file data starts right after the header, name and extra field.
			hdr.Offset, err = f.Seek(0, io.SeekCurrent)
			if err != nil {
				log.Fatalf("failed to get data offset of %q: %s", hdr.Name, err)
			}
			if verbose >= 2 {
				j, _ := json.MarshalIndent(hdr, "", " ")
				log.Printf("File at 0x%x: %s", hdr.Offset, string(j))
			}

			// Flag bit 3: sizes and CRC follow the data in a data descriptor.
			hasDD := false
			if hdr.Flags&0x08 > 0 {
				if hdr.CompressedSize == 0 {
					if verbose >= 2 {
						log.Printf("Searching for file's end...")
					}
					if err = findAndUseDataDescriptor(&hdr, f, verbose); err != nil {
						log.Fatalf("failed to udpate using data descriptor at 0x%x: %s", hdr.Offset, err)
					}
				}
				hasDD = true
			}

			if extract != "" {
				if verbose >= 1 {
					log.Printf("Extracting '%s'...", hdr.Name)
				}
				if err := extractFile(f, &hdr, extract); err != nil {
					log.Fatalf("failed to extract file: %s", err)
				}
			}

			// Skip over the (compressed) file data to reach the next record.
			size := int64(hdr.CompressedSize)
			if size == 0xFFFFFFFF {
				size = int64(hdr.CompressedSize64)
			}
			pos, err := f.Seek(hdr.Offset+size, io.SeekStart)
			if err != nil {
				log.Fatalf("failed to seek to next header, stopped at 0x%x: %s", pos, err)
			}

			if hasDD {
				if _, err = io.ReadFull(f, sig[:]); err != nil {
					log.Fatal("failed to read DD signature: ", err)
				}
				sigb := readBuf(sig[:])
				signature = sigb.uint32()
				if signature != dataDescriptorSignature {
					log.Fatalf("unexpected signature at 0x%x: got %08x", pos, signature)
				}
				if verbose >= 2 {
					log.Printf("Skipping data descriptor at 0x%x, zip64=%t...", pos, hdr.zip64)
				}
				// CRC32 (4 bytes) plus two sizes: 4 bytes each, or 8 bytes each for zip64.
				ddLen := int64(12)
				if hdr.zip64 {
					ddLen = 20
				}
				if _, err := io.CopyN(io.Discard, f, ddLen); err != nil {
					// Best effort: a truncated descriptor ends the archive, which
					// the next signature read reports as end of file.
					log.Printf("failed to skip data descriptor at 0x%x: %s", pos, err)
				}
			}

		case directoryHeaderSignature:
			var hdr File
			if err = readDirectoryHeader(&hdr, f); err != nil {
				pos, _ := f.Seek(0, io.SeekCurrent)
				log.Fatalf("failed to read directory header at 0x%x: %s", pos, err)
			}
			if verbose >= 2 {
				j, _ := json.MarshalIndent(hdr, "", " ")
				log.Printf("Directory: %s", string(j))
			}

		case directoryEndSignature:
			var hdr directoryEnd
			if err := parseDirectoryEnd(&hdr, f); err != nil {
				pos, _ := f.Seek(0, io.SeekCurrent)
				log.Fatalf("failed to read directory end at 0x%x: %s", pos, err)
			}
			if verbose >= 2 {
				j, _ := json.MarshalIndent(hdr, "", " ")
				log.Printf("Directory End: %s", string(j))
			}

		default:
			pos, _ := f.Seek(0, io.SeekCurrent)
			log.Fatalf("invalid header signature at 0x%x: got %08x", pos, signature)
		}
	}
}

// parseFileHeader reads a local file header from r into f. The 4-byte
// signature must already have been consumed by the caller. It reads the
// fixed-size part (fileHeaderLen bytes), then the file name and the extra
// field, and finally derives f.Modified from the MS-DOS timestamp and any
// extended timestamp found in the extra field.
//
// NOTE(review): in a local file header the first 16-bit field is the
// "version needed to extract"; it is stored in CreatorVersion here — confirm
// this is intended.
func parseFileHeader(f *File, r io.Reader) error {
	var hdr [fileHeaderLen]byte
	if _, err := io.ReadFull(r, hdr[:]); err != nil {
		return err
	}
	b := readBuf(hdr[:])
	f.CreatorVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())

	filename := make([]byte, filenameLen)
	if _, err := io.ReadFull(r, filename); err != nil {
		return err
	}
	f.Name = string(filename)

	f.Extra = make([]byte, extraLen)
	if _, err := io.ReadFull(r, f.Extra); err != nil {
		return err
	}

	// A maxed-out 32-bit field means the real value lives in the zip64 extra block.
	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.Offset == int64(^uint32(0))

	// Best effort to find what we need.
	// Other zip authors might not even follow the basic format,
	// and we'll just ignore the Extra content in that case.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			f.zip64 = true

			// Update values from the zip64 extra block.
			// They should only be consulted if the sizes read earlier
			// are maxed out.
			// See golang.org/issue/13367.
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.Offset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32()        // reserved (ignored)
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // Ignore irrelevant attributes
				}

				const ticksPerSecond = 1e7    // Windows timestamp resolution
				ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
				secs := int64(ts / ticksPerSecond)
				nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32()              // AcTime (ignored)
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		}
	}

	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		f.Modified = modified.UTC()

		// If legacy MS-DOS timestamps are set, we can use the delta between
		// the legacy and extended versions to estimate timezone offset.
		//
		// A non-UTC timezone is always used (even if offset is zero).
		// Thus, Modified.Location() == time.UTC is useful for
		// determining whether extended timestamps are present.
		// This is necessary for users that need to do additional time
		// calculations when dealing with legacy ZIP formats.
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	return nil
}

// parseDirectoryEnd reads an end-of-central-directory record from r into d.
// The 4-byte signature must already have been consumed; d.HeaderOffset is set
// to the reader position at entry (i.e. just past the signature). The
// trailing archive comment is read from r after the fixed-size part.
func parseDirectoryEnd(d *directoryEnd, r io.ReadSeeker) error {
	pos, err := r.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}
	d.HeaderOffset = pos

	var hdr [directoryEndLen]byte
	if _, err := io.ReadFull(r, hdr[:]); err != nil {
		return err
	}
	b := readBuf(hdr[:])
	d.DiskNbr = uint32(b.uint16())
	d.DirDiskNbr = uint32(b.uint16())
	d.DirRecordsThisDisk = uint64(b.uint16())
	d.DirectoryRecords = uint64(b.uint16())
	d.DirectorySize = uint64(b.uint32())
	d.DirectoryOffset = uint64(b.uint32())
	d.CommentLen = b.uint16()

	// The comment is not part of hdr: it is streamed from r, so its length
	// must not be validated against what is left of the fixed-size buffer.
	// io.ReadFull reports a truncated comment as an error.
	comment := make([]byte, d.CommentLen)
	if _, err := io.ReadFull(r, comment); err != nil {
		return err
	}
	d.Comment = string(comment)
	return nil
}

// searchSize is the minimum number of fresh bytes requested per read by search.
const searchSize = 4096

// search scans r forward for the first occurrence of term. offset is the
// absolute position of the next byte r will deliver; the returned value is
// the absolute position of the first byte of the match. When the end of the
// stream is reached without a match it returns (-1, nil); a read error other
// than io.EOF is returned as (-1, err). The position of r after the call is
// unspecified (it reads in chunks).
func search(r io.Reader, term []byte, offset int64) (int64, error) {
	// Bytes retained between chunks so a match straddling two reads is found.
	keep := len(term) - 1
	if keep < 0 {
		keep = 0
	}
	buf := make([]byte, keep+searchSize)
	carried := 0 // number of bytes at buf[:carried] kept from previous reads

	for {
		n, err := r.Read(buf[carried:])
		window := buf[:carried+n]
		if idx := bytes.Index(window, term); idx >= 0 {
			// window[0] sits carried bytes before the current read offset.
			return offset - int64(carried) + int64(idx), nil
		}
		if err == io.EOF {
			return -1, nil
		} else if err != nil {
			return -1, err
		}

		offset += int64(n)
		tail := keep
		if tail > len(window) {
			tail = len(window)
		}
		copy(buf, window[len(window)-tail:])
		carried = tail
	}
}

// parseDataDescriptor reads dataDescriptorLen bytes from r and stores the
// CRC32, compressed size and uncompressed size (three little 32-bit fields,
// in that order) into f's Descriptor* fields. r must be positioned just
// after the descriptor signature.
func parseDataDescriptor(f *File, r io.Reader) error {
	var buf [dataDescriptorLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	b := readBuf(buf[:])
	f.DescriptorCRC32 = b.uint32()
	f.DescriptorCompressedSize = b.uint32()
	f.DescriptorUncompressedSize = b.uint32()
	return nil
}

// dataDescriptorSignatureBytes is the on-disk byte sequence that introduces a
// data descriptor.
var dataDescriptorSignatureBytes = []byte{'P', 'K', 0x07, 0x08}

// findAndUseDataDescriptor locates the data descriptor of the entry whose
// data starts at f.Offset (r must be positioned there), for entries whose
// local header carries no sizes. It searches forward for the descriptor
// signature and accepts the first candidate whose compressed size equals the
// distance between f.Offset and the signature. The candidate is then verified
// by computing the CRC-32 of the entry's uncompressed data, and f.CRC32,
// f.CompressedSize and f.UncompressedSize are updated from the descriptor.
//
// On success r is left positioned at the end of the compressed data.
// Only the Store and Deflate methods can be verified; other methods yield an
// error. Descriptors are parsed with 32-bit sizes.
func findAndUseDataDescriptor(f *File, r io.ReadSeeker, verbose int) error {
	pos, err := r.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}
	sigLen := int64(len(dataDescriptorSignatureBytes))

	for {
		sigPos, err := search(r, dataDescriptorSignatureBytes, pos)
		if err != nil {
			return fmt.Errorf("failed to find descriptor maxed at 0x%x: %s", sigPos, err)
		}
		if sigPos < 0 {
			return errors.New("no matching data descriptor found before end of file")
		}
		if verbose >= 2 {
			log.Printf("found descriptor at offset 0x%x", sigPos)
		}

		// The descriptor fields start right after the signature.
		if _, err := r.Seek(sigPos+sigLen, io.SeekStart); err != nil {
			return fmt.Errorf("failed to seek to descriptor at 0x%x: %w", sigPos+sigLen, err)
		}
		if err := parseDataDescriptor(f, r); err != nil {
			return fmt.Errorf("failed to read descriptor : %w", err)
		}

		// A genuine descriptor sits exactly compressed-size bytes after the data start.
		if sigPos-int64(f.DescriptorCompressedSize) == f.Offset {
			break
		} else if verbose >= 2 {
			log.Printf("invalid size: expected offset %x (hex) != %x (hex), compsize=%d, size=%d",
				f.Offset, sigPos-int64(f.DescriptorCompressedSize),
				f.DescriptorCompressedSize, f.DescriptorUncompressedSize)
		}

		// False positive (signature bytes inside the data): resume just past it.
		if pos, err = r.Seek(sigPos+sigLen, io.SeekStart); err != nil {
			return fmt.Errorf("failed to seek to descriptor at 0x%x: %w", sigPos+sigLen, err)
		}
	}

	// Go back to the beginning of the data to verify the checksum.
	if _, err := r.Seek(f.Offset, io.SeekStart); err != nil {
		return fmt.Errorf("failed to seek to beginning at %d: %s", f.Offset, err)
	}

	// The ZIP CRC-32 covers the uncompressed data, so deflated entries must be
	// inflated before hashing.
	crc := crc32.NewIEEE()
	compSize := int64(f.DescriptorCompressedSize)
	switch f.Method {
	case Store:
		if _, err := io.CopyN(crc, r, compSize); err != nil {
			return fmt.Errorf("failed to calculate CRC32: %w", err)
		}
	case Deflate:
		// The limit keeps the inflater's read-ahead inside this entry's data.
		fr := flate.NewReader(io.LimitReader(r, compSize))
		_, err := io.Copy(crc, fr)
		fr.Close() // nothing to flush on a reader; the copy error is what matters
		if err != nil {
			return fmt.Errorf("failed to calculate CRC32: %w", err)
		}
		// The inflater may stop short of or buffer past the last byte consumed;
		// leave r at the end of the compressed data like the Store path does.
		if _, err := r.Seek(f.Offset+compSize, io.SeekStart); err != nil {
			return fmt.Errorf("failed to seek to end of data at %d: %w", f.Offset+compSize, err)
		}
	default:
		return fmt.Errorf("cannot verify CRC32 for compression method %d", f.Method)
	}

	crcSum := crc.Sum32()
	if f.DescriptorCRC32 != crcSum {
		return fmt.Errorf("invalid CRC32: calculated %08x, have in decriptor %08x", crcSum, f.DescriptorCRC32)
	}

	f.CRC32 = crcSum
	f.UncompressedSize = f.DescriptorUncompressedSize
	f.CompressedSize = f.DescriptorCompressedSize

	return nil
}

// extractFile writes the entry described by f, whose data starts at f.Offset
// in r, below outputDirectory.
//
// The entry name is treated as slash-separated and is rooted before being
// cleaned, so ".." elements cannot place the output outside outputDirectory.
// Names ending in "/" are created as directories. Existing files are
// truncated. Only the Store and Deflate methods are supported; any other
// method returns an error (after the output file has been created).
func extractFile(r io.ReadSeeker, f *File, outputDirectory string) (err error) {
	// Rooting the name makes Clean drop any leading ".." elements.
	name := path.Clean("/" + f.Name)
	if name == "/" && f.Name == "" {
		return fmt.Errorf("invalid file name %q", f.Name)
	}
	target := path.Join(outputDirectory, name)

	if f.Name[len(f.Name)-1] == '/' {
		// Directory entry: there is no data to write.
		return os.MkdirAll(target, 0o755)
	}

	if err := os.MkdirAll(path.Dir(target), 0o755); err != nil {
		return err
	}

	w, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data; report it unless an earlier
		// error is already being returned.
		if cerr := w.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	if _, err := r.Seek(f.Offset, io.SeekStart); err != nil {
		return err
	}

	size := int64(f.UncompressedSize)
	if size == 0xFFFFFFFF {
		size = int64(f.UncompressedSize64)
	}

	switch f.Method {
	case Deflate:
		fr := flate.NewReader(r)
		defer fr.Close()
		if _, err := io.CopyN(w, fr, size); err != nil {
			return err
		}
	case Store:
		if _, err := io.CopyN(w, r, size); err != nil {
			return err
		}
	default:
		return errors.New("invalid method")
	}
	return nil
}