-rw-r--r--  main.go   | 102
-rw-r--r--  reader.go |  26
2 files changed, 98 insertions, 30 deletions
diff --git a/main.go b/main.go
--- a/main.go
+++ b/main.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"bytes"
+	"compress/flate"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -9,6 +10,7 @@ import (
 	"io"
 	"log"
 	"os"
+	"path"
 	"time"
 
 	flag "github.com/spf13/pflag"
@@ -16,7 +18,9 @@ import (
 
 func main() {
 	var zipFilename string
+	var output string
 	flag.StringVarP(&zipFilename, "filename", "f", "", "filename")
+	flag.StringVarP(&output, "output", "o", "./out/", "output directory")
 	flag.Parse()
 
 	f, err := os.Open(zipFilename)
@@ -27,7 +31,10 @@ func main() {
 
 	for {
 		var sig [4]byte
-		if _, err = io.ReadFull(f, sig[:]); err != nil {
+		_, err = io.ReadFull(f, sig[:])
+		if err == io.EOF {
+			break
+		} else if err != nil {
 			log.Fatal("failed to read signature: ", err)
 		}
 
@@ -41,22 +48,47 @@ func main() {
 				pos, _ := f.Seek(0, io.SeekCurrent)
 				log.Fatalf("failed to read file header at 0x%x: %s", pos, err)
 			}
-			hdr.HeaderOffset, _ = f.Seek(0, io.SeekCurrent)
+			hdr.Offset, _ = f.Seek(0, io.SeekCurrent)
 			j, _ := json.MarshalIndent(hdr, "", " ")
-			log.Printf("File at 0x%x: %s", hdr.HeaderOffset, string(j))
+			log.Printf("File at 0x%x: %s", hdr.Offset, string(j))
+			hasDD := false
 			if hdr.Flags&0x08 > 0 {
-				log.Printf("Searching for file's end...")
-				if err = findAndUseDataDescriptor(&hdr, f); err != nil {
-					log.Fatalf("failed to udpate using data descriptor at 0x%x: %s", hdr.HeaderOffset, err)
+				if hdr.CompressedSize == 0 {
+					log.Printf("Searching for file's end...")
+					if err = findAndUseDataDescriptor(&hdr, f); err != nil {
+						log.Fatalf("failed to update using data descriptor at 0x%x: %s", hdr.Offset, err)
+					}
 				}
+				hasDD = true
+			}
+			err := extract(f, &hdr, output)
+			if err != nil {
+				log.Fatalf("failed to extract file: %s", err)
 			}
-			if pos, err := f.Seek(hdr.HeaderOffset+int64(hdr.CompressedSize), io.SeekStart); err != nil {
+			size := int64(hdr.CompressedSize)
+			if size == 0xFFFFFFFF {
+				size = int64(hdr.CompressedSize64)
+			}
+			pos, err := f.Seek(hdr.Offset+size, io.SeekStart)
+			if err != nil {
 				log.Fatalf("failed to seek to next header, stopped at 0x%x: %s", pos, err)
 			}
-
-		case dataDescriptorSignature:
-			io.CopyN(io.Discard, f, 12)
-
+			if hasDD {
+				if _, err = io.ReadFull(f, sig[:]); err != nil {
+					log.Fatal("failed to read DD signature: ", err)
+				}
+				sigb := readBuf(sig[:])
+				signature = sigb.uint32()
+				if signature != dataDescriptorSignature {
+					log.Fatalf("unexpected signature at 0x%x: got %08x", pos, signature)
+				}
+				log.Printf("Skipping data descriptor at 0x%x, zip64=%t...", pos, hdr.zip64)
+				if hdr.zip64 {
+					io.CopyN(io.Discard, f, 20)
+				} else {
+					io.CopyN(io.Discard, f, 12)
+				}
+			}
 		case directoryHeaderSignature:
 			var hdr File
 			if err = readDirectoryHeader(&hdr, f); err != nil {
@@ -109,7 +141,7 @@ func parseFileHeader(f *File, r io.Reader) error {
 
 	needUSize := f.UncompressedSize == ^uint32(0)
 	needCSize := f.CompressedSize == ^uint32(0)
-	needHeaderOffset := f.HeaderOffset == int64(^uint32(0))
+	needHeaderOffset := f.Offset == int64(^uint32(0))
 
 	// Best effort to find what we need.
 	// Other zip authors might not even follow the basic format,
@@ -151,7 +183,7 @@ parseExtras:
 				if len(fieldBuf) < 8 {
 					return ErrFormat
 				}
-				f.HeaderOffset = int64(fieldBuf.uint64())
+				f.Offset = int64(fieldBuf.uint64())
 			}
 		case ntfsExtraID:
 			if len(fieldBuf) < 4 {
@@ -299,11 +331,11 @@ func findAndUseDataDescriptor(f *File, r io.ReadSeeker) error {
 		if err := parseDataDescriptor(f, r); err != nil {
 			return fmt.Errorf("failed to read descriptor : %s", err)
 		}
-		if descriptorPos-int64(f.DescriptorCompressedSize) == f.HeaderOffset+4 {
+		if descriptorPos-int64(f.DescriptorCompressedSize) == f.Offset+4 {
 			break
 		} else {
 			log.Printf("invalid size: expected offset %x (hex) != %x (hex), compsize=%d, size=%d",
-				f.HeaderOffset+4, descriptorPos-int64(f.DescriptorCompressedSize), f.DescriptorCompressedSize, f.DescriptorUncompressedSize)
+				f.Offset+4, descriptorPos-int64(f.DescriptorCompressedSize), f.DescriptorCompressedSize, f.DescriptorUncompressedSize)
 		}
 		if pos, err = r.Seek(descriptorPos+4, io.SeekStart); err != nil {
 			return fmt.Errorf("failed to seek to descriptor at 0x%x: %s", pos, err)
@@ -313,8 +345,8 @@ func findAndUseDataDescriptor(f *File, r io.ReadSeeker) error {
 	}
 
 	// bring back at the beginning of the header (due to chunking)
-	if _, err := r.Seek(f.HeaderOffset, io.SeekStart); err != nil {
-		return fmt.Errorf("failed to seek to beginning at %d: %s", f.HeaderOffset, err)
+	if _, err := r.Seek(f.Offset, io.SeekStart); err != nil {
+		return fmt.Errorf("failed to seek to beginning at %d: %s", f.Offset, err)
 	}
 	crc := crc32.NewIEEE()
 	if _, err := io.CopyN(crc, r, int64(f.DescriptorCompressedSize)); err != nil {
@@ -331,3 +363,39 @@ func findAndUseDataDescriptor(f *File, r io.ReadSeeker) error {
 
 	return nil
 }
+
+func extract(r io.ReadSeeker, f *File, outputDirectory string) error {
+	dir, base := path.Split(path.Clean(f.Name))
+	directory := path.Join(outputDirectory, dir)
+	err := os.MkdirAll(directory, 0o755)
+	if err != nil {
+		return err
+	}
+
+	w, err := os.OpenFile(path.Join(directory, base), os.O_WRONLY|os.O_CREATE, 0o644)
+	if err != nil {
+		return err
+	}
+	defer w.Close()
+	if _, err := r.Seek(f.Offset, io.SeekStart); err != nil {
+		return err
+	}
+	size := int64(f.UncompressedSize)
+	if size == 0xFFFFFFFF {
+		size = int64(f.UncompressedSize64)
+	}
+	if f.Method == Deflate {
+		fr := flate.NewReader(r)
+		if _, err := io.CopyN(w, fr, size); err != nil {
+			return err
+		}
+	} else if f.Method == Store {
+		if _, err := io.CopyN(w, r, size); err != nil {
+			return err
+		}
+	} else {
+		return errors.New("invalid method")
+	}
+
+	return nil
+}
diff --git a/reader.go b/reader.go
--- a/reader.go
+++ b/reader.go
@@ -54,10 +54,10 @@ type ReadCloser struct {
 // The file content can be accessed by calling Open.
 type File struct {
 	FileHeader
-	zip          *Reader
-	zipr         io.ReaderAt
-	HeaderOffset int64 // includes overall ZIP archive baseOffset
-	zip64        bool  // zip64 extended information extra field presence
+	zip    *Reader
+	zipr   io.ReaderAt
+	Offset int64 // includes overall ZIP archive baseOffset
+	zip64  bool  // zip64 extended information extra field presence
 }
 
 // OpenReader will open the Zip file specified by name and return a ReadCloser.
@@ -143,7 +143,7 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
 		if err != nil {
 			return err
 		}
-		f.HeaderOffset += z.baseOffset
+		f.Offset += z.baseOffset
 		z.File = append(z.File, f)
 	}
 	if uint16(len(z.File)) != uint16(end.DirectoryRecords) { // only compare 16 bits here
@@ -187,7 +187,7 @@ func (f *File) DataOffset() (offset int64, err error) {
 	if err != nil {
 		return
 	}
-	return f.HeaderOffset + bodyOffset, nil
+	return f.Offset + bodyOffset, nil
 }
 
 // Open returns a ReadCloser that provides access to the File's contents.
@@ -198,7 +198,7 @@ func (f *File) Open() (io.ReadCloser, error) {
 		return nil, err
 	}
 	size := int64(f.CompressedSize64)
-	r := io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset, size)
+	r := io.NewSectionReader(f.zipr, f.Offset+bodyOffset, size)
 	dcomp := f.zip.decompressor(f.Method)
 	if dcomp == nil {
 		return nil, ErrAlgorithm
@@ -206,7 +206,7 @@ func (f *File) Open() (io.ReadCloser, error) {
 	var rc io.ReadCloser = dcomp(r)
 	var desr io.Reader
 	if f.hasDataDescriptor() {
-		desr = io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset+size, dataDescriptorLen)
+		desr = io.NewSectionReader(f.zipr, f.Offset+bodyOffset+size, dataDescriptorLen)
 	}
 	rc = &checksumReader{
 		rc: rc,
@@ -224,7 +224,7 @@ func (f *File) OpenRaw() (io.Reader, error) {
 	if err != nil {
 		return nil, err
 	}
-	r := io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset, int64(f.CompressedSize64))
+	r := io.NewSectionReader(f.zipr, f.Offset+bodyOffset, int64(f.CompressedSize64))
 	return r, nil
 }
 
@@ -287,7 +287,7 @@ func (r *checksumReader) Close() error { return r.rc.Close() }
 // and returns the file body offset.
 func (f *File) findBodyOffset() (int64, error) {
 	var buf [fileHeaderLen]byte
-	if _, err := f.zipr.ReadAt(buf[:], f.HeaderOffset); err != nil {
+	if _, err := f.zipr.ReadAt(buf[:], f.Offset); err != nil {
 		return 0, err
 	}
 	b := readBuf(buf[:])
@@ -325,7 +325,7 @@ func readDirectoryHeader(f *File, r io.Reader) error {
 	commentLen := int(b.uint16())
 	b = b[4:] // skipped start disk number and internal attributes (2x uint16)
 	f.ExternalAttrs = b.uint32()
-	f.HeaderOffset = int64(b.uint32())
+	f.Offset = int64(b.uint32())
 	d := make([]byte, filenameLen+extraLen+commentLen)
 	if _, err := io.ReadFull(r, d); err != nil {
 		return err
@@ -354,7 +354,7 @@ func readDirectoryHeader(f *File, r io.Reader) error {
 
 	needUSize := f.UncompressedSize == ^uint32(0)
 	needCSize := f.CompressedSize == ^uint32(0)
-	needHeaderOffset := f.HeaderOffset == int64(^uint32(0))
+	needHeaderOffset := f.Offset == int64(^uint32(0))
 
 	// Best effort to find what we need.
 	// Other zip authors might not even follow the basic format,
@@ -396,7 +396,7 @@ parseExtras:
 				if len(fieldBuf) < 8 {
 					return ErrFormat
 				}
-				f.HeaderOffset = int64(fieldBuf.uint64())
+				f.Offset = int64(fieldBuf.uint64())
 			}
 		case ntfsExtraID:
 			if len(fieldBuf) < 4 {
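A note on the data-descriptor skip in the main.go loop above: the byte counts follow directly from the record layout in APPNOTE — a crc-32 field plus the compressed and uncompressed sizes, each 4 bytes in the classic descriptor and 8 bytes when the entry uses zip64, hence 12 or 20 bytes once the optional 4-byte signature has been consumed. A minimal sketch of that rule as a standalone helper (hypothetical; dataDescriptorBodyLen is not part of this patch):

package main

import "fmt"

// dataDescriptorBodyLen reports how many bytes of a data descriptor remain
// after its optional 4-byte signature: crc-32 plus the two size fields,
// which are 4 bytes each in the classic record and 8 bytes each for zip64.
func dataDescriptorBodyLen(zip64 bool) int64 {
	if zip64 {
		return 4 + 8 + 8 // 20 bytes, matching io.CopyN(io.Discard, f, 20)
	}
	return 4 + 4 + 4 // 12 bytes, matching io.CopyN(io.Discard, f, 12)
}

func main() {
	fmt.Println(dataDescriptorBodyLen(false), dataDescriptorBodyLen(true)) // 12 20
}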
