diff options
| author | Marin Ivanov <[email protected]> | 2024-03-29 00:01:55 +0200 |
|---|---|---|
| committer | Marin Ivanov <[email protected]> | 2024-03-29 00:01:55 +0200 |
| commit | f1fdc384ae82da620d53c1660bf4969beed992e6 (patch) | |
| tree | c2f8641c924097338e950de6ee3877756fc47f76 | |
| parent | 90394d8b5b2a4c6bcf29692e6d4ab79c598928b1 (diff) | |
wip
| -rw-r--r-- | main.go | 124 | ||||
| -rw-r--r-- | reader.go | 20 | ||||
| -rw-r--r-- | struct.go | 8 |
3 files changed, 120 insertions, 32 deletions
@@ -4,6 +4,8 @@ import ( "bytes" "encoding/json" "errors" + "fmt" + "hash/crc32" "io" "log" "os" @@ -37,18 +39,29 @@ func main() { var hdr File if err := parseFileHeader(&hdr, f); err != nil { pos, _ := f.Seek(0, io.SeekCurrent) - log.Fatalf("failed to read file header at %d: %s", pos, err) + log.Fatalf("failed to read file header at 0x%x: %s", pos, err) } + hdr.HeaderOffset, _ = f.Seek(0, io.SeekCurrent) j, _ := json.MarshalIndent(hdr, "", " ") - log.Printf("File: %s", string(j)) - if pos, err := f.Seek(int64(hdr.CompressedSize), io.SeekCurrent); err != nil { - log.Fatalf("failed to seek to next header, stopped at %d: %s", pos, err) + log.Printf("File at 0x%x: %s", hdr.HeaderOffset, string(j)) + if hdr.Flags&0x08 > 0 { + log.Printf("Searching for file's end...") + if err = findAndUseDataDescriptor(&hdr, f); err != nil { + log.Fatalf("failed to udpate using data descriptor at 0x%x: %s", hdr.HeaderOffset, err) + } + } + if pos, err := f.Seek(hdr.HeaderOffset+int64(hdr.CompressedSize), io.SeekStart); err != nil { + log.Fatalf("failed to seek to next header, stopped at 0x%x: %s", pos, err) } + + case dataDescriptorSignature: + io.CopyN(io.Discard, f, 12) + case directoryHeaderSignature: var hdr File if err = readDirectoryHeader(&hdr, f); err != nil { pos, _ := f.Seek(0, io.SeekCurrent) - log.Fatalf("failed to read directory header at %d: %s", pos, err) + log.Fatalf("failed to read directory header at 0x%x: %s", pos, err) } j, _ := json.MarshalIndent(hdr, "", " ") log.Printf("Directory: %s", string(j)) @@ -56,13 +69,13 @@ func main() { var hdr directoryEnd if err := parseDirectoryEnd(&hdr, f); err != nil { pos, _ := f.Seek(0, io.SeekCurrent) - log.Fatalf("failed to read directory end at %d: %s", pos, err) + log.Fatalf("failed to read directory end at 0x%x: %s", pos, err) } j, _ := json.MarshalIndent(hdr, "", " ") log.Printf("Directory End: %s", string(j)) default: pos, _ := f.Seek(0, io.SeekCurrent) - log.Fatalf("invalid header signature at %d: got %08x", pos, signature) + log.Fatalf("invalid header signature at 0x%x: got %08x", pos, signature) } } } @@ -96,7 +109,7 @@ func parseFileHeader(f *File, r io.Reader) error { needUSize := f.UncompressedSize == ^uint32(0) needCSize := f.CompressedSize == ^uint32(0) - needHeaderOffset := f.headerOffset == int64(^uint32(0)) + needHeaderOffset := f.HeaderOffset == int64(^uint32(0)) // Best effort to find what we need. // Other zip authors might not even follow the basic format, @@ -138,7 +151,7 @@ parseExtras: if len(fieldBuf) < 8 { return ErrFormat } - f.headerOffset = int64(fieldBuf.uint64()) + f.HeaderOffset = int64(fieldBuf.uint64()) } case ntfsExtraID: if len(fieldBuf) < 4 { @@ -200,7 +213,8 @@ parseExtras: return nil } -func parseDirectoryEnd(d *directoryEnd, r io.Reader) error { +func parseDirectoryEnd(d *directoryEnd, r io.ReadSeeker) error { + d.HeaderOffset, _ = r.Seek(0, io.SeekCurrent) var hdr [directoryEndLen]byte if _, err := io.ReadFull(r, hdr[:]); err != nil { return err @@ -226,15 +240,17 @@ func parseDirectoryEnd(d *directoryEnd, r io.Reader) error { return nil } -const searchChunkSize = 4096 +const searchSize = 4096 + +func search(r io.Reader, term []byte, offset int64) (int64, error) { + termLen := len(term) + buf := make([]byte, searchSize+termLen) -func find(r io.Reader, search []byte) (int64, error) { - var offset int64 - tailLen := len(search) - 1 - chunk := make([]byte, searchChunkSize+tailLen) - n, err := r.Read(chunk[tailLen:]) - idx := bytes.Index(chunk[tailLen:n+tailLen], search) + var err error + var n, idx int for { + n, err = r.Read(buf[termLen:]) + idx = bytes.Index(buf[:n+termLen], term) if idx >= 0 { return offset + int64(idx), nil } @@ -243,9 +259,75 @@ func find(r io.Reader, search []byte) (int64, error) { } else if err != nil { return -1, err } - copy(chunk, chunk[searchChunkSize:]) - offset += searchChunkSize - n, err = r.Read(chunk[tailLen:]) - idx = bytes.Index(chunk[:n+tailLen], search) + copy(buf, buf[searchSize:]) + offset += searchSize + } +} + +func parseDataDescriptor(f *File, r io.Reader) error { + var buf [dataDescriptorLen]byte + + if _, err := io.ReadFull(r, buf[:]); err != nil { + return err + } + b := readBuf(buf[:]) + f.DescriptorCRC32 = b.uint32() + f.DescriptorCompressedSize = b.uint32() + f.DescriptorUncompressedSize = b.uint32() + + log.Printf("compsize=%d, size=%d", f.DescriptorCompressedSize, f.DescriptorUncompressedSize) + + return nil +} + +var dataDescriptorSignatureBytes = []byte{'P', 'K', 0x07, 0x08} + +func findAndUseDataDescriptor(f *File, r io.ReadSeeker) error { + pos, err := r.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + for { + descriptorPos, err := search(r, dataDescriptorSignatureBytes, pos) + if err != nil { + return fmt.Errorf("failed to find descriptor maxed at 0x%x: %s", descriptorPos, err) + } + log.Printf("found descriptor at offset 0x%x", descriptorPos) + if pos, err = r.Seek(descriptorPos, io.SeekStart); err != nil { + return fmt.Errorf("failed to seek to descriptor at 0x%x: %s", pos, err) + } + if err := parseDataDescriptor(f, r); err != nil { + return fmt.Errorf("failed to read descriptor : %s", err) + } + if descriptorPos-int64(f.DescriptorCompressedSize) == f.HeaderOffset+4 { + break + } else { + log.Printf("invalid size: expected offset %x (hex) != %x (hex), compsize=%d, size=%d", + f.HeaderOffset+4, descriptorPos-int64(f.DescriptorCompressedSize), f.DescriptorCompressedSize, f.DescriptorUncompressedSize) + } + if pos, err = r.Seek(descriptorPos+4, io.SeekStart); err != nil { + return fmt.Errorf("failed to seek to descriptor at 0x%x: %s", pos, err) + } else { + // log.Printf("resetted to offset %d", pos) + } + } + + // bring back at the beginning of the header (due to chunking) + if _, err := r.Seek(f.HeaderOffset, io.SeekStart); err != nil { + return fmt.Errorf("failed to seek to beginning at %d: %s", f.HeaderOffset, err) + } + crc := crc32.NewIEEE() + if _, err := io.CopyN(crc, r, int64(f.DescriptorCompressedSize)); err != nil { + return fmt.Errorf("failed to calculate CRC32: %s", err) } + crcSum := crc.Sum32() + if f.DescriptorCRC32 != crcSum { + return fmt.Errorf("invalid CRC32: calculated %08x, have in decriptor %08x", crcSum, f.DescriptorCRC32) + } + + f.CRC32 = crcSum + f.UncompressedSize = f.DescriptorUncompressedSize + f.CompressedSize = f.DescriptorCompressedSize + + return nil } @@ -56,7 +56,7 @@ type File struct { FileHeader zip *Reader zipr io.ReaderAt - headerOffset int64 // includes overall ZIP archive baseOffset + HeaderOffset int64 // includes overall ZIP archive baseOffset zip64 bool // zip64 extended information extra field presence } @@ -143,7 +143,7 @@ func (z *Reader) init(r io.ReaderAt, size int64) error { if err != nil { return err } - f.headerOffset += z.baseOffset + f.HeaderOffset += z.baseOffset z.File = append(z.File, f) } if uint16(len(z.File)) != uint16(end.DirectoryRecords) { // only compare 16 bits here @@ -187,7 +187,7 @@ func (f *File) DataOffset() (offset int64, err error) { if err != nil { return } - return f.headerOffset + bodyOffset, nil + return f.HeaderOffset + bodyOffset, nil } // Open returns a ReadCloser that provides access to the File's contents. @@ -198,7 +198,7 @@ func (f *File) Open() (io.ReadCloser, error) { return nil, err } size := int64(f.CompressedSize64) - r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) + r := io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset, size) dcomp := f.zip.decompressor(f.Method) if dcomp == nil { return nil, ErrAlgorithm @@ -206,7 +206,7 @@ func (f *File) Open() (io.ReadCloser, error) { var rc io.ReadCloser = dcomp(r) var desr io.Reader if f.hasDataDescriptor() { - desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) + desr = io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset+size, dataDescriptorLen) } rc = &checksumReader{ rc: rc, @@ -224,7 +224,7 @@ func (f *File) OpenRaw() (io.Reader, error) { if err != nil { return nil, err } - r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) + r := io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset, int64(f.CompressedSize64)) return r, nil } @@ -287,7 +287,7 @@ func (r *checksumReader) Close() error { return r.rc.Close() } // and returns the file body offset. func (f *File) findBodyOffset() (int64, error) { var buf [fileHeaderLen]byte - if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { + if _, err := f.zipr.ReadAt(buf[:], f.HeaderOffset); err != nil { return 0, err } b := readBuf(buf[:]) @@ -325,7 +325,7 @@ func readDirectoryHeader(f *File, r io.Reader) error { commentLen := int(b.uint16()) b = b[4:] // skipped start disk number and internal attributes (2x uint16) f.ExternalAttrs = b.uint32() - f.headerOffset = int64(b.uint32()) + f.HeaderOffset = int64(b.uint32()) d := make([]byte, filenameLen+extraLen+commentLen) if _, err := io.ReadFull(r, d); err != nil { return err @@ -354,7 +354,7 @@ func readDirectoryHeader(f *File, r io.Reader) error { needUSize := f.UncompressedSize == ^uint32(0) needCSize := f.CompressedSize == ^uint32(0) - needHeaderOffset := f.headerOffset == int64(^uint32(0)) + needHeaderOffset := f.HeaderOffset == int64(^uint32(0)) // Best effort to find what we need. // Other zip authors might not even follow the basic format, @@ -396,7 +396,7 @@ parseExtras: if len(fieldBuf) < 8 { return ErrFormat } - f.headerOffset = int64(fieldBuf.uint64()) + f.HeaderOffset = int64(fieldBuf.uint64()) } case ntfsExtraID: if len(fieldBuf) < 4 { @@ -41,7 +41,7 @@ const ( fileHeaderLen = 26 // + filename + extra directoryHeaderLen = 42 // + filename + extra + comment directoryEndLen = 18 // + comment - dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size + dataDescriptorLen = 12 // four uint32: descriptor signature, crc32, compressed size, size dataDescriptor64Len = 24 // two uint32: signature, crc32 | two uint64: compressed size, size directory64LocLen = 20 // directory64EndLen = 56 // + extra @@ -135,6 +135,10 @@ type FileHeader struct { UncompressedSize64 uint64 Extra []byte ExternalAttrs uint32 // Meaning depends on CreatorVersion + + DescriptorCRC32 uint32 + DescriptorCompressedSize uint32 + DescriptorUncompressedSize uint32 } // FileInfo returns an fs.FileInfo for the FileHeader. @@ -199,6 +203,8 @@ type directoryEnd struct { DirectoryOffset uint64 // relative to file CommentLen uint16 Comment string + + HeaderOffset int64 // includes overall ZIP archive baseOffset } // timeZone returns a *time.Location based on the provided offset. |
