author    Marin Ivanov <[email protected]>  2024-03-29 00:01:55 +0200
committer Marin Ivanov <[email protected]>  2024-03-29 00:01:55 +0200
commit    f1fdc384ae82da620d53c1660bf4969beed992e6
tree      c2f8641c924097338e950de6ee3877756fc47f76
parent    90394d8b5b2a4c6bcf29692e6d4ab79c598928b1
wip
-rw-r--r--  main.go    124
-rw-r--r--  reader.go   20
-rw-r--r--  struct.go    8
3 files changed, 120 insertions, 32 deletions
diff --git a/main.go b/main.go
index cab47d5..88ff7c1 100644
--- a/main.go
+++ b/main.go
@@ -4,6 +4,8 @@ import (
"bytes"
"encoding/json"
"errors"
+ "fmt"
+ "hash/crc32"
"io"
"log"
"os"
@@ -37,18 +39,29 @@ func main() {
var hdr File
if err := parseFileHeader(&hdr, f); err != nil {
pos, _ := f.Seek(0, io.SeekCurrent)
- log.Fatalf("failed to read file header at %d: %s", pos, err)
+ log.Fatalf("failed to read file header at 0x%x: %s", pos, err)
}
+ hdr.HeaderOffset, _ = f.Seek(0, io.SeekCurrent) // position of the file data, just past the local header
j, _ := json.MarshalIndent(hdr, "", " ")
- log.Printf("File: %s", string(j))
- if pos, err := f.Seek(int64(hdr.CompressedSize), io.SeekCurrent); err != nil {
- log.Fatalf("failed to seek to next header, stopped at %d: %s", pos, err)
+ log.Printf("File at 0x%x: %s", hdr.HeaderOffset, string(j))
+ if hdr.Flags&0x08 > 0 {
+ log.Printf("Searching for file's end...")
+ if err = findAndUseDataDescriptor(&hdr, f); err != nil {
+ log.Fatalf("failed to udpate using data descriptor at 0x%x: %s", hdr.HeaderOffset, err)
+ }
+ }
+ if pos, err := f.Seek(hdr.HeaderOffset+int64(hdr.CompressedSize), io.SeekStart); err != nil {
+ log.Fatalf("failed to seek to next header, stopped at 0x%x: %s", pos, err)
}
+
+ case dataDescriptorSignature:
+ // descriptor reached after a streamed entry's data; the signature was
+ // just read, so skip the 12-byte body (crc32, compressed size, size)
+ io.CopyN(io.Discard, f, 12)
+
case directoryHeaderSignature:
var hdr File
if err = readDirectoryHeader(&hdr, f); err != nil {
pos, _ := f.Seek(0, io.SeekCurrent)
- log.Fatalf("failed to read directory header at %d: %s", pos, err)
+ log.Fatalf("failed to read directory header at 0x%x: %s", pos, err)
}
j, _ := json.MarshalIndent(hdr, "", " ")
log.Printf("Directory: %s", string(j))
@@ -56,13 +69,13 @@ func main() {
var hdr directoryEnd
if err := parseDirectoryEnd(&hdr, f); err != nil {
pos, _ := f.Seek(0, io.SeekCurrent)
- log.Fatalf("failed to read directory end at %d: %s", pos, err)
+ log.Fatalf("failed to read directory end at 0x%x: %s", pos, err)
}
j, _ := json.MarshalIndent(hdr, "", " ")
log.Printf("Directory End: %s", string(j))
default:
pos, _ := f.Seek(0, io.SeekCurrent)
- log.Fatalf("invalid header signature at %d: got %08x", pos, signature)
+ log.Fatalf("invalid header signature at 0x%x: got %08x", pos, signature)
}
}
}
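
The new branch keys off bit 3 of the general-purpose bit flags (APPNOTE 4.4.4): when it is set, the CRC-32 and both size fields in the local header are written as zero, and the real values follow the compressed data in a data descriptor, which is why main has to go hunting for the entry's end. A minimal sketch of the test, with an illustrative constant name (this patch only uses the raw 0x08 literal):

    // Sketch only: bit 3 of the ZIP general-purpose flags marks a streamed
    // entry whose sizes live in a trailing data descriptor.
    const flagDataDescriptor uint16 = 0x08

    func hasTrailingDescriptor(flags uint16) bool {
        // crc32/csize/usize in the local header are zero when this bit is
        // set; the real values come after the compressed data.
        return flags&flagDataDescriptor != 0
    }
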
@@ -96,7 +109,7 @@ func parseFileHeader(f *File, r io.Reader) error {
needUSize := f.UncompressedSize == ^uint32(0)
needCSize := f.CompressedSize == ^uint32(0)
- needHeaderOffset := f.headerOffset == int64(^uint32(0))
+ needHeaderOffset := f.HeaderOffset == int64(^uint32(0))
// Best effort to find what we need.
// Other zip authors might not even follow the basic format,
@@ -138,7 +151,7 @@ parseExtras:
if len(fieldBuf) < 8 {
return ErrFormat
}
- f.headerOffset = int64(fieldBuf.uint64())
+ f.HeaderOffset = int64(fieldBuf.uint64())
}
case ntfsExtraID:
if len(fieldBuf) < 4 {
@@ -200,7 +213,8 @@ parseExtras:
return nil
}
-func parseDirectoryEnd(d *directoryEnd, r io.Reader) error {
+func parseDirectoryEnd(d *directoryEnd, r io.ReadSeeker) error {
+ d.HeaderOffset, _ = r.Seek(0, io.SeekCurrent)
var hdr [directoryEndLen]byte
if _, err := io.ReadFull(r, hdr[:]); err != nil {
return err
@@ -226,15 +240,17 @@ func parseDirectoryEnd(d *directoryEnd, r io.Reader) error {
return nil
}
-const searchChunkSize = 4096
+const searchSize = 4096
+
+// search scans r in searchSize chunks, looking for term. The last
+// len(term) bytes of each chunk are kept at the head of the buffer so a
+// match straddling a chunk boundary is still found. offset is r's
+// current file position; the returned offset points just past the match.
+func search(r io.Reader, term []byte, offset int64) (int64, error) {
+ termLen := len(term)
+ buf := make([]byte, searchSize+termLen)
-func find(r io.Reader, search []byte) (int64, error) {
- var offset int64
- tailLen := len(search) - 1
- chunk := make([]byte, searchChunkSize+tailLen)
- n, err := r.Read(chunk[tailLen:])
- idx := bytes.Index(chunk[tailLen:n+tailLen], search)
+ var err error
+ var n, idx int
for {
+ n, err = r.Read(buf[termLen:])
+ idx = bytes.Index(buf[:n+termLen], term)
if idx >= 0 {
return offset + int64(idx), nil
}
@@ -243,9 +259,75 @@ func find(r io.Reader, search []byte) (int64, error) {
} else if err != nil {
return -1, err
}
- copy(chunk, chunk[searchChunkSize:])
- offset += searchChunkSize
- n, err = r.Read(chunk[tailLen:])
- idx = bytes.Index(chunk[:n+tailLen], search)
+ copy(buf, buf[searchSize:])
+ offset += searchSize
+ }
+}
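
search keeps the last len(term) bytes of each chunk at the head of the buffer, so a signature straddling a 4096-byte boundary is still found. Two details of this version are worth calling out: the returned offset points just past the matched term (the index includes the len(term) overlap prefix, which is exactly what the descriptor parsing below wants), and r.Read may legally return fewer than searchSize bytes, which the unconditional offset += searchSize does not account for; io.ReadFull would pin that down. A hypothetical smoke test for the boundary case, assuming the search function from this patch is in scope:

    // Place the signature so it straddles the first 4096-byte chunk.
    data := make([]byte, 8192)
    sig := []byte{'P', 'K', 0x07, 0x08}
    copy(data[4094:], sig)

    pos, err := search(bytes.NewReader(data), sig, 0)
    fmt.Println(pos, err) // 4098 <nil>: just past the 4 signature bytes at 4094
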
+
+// parseDataDescriptor reads the 12-byte descriptor body (crc32,
+// compressed and uncompressed size); the signature has already been
+// consumed by the caller.
+func parseDataDescriptor(f *File, r io.Reader) error {
+ var buf [dataDescriptorLen]byte
+
+ if _, err := io.ReadFull(r, buf[:]); err != nil {
+ return err
+ }
+ b := readBuf(buf[:])
+ f.DescriptorCRC32 = b.uint32()
+ f.DescriptorCompressedSize = b.uint32()
+ f.DescriptorUncompressedSize = b.uint32()
+
+ log.Printf("compsize=%d, size=%d", f.DescriptorCompressedSize, f.DescriptorUncompressedSize)
+
+ return nil
+}
+
+var dataDescriptorSignatureBytes = []byte{'P', 'K', 0x07, 0x08}
+
+// findAndUseDataDescriptor scans forward for a descriptor whose
+// compressed-size field matches the distance back to the start of the
+// file's data, verifies the CRC-32 over that range, and copies the
+// descriptor values into f.
+func findAndUseDataDescriptor(f *File, r io.ReadSeeker) error {
+ pos, err := r.Seek(0, io.SeekCurrent)
+ if err != nil {
+ return err
+ }
+ for {
+ descriptorPos, err := search(r, dataDescriptorSignatureBytes, pos)
+ if err != nil {
+ return fmt.Errorf("failed to find descriptor maxed at 0x%x: %s", descriptorPos, err)
+ }
+ log.Printf("found descriptor at offset 0x%x", descriptorPos)
+ if pos, err = r.Seek(descriptorPos, io.SeekStart); err != nil {
+ return fmt.Errorf("failed to seek to descriptor at 0x%x: %s", pos, err)
+ }
+ if err := parseDataDescriptor(f, r); err != nil {
+ return fmt.Errorf("failed to read descriptor : %s", err)
+ }
+ if descriptorPos-int64(f.DescriptorCompressedSize) == f.HeaderOffset+4 {
+ break
+ } else {
+ log.Printf("invalid size: expected offset %x (hex) != %x (hex), compsize=%d, size=%d",
+ f.HeaderOffset+4, descriptorPos-int64(f.DescriptorCompressedSize), f.DescriptorCompressedSize, f.DescriptorUncompressedSize)
+ }
+ if pos, err = r.Seek(descriptorPos+4, io.SeekStart); err != nil {
+ return fmt.Errorf("failed to seek past false match at 0x%x: %s", pos, err)
+ }
+ }
+
+ // seek back to the start of the file data (the search above read past it)
+ if _, err := r.Seek(f.HeaderOffset, io.SeekStart); err != nil {
+ return fmt.Errorf("failed to seek to beginning at %d: %s", f.HeaderOffset, err)
+ }
+ crc := crc32.NewIEEE()
+ if _, err := io.CopyN(crc, r, int64(f.DescriptorCompressedSize)); err != nil {
+ return fmt.Errorf("failed to calculate CRC32: %s", err)
}
+ crcSum := crc.Sum32()
+ if f.DescriptorCRC32 != crcSum {
+ return fmt.Errorf("invalid CRC32: calculated %08x, have in decriptor %08x", crcSum, f.DescriptorCRC32)
+ }
+
+ f.CRC32 = crcSum
+ f.UncompressedSize = f.DescriptorUncompressedSize
+ f.CompressedSize = f.DescriptorCompressedSize
+
+ return nil
}
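
The acceptance test in findAndUseDataDescriptor is pure geometry: hdr.HeaderOffset was recorded in main right after parseFileHeader, so it marks the first byte of compressed data, and search returns the position just past the 4-byte signature. A genuine descriptor sits immediately after the data, hence the +4. Spelled out as a helper (names are illustrative, not part of the patch):

    // descriptorFits reports whether a candidate signature position is
    // consistent with the descriptor's own compressed-size field.
    //
    //   dataStart                 sig        sigEnd
    //   |<----- csize bytes ----->|PK\x07\x08|crc32 csize usize|
    //
    // dataStart is the first byte of compressed data; sigEnd is the offset
    // just past the signature, as returned by search.
    func descriptorFits(dataStart, sigEnd int64, csize uint32) bool {
        return sigEnd-int64(csize) == dataStart+4
    }

A size match alone could still be a coincidence inside the compressed stream, so the function then re-reads the entry from HeaderOffset and requires crc32.NewIEEE (the IEEE polynomial is the one ZIP uses) to reproduce the descriptor's CRC-32 before trusting it.
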
diff --git a/reader.go b/reader.go
index 5ee2d3d..6ef8885 100644
--- a/reader.go
+++ b/reader.go
@@ -56,7 +56,7 @@ type File struct {
FileHeader
zip *Reader
zipr io.ReaderAt
- headerOffset int64 // includes overall ZIP archive baseOffset
+ HeaderOffset int64 // includes overall ZIP archive baseOffset
zip64 bool // zip64 extended information extra field presence
}
@@ -143,7 +143,7 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
if err != nil {
return err
}
- f.headerOffset += z.baseOffset
+ f.HeaderOffset += z.baseOffset
z.File = append(z.File, f)
}
if uint16(len(z.File)) != uint16(end.DirectoryRecords) { // only compare 16 bits here
@@ -187,7 +187,7 @@ func (f *File) DataOffset() (offset int64, err error) {
if err != nil {
return
}
- return f.headerOffset + bodyOffset, nil
+ return f.HeaderOffset + bodyOffset, nil
}
// Open returns a ReadCloser that provides access to the File's contents.
@@ -198,7 +198,7 @@ func (f *File) Open() (io.ReadCloser, error) {
return nil, err
}
size := int64(f.CompressedSize64)
- r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
+ r := io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset, size)
dcomp := f.zip.decompressor(f.Method)
if dcomp == nil {
return nil, ErrAlgorithm
@@ -206,7 +206,7 @@ func (f *File) Open() (io.ReadCloser, error) {
var rc io.ReadCloser = dcomp(r)
var desr io.Reader
if f.hasDataDescriptor() {
- desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
+ desr = io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset+size, dataDescriptorLen)
}
rc = &checksumReader{
rc: rc,
@@ -224,7 +224,7 @@ func (f *File) OpenRaw() (io.Reader, error) {
if err != nil {
return nil, err
}
- r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
+ r := io.NewSectionReader(f.zipr, f.HeaderOffset+bodyOffset, int64(f.CompressedSize64))
return r, nil
}
@@ -287,7 +287,7 @@ func (r *checksumReader) Close() error { return r.rc.Close() }
// and returns the file body offset.
func (f *File) findBodyOffset() (int64, error) {
var buf [fileHeaderLen]byte
- if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
+ if _, err := f.zipr.ReadAt(buf[:], f.HeaderOffset); err != nil {
return 0, err
}
b := readBuf(buf[:])
@@ -325,7 +325,7 @@ func readDirectoryHeader(f *File, r io.Reader) error {
commentLen := int(b.uint16())
b = b[4:] // skipped start disk number and internal attributes (2x uint16)
f.ExternalAttrs = b.uint32()
- f.headerOffset = int64(b.uint32())
+ f.HeaderOffset = int64(b.uint32())
d := make([]byte, filenameLen+extraLen+commentLen)
if _, err := io.ReadFull(r, d); err != nil {
return err
@@ -354,7 +354,7 @@ func readDirectoryHeader(f *File, r io.Reader) error {
needUSize := f.UncompressedSize == ^uint32(0)
needCSize := f.CompressedSize == ^uint32(0)
- needHeaderOffset := f.headerOffset == int64(^uint32(0))
+ needHeaderOffset := f.HeaderOffset == int64(^uint32(0))
// Best effort to find what we need.
// Other zip authors might not even follow the basic format,
@@ -396,7 +396,7 @@ parseExtras:
if len(fieldBuf) < 8 {
return ErrFormat
}
- f.headerOffset = int64(fieldBuf.uint64())
+ f.HeaderOffset = int64(fieldBuf.uint64())
}
case ntfsExtraID:
if len(fieldBuf) < 4 {
diff --git a/struct.go b/struct.go
index eef0b91..123f895 100644
--- a/struct.go
+++ b/struct.go
@@ -41,7 +41,7 @@ const (
fileHeaderLen = 26 // + filename + extra
directoryHeaderLen = 42 // + filename + extra + comment
directoryEndLen = 18 // + comment
- dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size
+ dataDescriptorLen = 12 // three uint32: crc32, compressed size, size (signature is read separately)
dataDescriptor64Len = 24 // two uint32: signature, crc32 | two uint64: compressed size, size
directory64LocLen = 20 //
directory64EndLen = 56 // + extra
@@ -135,6 +135,10 @@ type FileHeader struct {
UncompressedSize64 uint64
Extra []byte
ExternalAttrs uint32 // Meaning depends on CreatorVersion
+
+ DescriptorCRC32 uint32
+ DescriptorCompressedSize uint32
+ DescriptorUncompressedSize uint32
}
// FileInfo returns an fs.FileInfo for the FileHeader.
@@ -199,6 +203,8 @@ type directoryEnd struct {
DirectoryOffset uint64 // relative to file
CommentLen uint16
Comment string
+
+ HeaderOffset int64 // includes overall ZIP archive baseOffset
}
// timeZone returns a *time.Location based on the provided offset.
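
For reference, the layout behind the dataDescriptorLen change: once the switch in main has consumed the "PK\x07\x08" signature, the remaining body is three little-endian uint32 values, i.e. 12 bytes. A sketch of the same parse using encoding/binary instead of the package's readBuf helper (the type and function names here are made up for illustration):

    package main

    import (
        "encoding/binary"
        "io"
    )

    // dataDescriptorBody is the 12 bytes that follow the descriptor
    // signature of a non-ZIP64 entry.
    type dataDescriptorBody struct {
        CRC32            uint32 // CRC-32 of the uncompressed data
        CompressedSize   uint32
        UncompressedSize uint32
    }

    func readDescriptorBody(r io.Reader) (dataDescriptorBody, error) {
        var d dataDescriptorBody
        // ZIP is little-endian throughout; binary.Read fills the three
        // fields from exactly 12 bytes.
        err := binary.Read(r, binary.LittleEndian, &d)
        return d, err
    }
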