diff options
| author | Dustin Sallings <[email protected]> | 2024-02-17 10:43:02 -1000 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-02-17 10:43:02 -1000 |
| commit | 59cf28f56db53cddb9a1704fb34760dbc0377ec8 (patch) | |
| tree | 66c638a6ef5da5746ecc105c314a7859e72ca909 | |
| parent | 3bfd8048b5a93e4ede740dbb51baa16e0262da25 (diff) | |
Remove goroutine from dedup and add testing (#19)
| -rw-r--r-- | dedupe/dedupe.go | 57 | ||||
| -rw-r--r-- | dedupe/dedupe_test.go | 68 |
2 files changed, 97 insertions, 28 deletions
diff --git a/dedupe/dedupe.go b/dedupe/dedupe.go index c8ff5e9..a78df98 100644 --- a/dedupe/dedupe.go +++ b/dedupe/dedupe.go @@ -2,6 +2,7 @@ package dedup import ( "hash/maphash" + "math/rand" "sync" "time" ) @@ -11,42 +12,34 @@ import ( type PacketDeduplicator struct { expiresAfter time.Duration // expiresAfter defines the duration after which a seen packet record expires. seed maphash.Seed - sync.Mutex // protects the seen map from concurrent access. + sync.Mutex // protects the seen map from concurrent access. seen map[uint64]time.Time // seen maps a packet identifier to the last time it was seen. } // NewDeduplicator creates a new PacketDeduplicator with a given hasher and expiration duration for packet records. // It starts a background goroutine to periodically clean up expired packet records. func NewDeduplicator(expiresAfter time.Duration) *PacketDeduplicator { - pd := &PacketDeduplicator{ + return &PacketDeduplicator{ seen: map[uint64]time.Time{}, seed: maphash.MakeSeed(), expiresAfter: expiresAfter, } - go func() { - for range time.NewTicker(time.Second * 10).C { - pd.Lock() - for packet, timestamp := range pd.seen { - if time.Since(timestamp) > pd.expiresAfter { - delete(pd.seen, packet) - } - } - pd.Unlock() - } - }() - - return pd } -func (p *PacketDeduplicator) isSeen(k uint64) bool { +func (p *PacketDeduplicator) isSeen(now time.Time, k uint64) bool { + // Checking for dedup purges ~5% of the time + if rand.Intn(100) < 5 { + p.deleteUntil(now.Add(-p.expiresAfter)) + } + p.Lock() defer p.Unlock() d, exists := p.seen[k] if !exists { - p.seen[k] = time.Now() + p.seen[k] = now return false } - if time.Since(d) > p.expiresAfter { + if now.Sub(d) > p.expiresAfter { delete(p.seen, k) return false } @@ -56,11 +49,35 @@ func (p *PacketDeduplicator) isSeen(k uint64) bool { // Seen checks whether a packet with the given sender and packetID has been seen before. // If not, it records the packet as seen and returns false. Otherwise, it returns true. func (p *PacketDeduplicator) Seen(sender, packetID uint32) bool { - return p.isSeen((uint64(sender) << 32) | uint64(packetID)) + return p.seenAt(time.Now(), sender, packetID) } // SeenData checks whether the data has been seen before based on its hashed value. // If not, it records the data as seen and returns false. Otherwise, it returns true. func (p *PacketDeduplicator) SeenData(data []byte) bool { - return p.isSeen(maphash.Bytes(p.seed, data)) + return p.seenDataAt(time.Now(), data) +} + +// +// These are used internally and are test hooks allowing us to avoid the clock. +// + +func (p *PacketDeduplicator) deleteUntil(t time.Time) { + p.Lock() + defer p.Unlock() + for packet, timestamp := range p.seen { + if timestamp.Before(t) { + delete(p.seen, packet) + } + } +} + +// Seendata with an explicit time +func (p *PacketDeduplicator) seenDataAt(now time.Time, data []byte) bool { + return p.isSeen(now, maphash.Bytes(p.seed, data)) +} + +// Seen with an explicit time +func (p *PacketDeduplicator) seenAt(now time.Time, sender uint32, packetID uint32) bool { + return p.isSeen(now, (uint64(sender)<<32)|uint64(packetID)) } diff --git a/dedupe/dedupe_test.go b/dedupe/dedupe_test.go index 5891429..b02939f 100644 --- a/dedupe/dedupe_test.go +++ b/dedupe/dedupe_test.go @@ -34,19 +34,34 @@ func TestPacketDeduplicatorSeen(t *testing.T) { } func TestDuplicatorProp(t *testing.T) { + // Some arbitrary time. Ideally the generator would provide one, but the go quickcheck generators aren't very good + now := time.Now() if err := quick.Check(func(s, p uint32) bool { dedup := NewDeduplicator(expiresAfter) // Test Seen with new packetID - if dedup.Seen(s, p) { + if dedup.seenAt(now, s, p) { t.Error("Expected the packet to not have been seen") return false } // Test Seen with same packetID again - if !dedup.Seen(s, p) { + if !dedup.seenAt(now, s, p) { t.Error("Expected the packet to have been seen") return false } + + // Even up to expiry + if !dedup.seenAt(now.Add(expiresAfter), s, p) { + t.Error("Expected the packet to have been seen all the way to expiry") + return false + } + + // But not in The Future + if dedup.seenAt(now.Add(expiresAfter+time.Nanosecond), s, p) { + t.Error("Expected the packet to not have been seen after expiry") + return false + } + return true }, nil); err != nil { t.Error(err) @@ -54,16 +69,53 @@ func TestDuplicatorProp(t *testing.T) { } func FuzzDup(f *testing.F) { - f.Fuzz(func(t *testing.T, s uint32, p uint32) { - dedup := NewDeduplicator(10*time.Second) + now := time.Now() + f.Fuzz(func(t *testing.T, s1 uint32, s2 uint32, p1 uint32, p2 uint32) { + dedup := NewDeduplicator(expiresAfter) // Test Seen with new packetID - if dedup.Seen(s, p) { - t.Errorf("Expected the packet %v/%v to not have been seen the first time", s, p) + if dedup.seenAt(now, s1, p1) { + t.Error("Expected the packet to not have been seen") } // Test Seen with same packetID again - if !dedup.Seen(s, p) { - t.Errorf("Expected the packet %v/%v to have been seen", s, p) + if !dedup.seenAt(now, s1, p1) { + t.Error("Expected the packet to have been seen") + } + + // Even up to expiry + if !dedup.seenAt(now.Add(expiresAfter), s1, p1) { + t.Error("Expected the packet to have been seen all the way to expiry") + } + + // But not in The Future + if dedup.seenAt(now.Add(expiresAfter+time.Nanosecond), s1, p1) { + t.Error("Expected the packet to not have been seen after expiry") + } + + // + // This remaining bits of this property are not valid if s1 == s2 and p1 == p2 + // + if s1 == s2 && p1 == p2 { + return + } + + if dedup.seenAt(now, s2, p2) { + t.Error("Expected a different packet to not have been seen") + } + + // Test Seen with same packetID again + if !dedup.seenAt(now, s2, p2) { + t.Error("Expected a different packet to have been seen") + } + + // Even up to expiry + if !dedup.seenAt(now.Add(expiresAfter), s2, p2) { + t.Error("Expected a different packet to have been seen all the way to expiry") + } + + // But not in The Future + if dedup.seenAt(now.Add(expiresAfter+time.Nanosecond), s2, p2) { + t.Error("Expected a different packet to not have been seen after expiry") } }) |
