aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDustin Sallings <[email protected]>2024-02-17 10:43:02 -1000
committerGitHub <[email protected]>2024-02-17 10:43:02 -1000
commit59cf28f56db53cddb9a1704fb34760dbc0377ec8 (patch)
tree66c638a6ef5da5746ecc105c314a7859e72ca909
parent3bfd8048b5a93e4ede740dbb51baa16e0262da25 (diff)
Remove goroutine from dedup and add testing (#19)
-rw-r--r--dedupe/dedupe.go57
-rw-r--r--dedupe/dedupe_test.go68
2 files changed, 97 insertions, 28 deletions
diff --git a/dedupe/dedupe.go b/dedupe/dedupe.go
index c8ff5e9..a78df98 100644
--- a/dedupe/dedupe.go
+++ b/dedupe/dedupe.go
@@ -2,6 +2,7 @@ package dedup
import (
"hash/maphash"
+ "math/rand"
"sync"
"time"
)
@@ -11,42 +12,34 @@ import (
type PacketDeduplicator struct {
expiresAfter time.Duration // expiresAfter defines the duration after which a seen packet record expires.
seed maphash.Seed
- sync.Mutex // protects the seen map from concurrent access.
+ sync.Mutex // protects the seen map from concurrent access.
seen map[uint64]time.Time // seen maps a packet identifier to the last time it was seen.
}
// NewDeduplicator creates a new PacketDeduplicator with a given hasher and expiration duration for packet records.
// It starts a background goroutine to periodically clean up expired packet records.
func NewDeduplicator(expiresAfter time.Duration) *PacketDeduplicator {
- pd := &PacketDeduplicator{
+ return &PacketDeduplicator{
seen: map[uint64]time.Time{},
seed: maphash.MakeSeed(),
expiresAfter: expiresAfter,
}
- go func() {
- for range time.NewTicker(time.Second * 10).C {
- pd.Lock()
- for packet, timestamp := range pd.seen {
- if time.Since(timestamp) > pd.expiresAfter {
- delete(pd.seen, packet)
- }
- }
- pd.Unlock()
- }
- }()
-
- return pd
}
-func (p *PacketDeduplicator) isSeen(k uint64) bool {
+func (p *PacketDeduplicator) isSeen(now time.Time, k uint64) bool {
+ // Checking for dedup purges ~5% of the time
+ if rand.Intn(100) < 5 {
+ p.deleteUntil(now.Add(-p.expiresAfter))
+ }
+
p.Lock()
defer p.Unlock()
d, exists := p.seen[k]
if !exists {
- p.seen[k] = time.Now()
+ p.seen[k] = now
return false
}
- if time.Since(d) > p.expiresAfter {
+ if now.Sub(d) > p.expiresAfter {
delete(p.seen, k)
return false
}
@@ -56,11 +49,35 @@ func (p *PacketDeduplicator) isSeen(k uint64) bool {
// Seen checks whether a packet with the given sender and packetID has been seen before.
// If not, it records the packet as seen and returns false. Otherwise, it returns true.
func (p *PacketDeduplicator) Seen(sender, packetID uint32) bool {
- return p.isSeen((uint64(sender) << 32) | uint64(packetID))
+ return p.seenAt(time.Now(), sender, packetID)
}
// SeenData checks whether the data has been seen before based on its hashed value.
// If not, it records the data as seen and returns false. Otherwise, it returns true.
func (p *PacketDeduplicator) SeenData(data []byte) bool {
- return p.isSeen(maphash.Bytes(p.seed, data))
+ return p.seenDataAt(time.Now(), data)
+}
+
+//
+// These are used internally and are test hooks allowing us to avoid the clock.
+//
+
+func (p *PacketDeduplicator) deleteUntil(t time.Time) {
+ p.Lock()
+ defer p.Unlock()
+ for packet, timestamp := range p.seen {
+ if timestamp.Before(t) {
+ delete(p.seen, packet)
+ }
+ }
+}
+
+// Seendata with an explicit time
+func (p *PacketDeduplicator) seenDataAt(now time.Time, data []byte) bool {
+ return p.isSeen(now, maphash.Bytes(p.seed, data))
+}
+
+// Seen with an explicit time
+func (p *PacketDeduplicator) seenAt(now time.Time, sender uint32, packetID uint32) bool {
+ return p.isSeen(now, (uint64(sender)<<32)|uint64(packetID))
}
diff --git a/dedupe/dedupe_test.go b/dedupe/dedupe_test.go
index 5891429..b02939f 100644
--- a/dedupe/dedupe_test.go
+++ b/dedupe/dedupe_test.go
@@ -34,19 +34,34 @@ func TestPacketDeduplicatorSeen(t *testing.T) {
}
func TestDuplicatorProp(t *testing.T) {
+ // Some arbitrary time. Ideally the generator would provide one, but the go quickcheck generators aren't very good
+ now := time.Now()
if err := quick.Check(func(s, p uint32) bool {
dedup := NewDeduplicator(expiresAfter)
// Test Seen with new packetID
- if dedup.Seen(s, p) {
+ if dedup.seenAt(now, s, p) {
t.Error("Expected the packet to not have been seen")
return false
}
// Test Seen with same packetID again
- if !dedup.Seen(s, p) {
+ if !dedup.seenAt(now, s, p) {
t.Error("Expected the packet to have been seen")
return false
}
+
+ // Even up to expiry
+ if !dedup.seenAt(now.Add(expiresAfter), s, p) {
+ t.Error("Expected the packet to have been seen all the way to expiry")
+ return false
+ }
+
+ // But not in The Future
+ if dedup.seenAt(now.Add(expiresAfter+time.Nanosecond), s, p) {
+ t.Error("Expected the packet to not have been seen after expiry")
+ return false
+ }
+
return true
}, nil); err != nil {
t.Error(err)
@@ -54,16 +69,53 @@ func TestDuplicatorProp(t *testing.T) {
}
func FuzzDup(f *testing.F) {
- f.Fuzz(func(t *testing.T, s uint32, p uint32) {
- dedup := NewDeduplicator(10*time.Second)
+ now := time.Now()
+ f.Fuzz(func(t *testing.T, s1 uint32, s2 uint32, p1 uint32, p2 uint32) {
+ dedup := NewDeduplicator(expiresAfter)
// Test Seen with new packetID
- if dedup.Seen(s, p) {
- t.Errorf("Expected the packet %v/%v to not have been seen the first time", s, p)
+ if dedup.seenAt(now, s1, p1) {
+ t.Error("Expected the packet to not have been seen")
}
// Test Seen with same packetID again
- if !dedup.Seen(s, p) {
- t.Errorf("Expected the packet %v/%v to have been seen", s, p)
+ if !dedup.seenAt(now, s1, p1) {
+ t.Error("Expected the packet to have been seen")
+ }
+
+ // Even up to expiry
+ if !dedup.seenAt(now.Add(expiresAfter), s1, p1) {
+ t.Error("Expected the packet to have been seen all the way to expiry")
+ }
+
+ // But not in The Future
+ if dedup.seenAt(now.Add(expiresAfter+time.Nanosecond), s1, p1) {
+ t.Error("Expected the packet to not have been seen after expiry")
+ }
+
+ //
+ // This remaining bits of this property are not valid if s1 == s2 and p1 == p2
+ //
+ if s1 == s2 && p1 == p2 {
+ return
+ }
+
+ if dedup.seenAt(now, s2, p2) {
+ t.Error("Expected a different packet to not have been seen")
+ }
+
+ // Test Seen with same packetID again
+ if !dedup.seenAt(now, s2, p2) {
+ t.Error("Expected a different packet to have been seen")
+ }
+
+ // Even up to expiry
+ if !dedup.seenAt(now.Add(expiresAfter), s2, p2) {
+ t.Error("Expected a different packet to have been seen all the way to expiry")
+ }
+
+ // But not in The Future
+ if dedup.seenAt(now.Add(expiresAfter+time.Nanosecond), s2, p2) {
+ t.Error("Expected a different packet to not have been seen after expiry")
}
})