Skip to content

Commit

Permalink
perf(query): Improve IntersectCompressedWithBin for UID Pack (#8941)
Browse files Browse the repository at this point in the history
This PR improves the performance of IntersectCompressedWithBin.
The LDBC09 query went from 38 seconds to 22 seconds.
Benchmarks show up to 80% improvement in performance.
  • Loading branch information
harshil-goel committed Aug 9, 2023
1 parent 3d5080f commit 0474a00
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 19 deletions.
50 changes: 31 additions & 19 deletions algo/uidlist.go
Expand Up @@ -92,6 +92,7 @@ func IntersectCompressedWithLinJump(dec *codec.Decoder, v []uint64, o *[]uint64)
// IntersectCompressedWithBin is based on the paper
// "Fast Intersection Algorithms for Sorted Sequences"
// https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3
// Call seek on dec before calling this function
func IntersectCompressedWithBin(dec *codec.Decoder, q []uint64, o *[]uint64) {
ld := dec.ApproxLen()
lq := len(q)
Expand All @@ -105,34 +106,45 @@ func IntersectCompressedWithBin(dec *codec.Decoder, q []uint64, o *[]uint64) {

// Pick the shorter list and do binary search
if ld < lq {
uids := dec.Uids()
for len(uids) > 0 {
for _, u := range uids {
qidx := sort.Search(len(q), func(idx int) bool {
return q[idx] >= u
})
if qidx >= len(q) {
return
}
if q[qidx] == u {
*o = append(*o, u)
qidx++
}
q = q[qidx:]
for {
blockUids := dec.Uids()
if len(blockUids) == 0 {
break
}
uids = dec.Next()
IntersectWithBin(blockUids, q, o)
lastUid := blockUids[len(blockUids)-1]
qidx := sort.Search(len(q), func(idx int) bool {
return q[idx] >= lastUid
})
if qidx >= len(q) {
return
}
q = q[qidx:]
dec.Next()
}
return
}

var uids []uint64
for _, u := range q {
uids := dec.Seek(u, codec.SeekStart)
if len(uids) == 0 {
return
if len(uids) == 0 || u > uids[len(uids)-1] {
uids = dec.Seek(u, codec.SeekStart)
if len(uids) == 0 {
return
}
}
uidIdx := sort.Search(len(uids), func(idx int) bool {
return uids[idx] >= u
})
if uidIdx >= len(uids) {
// We know that u < max(uids). If we didn't find it here, it's not here.
continue
}
if uids[0] == u {
if uids[uidIdx] == u {
*o = append(*o, u)
uidIdx++
}
uids = uids[uidIdx:]
}
}

Expand Down
46 changes: 46 additions & 0 deletions algo/uidlist_test.go
Expand Up @@ -367,6 +367,52 @@ func BenchmarkListIntersectRandom(b *testing.B) {
randomTests(1024000, 0.01)
}

// BenchmarkListIntersectCompressBin benchmarks IntersectCompressedWithBin by
// intersecting a plain sorted UID slice of length sz with a compressed UID
// pack whose length is sz scaled by each ratio in rs.
//
// Both lists are drawn uniformly from [0, sz/overlap) and sorted; duplicates
// are not removed.
func BenchmarkListIntersectCompressBin(b *testing.B) {
	randomTests := func(sz int, overlap float64) {
		// Ratios are listed in ascending order; the early exit below relies on that.
		rs := []float64{0.01, 0.1, 1, 10, 100}
		for _, r := range rs {
			sz1 := sz
			sz2 := int(float64(sz) * r)
			if sz2 == 0 {
				// This ratio truncates to an empty list for the given size, but
				// larger ratios can still produce a usable one, so only skip
				// this ratio instead of abandoning the whole size.
				continue
			}
			if sz2 > 1000000 {
				// Every remaining ratio is larger still, so nothing else fits.
				break
			}

			u1, v1 := make([]uint64, sz1), make([]uint64, sz2)
			limit := int64(float64(sz) / overlap)
			for i := 0; i < sz1; i++ {
				u1[i] = uint64(rand.Int63n(limit))
			}
			for i := 0; i < sz2; i++ {
				v1[i] = uint64(rand.Int63n(limit))
			}
			sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
			sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })

			// NOTE(review): dst2 is shared by every iteration of the timed loop.
			// If IntersectCompressedWithBin only appends to the output slice,
			// dst2.Uids keeps growing with b.N — confirm this is intended.
			dst2 := &pb.List{}
			compressedUids := codec.Encode(v1, 256)

			b.Run(fmt.Sprintf("compressed:IntersectWith:ratio=%v:size=%d:overlap=%.2f:", r, sz, overlap),
				func(b *testing.B) {
					for k := 0; k < b.N; k++ {
						// A fresh decoder positioned at the start is required for
						// each run, because the intersection advances the decoder.
						dec := codec.Decoder{Pack: compressedUids}
						dec.Seek(0, codec.SeekStart)
						IntersectCompressedWithBin(&dec, u1, &dst2.Uids)
					}
				})
			fmt.Println()

			// b.Run has returned by now, so the pack is no longer in use.
			codec.FreePack(compressedUids)
		}
	}

	randomTests(10, 0.01)
	randomTests(100, 0.01)
	randomTests(1000, 0.01)
	randomTests(10000, 0.01)
	randomTests(100000, 0.01)
	randomTests(1000000, 0.01)
}

func BenchmarkListIntersectRatio(b *testing.B) {
randomTests := func(sz int, overlap float64) {
rs := []int{1, 10, 50, 100, 500, 1000, 10000, 100000, 1000000}
Expand Down

0 comments on commit 0474a00

Please sign in to comment.