Skip to content

Commit

Permalink
[SYSINFRA-1501] base/digest: Use buffered writer for computing digests
Browse files Browse the repository at this point in the history
Summary:
Using a buffered writer for computing digests reduces the amount
of memory used and allocations made while computing digests.

Since digest computation is used widely, we introduce `NewWriterShort`
for computing digests of relatively short inputs and use it where appropriate.

Performance improvement is mainly due to addition of `WriteString`.
Various benchmarks show modest CPU improvements but significant memory/alloc reduction (reduced by **90%** in some cases)

Test Plan:
Unit and Integration tests.

Benchmarking some digest computations (fileset assertions in particular shown below):
- before
```
BenchmarkDigest
BenchmarkDigest/writeDigestOld_N100
BenchmarkDigest/writeDigestOld_N100-8         	    2750	    401574 ns/op	  177809 B/op	    1232 allocs/op
BenchmarkDigest/writeDigestNew_N100
BenchmarkDigest/writeDigestNew_N100-8         	    3904	    269149 ns/op	   51968 B/op	    1310 allocs/op
BenchmarkDigest/writeDigestOld_N1000
BenchmarkDigest/writeDigestOld_N1000-8        	     183	   6039835 ns/op	 1770788 B/op	   12113 allocs/op
BenchmarkDigest/writeDigestNew_N1000
BenchmarkDigest/writeDigestNew_N1000-8        	     352	   3031291 ns/op	  515594 B/op	   13010 allocs/op
BenchmarkDigest/writeDigestOld_N10000
BenchmarkDigest/writeDigestOld_N10000-8       	      16	  70172874 ns/op	22929233 B/op	  121179 allocs/op
BenchmarkDigest/writeDigestNew_N10000
BenchmarkDigest/writeDigestNew_N10000-8       	      31	  37214169 ns/op	 5153214 B/op	  130010 allocs/op
BenchmarkDigest/writeDigestOld_N100000
BenchmarkDigest/writeDigestOld_N100000-8      	       2	 960212048 ns/op	207093356 B/op	 1209659 allocs/op
BenchmarkDigest/writeDigestNew_N100000
BenchmarkDigest/writeDigestNew_N100000-8      	       2	 561615654 ns/op	51450236 B/op	 1300010 allocs/op
PASS
```
- after
```
BenchmarkDigest
BenchmarkDigest/writeDigestOld_N100
BenchmarkDigest/writeDigestOld_N100-8         	    3322	    339663 ns/op	  136214 B/op	      34 allocs/op
BenchmarkDigest/writeDigestNew_N100
BenchmarkDigest/writeDigestNew_N100-8         	    5930	    191718 ns/op	   10168 B/op	     112 allocs/op
BenchmarkDigest/writeDigestOld_N1000
BenchmarkDigest/writeDigestOld_N1000-8        	     294	   4650456 ns/op	 1316342 B/op	     115 allocs/op
BenchmarkDigest/writeDigestNew_N1000
BenchmarkDigest/writeDigestNew_N1000-8        	     483	   2245917 ns/op	   61336 B/op	    1012 allocs/op
BenchmarkDigest/writeDigestOld_N10000
BenchmarkDigest/writeDigestOld_N10000-8       	      21	  63469161 ns/op	18344572 B/op	    1176 allocs/op
BenchmarkDigest/writeDigestNew_N10000
BenchmarkDigest/writeDigestNew_N10000-8       	      43	  27897608 ns/op	  572248 B/op	   10012 allocs/op
BenchmarkDigest/writeDigestOld_N100000
BenchmarkDigest/writeDigestOld_N100000-8      	       2	 753470408 ns/op	161193760 B/op	    9590 allocs/op
BenchmarkDigest/writeDigestNew_N100000
BenchmarkDigest/writeDigestNew_N100000-8      	       3	 359851338 ns/op	 5607640 B/op	  100012 allocs/op
PASS
```

Reviewers: pboyapalli, ghorrell

Reviewed By: pboyapalli

Subscribers: dnicolaou

Differential Revision: https://phabricator.grailbio.com/D73250

fbshipit-source-id: 39208f6
  • Loading branch information
swami-m authored and Alex Wissmann committed Apr 19, 2022
1 parent 7568556 commit 81e8f4c
Show file tree
Hide file tree
Showing 11 changed files with 16 additions and 16 deletions.
6 changes: 3 additions & 3 deletions assertion.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ type assertion struct {
// newAssertion creates an assertion for a given key and object-value mappings.
func newAssertion(objects map[string]string) *assertion {
a := assertion{objects: objects}
w := Digester.NewWriter()
w := Digester.NewWriterShort()
for _, k := range sortedKeys(objects) {
_, _ = io.WriteString(w, k)
_, _ = io.WriteString(w, objects[k])
Expand Down Expand Up @@ -350,7 +350,7 @@ func (s *Assertions) Digest() digest.Digest {

// Digest returns the assertions' digest.
func (s *Assertions) computeDigest() digest.Digest {
w := Digester.NewWriter()
w := Digester.NewWriterShort()
s.writeDigest(w)
return w.Digest()
}
Expand Down Expand Up @@ -577,7 +577,7 @@ func (s *Assertions) unmarshal(dec *json.Decoder) error {
keys = append(keys, k)
}
sort.Strings(keys)
w := Digester.NewWriter()
w := Digester.NewWriterShort()
for _, k := range keys {
_, _ = w.Write(unsafe.StringToBytes(k))
_, _ = w.Write(unsafe.StringToBytes(v.objects[k]))
Expand Down
4 changes: 2 additions & 2 deletions fileset.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (f File) Digest() digest.Digest {
if !f.ContentHash.IsZero() {
return f.ContentHash
}
w := Digester.NewWriter()
w := Digester.NewWriterShort()
var b [8]byte
binary.LittleEndian.PutUint64(b[:], uint64(f.Size))
w.Write(b[:])
Expand Down Expand Up @@ -442,7 +442,7 @@ func (v Fileset) String() string {
// semantics: two values with the same digest are considered to be
// equivalent.
func (v Fileset) Digest() digest.Digest {
w := Digester.NewWriter()
w := Digester.NewWriterShort()
v.WriteDigest(w)
return w.Digest()
}
Expand Down
4 changes: 2 additions & 2 deletions flow/flow.go
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@ func (f *Flow) Digest() digest.Digest {
}

func (f *Flow) computeDigest() {
w := Digester.NewWriter()
w := Digester.NewWriterShort()
f.WriteDigest(w)
f.digest = w.Digest()
}
Expand Down Expand Up @@ -926,7 +926,7 @@ func (f *Flow) WriteDigest(w io.Writer) {
// PhysicalDigest returns the digest for this node substituting the
// image name in the node with the provided one, if an exec node.
func (f *Flow) physicalDigest(image string) digest.Digest {
w := Digester.NewWriter()
w := Digester.NewWriterShort()
for _, dep := range f.Deps {
dep.Value.(reflow.Fileset).WriteDigest(w)
}
Expand Down
2 changes: 1 addition & 1 deletion syntax/digest.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func digestN(i int) digest.Digest {
// the future, we could consider canonicalizing the expression tree
// as well (e.g., by exploiting commutativity, etc.)
func (e *Expr) Digest(env *values.Env) digest.Digest {
w := reflow.Digester.NewWriter()
w := reflow.Digester.NewWriterShort()
e.digest(w, env)
return w.Digest()
}
Expand Down
2 changes: 1 addition & 1 deletion syntax/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,7 @@ func (k evalK) Continue(e *Expr, sess *Session, env *values.Env, ident string, k
depsi []int
vs = make([]values.T, len(subs))
ts = make([]*types.T, len(subs))
dw = reflow.Digester.NewWriter()
dw = reflow.Digester.NewWriterShort()
)
// TODO(marius): push down sorting of dependencies here?
for i, sub := range subs {
Expand Down
2 changes: 1 addition & 1 deletion syntax/force.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ func newResolver(v values.T, t *types.T) *resolver {
return &resolver{
v: v,
t: t,
dw: reflow.Digester.NewWriter(),
dw: reflow.Digester.NewWriterShort(),
}
}

Expand Down
4 changes: 2 additions & 2 deletions syntax/pat.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ func (p *Pat) FieldMap() map[string]*Pat {

// Digest returns a digest for this pattern.
func (p *Pat) Digest() digest.Digest {
w := reflow.Digester.NewWriter()
w := reflow.Digester.NewWriterShort()
p.digest(w)
return w.Digest()
}
Expand Down Expand Up @@ -648,7 +648,7 @@ func (p Path) Match(v values.T, t *types.T) (values.T, *types.T, Path, error) {

// Digest returns a digest representing this path.
func (p Path) Digest() digest.Digest {
w := reflow.Digester.NewWriter()
w := reflow.Digester.NewWriterShort()
p.digest(w)
return w.Digest()
}
Expand Down
2 changes: 1 addition & 1 deletion syntax/stdlib.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (s SystemFunc) Apply(loc values.Location, args []values.T) (values.T, error
var (
deps []*flow.Flow
depsi []int
dw = reflow.Digester.NewWriter()
dw = reflow.Digester.NewWriterShort()
)
for i := range args {
if s.Mode == ModeForced {
Expand Down
2 changes: 1 addition & 1 deletion taskdb/taskdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ type ImgCmdID digest.Digest
// NewImgCmdID returns an ImgCmdID created
// from an image and a cmd.
func NewImgCmdID(image, cmd string) ImgCmdID {
w := idDigester.NewWriter()
w := idDigester.NewWriterShort()
_, _ = io.WriteString(w, image)
_, _ = io.WriteString(w, cmd)
return ImgCmdID(w.Digest())
Expand Down
2 changes: 1 addition & 1 deletion test/testutil/transferer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func (t *WaitTransferer) Init() {
}

func (t *WaitTransferer) transfer(dst, src reflow.Repository, files ...reflow.File) chan error {
dw := reflow.Digester.NewWriter()
dw := reflow.Digester.NewWriterShort()
fmt.Fprintf(dw, "%p%p", dst, src)
sort.Slice(files, func(i, j int) bool {
return files[i].ID.Less(files[j].ID)
Expand Down
2 changes: 1 addition & 1 deletion values/values.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ func must(n int, err error) {

// Digest computes the digest for value v, given type t.
func Digest(v T, t *types.T) digest.Digest {
w := Digester.NewWriter()
w := Digester.NewWriterShort()
WriteDigest(w, v, t)
return w.Digest()
}
Expand Down

0 comments on commit 81e8f4c

Please sign in to comment.