From aab5e30095704f2fd17b5426ad70625b23073bb4 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Thu, 20 Feb 2014 12:06:12 +0100 Subject: [PATCH] devicemapper: Add trim-pool driver command This command suspends the pool, extracts all metadata from the metadata pool and then manually discards all regions not in use on the data device. This will re-sparsify the underlying loopback file and regain space on the host operating system. This is required in some cases because the discards we do when deleting images and containers isn't enought to fully free all space unless you have a very new kernel. See: https://github.com/dotcloud/docker/issues/3182#issuecomment-32888284 Docker-DCO-1.1-Signed-off-by: Alexander Larsson (github: alexlarsson) --- runtime/graphdriver/devmapper/deviceset.go | 91 ++++++++++ runtime/graphdriver/devmapper/driver.go | 11 ++ runtime/graphdriver/devmapper/metadata.go | 166 +++++++++++++++++++ runtime/graphdriver/devmapper/ranges.go | 98 +++++++++++ runtime/graphdriver/devmapper/ranges_test.go | 54 ++++++ 5 files changed, 420 insertions(+) create mode 100644 runtime/graphdriver/devmapper/metadata.go create mode 100644 runtime/graphdriver/devmapper/ranges.go create mode 100644 runtime/graphdriver/devmapper/ranges_test.go diff --git a/runtime/graphdriver/devmapper/deviceset.go b/runtime/graphdriver/devmapper/deviceset.go index bb37c79dff617..abafd3930dd2a 100644 --- a/runtime/graphdriver/devmapper/deviceset.go +++ b/runtime/graphdriver/devmapper/deviceset.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "sync" + "syscall" "time" ) @@ -395,6 +396,96 @@ func minor(device uint64) uint64 { return (device & 0xff) | ((device >> 12) & 0xfff00) } +func (devices *DeviceSet) getBlockDevice(name string) (*osFile, error) { + dirname := devices.loopbackDir() + filename := path.Join(dirname, name) + + file, err := osOpenFile(filename, osORdWr, 0) + if file == nil { + return nil, err + } + defer file.Close() + + loopback := FindLoopDeviceFor(file) + if loopback == nil { + return nil, fmt.Errorf("Unable to find loopback mount for: %s", filename) + } + return loopback, nil +} + +func (devices *DeviceSet) TrimPool() error { + devices.Lock() + defer devices.Unlock() + + totalSizeInSectors, _, _, dataTotal, _, _, err := devices.poolStatus() + if err != nil { + return err + } + blockSizeInSectors := totalSizeInSectors / dataTotal + SectorSize := blockSizeInSectors * 512 + + data, err := devices.getBlockDevice("data") + if err != nil { + return err + } + defer data.Close() + + dataSize, err := GetBlockDeviceSize(data) + if err != nil { + return err + } + + metadata, err := devices.getBlockDevice("metadata") + if err != nil { + return err + } + defer metadata.Close() + + // Suspend the pool so the metadata doesn't change and new blocks + // are not loaded + if err := suspendDevice(devices.getPoolName()); err != nil { + return fmt.Errorf("Unable to suspend pool: %s", err) + } + + // Just in case, make sure everything is on disk + syscall.Sync() + + ranges, err := readMetadataRanges(metadata.Name()) + if err != nil { + resumeDevice(devices.getPoolName()) + return err + } + + lastEnd := uint64(0) + + for e := ranges.Front(); e != nil; e = e.Next() { + r := e.Value.(*Range) + // Convert to bytes + rBegin := r.begin * SectorSize + rEnd := r.end * SectorSize + + if rBegin > lastEnd { + if err := BlockDeviceDiscard(data, lastEnd, rBegin-lastEnd); err != nil { + return fmt.Errorf("Failing do discard block, leaving pool suspended: %v", err) + } + } + lastEnd = rEnd + } + + if dataSize > lastEnd { + if err := BlockDeviceDiscard(data, lastEnd, dataSize-lastEnd); err != nil { + return fmt.Errorf("Failing do discard block, leaving pool suspended: %v", err) + } + } + + // Resume the pool + if err := resumeDevice(devices.getPoolName()); err != nil { + return fmt.Errorf("Unable to resume pool: %s", err) + } + + return nil +} + func (devices *DeviceSet) ResizePool(size int64) error { devices.Lock() defer devices.Unlock() diff --git a/runtime/graphdriver/devmapper/driver.go b/runtime/graphdriver/devmapper/driver.go index 0a77754514d74..58381af973ef3 100644 --- a/runtime/graphdriver/devmapper/driver.go +++ b/runtime/graphdriver/devmapper/driver.go @@ -95,6 +95,17 @@ func byteSizeFromString(arg string) (int64, error) { func (d *Driver) Operation(op string, args []string) error { switch op { + case "trim-pool": + if len(args) != 0 { + return fmt.Errorf("Usage: trim-pool") + } + + err := d.DeviceSet.TrimPool() + if err != nil { + return fmt.Errorf("Error trimming pool: %s", err.Error()) + } + + return nil case "resize-pool": if len(args) != 1 { return fmt.Errorf("Usage: resize-pool NEW_SIZE") diff --git a/runtime/graphdriver/devmapper/metadata.go b/runtime/graphdriver/devmapper/metadata.go new file mode 100644 index 0000000000000..ff89181652521 --- /dev/null +++ b/runtime/graphdriver/devmapper/metadata.go @@ -0,0 +1,166 @@ +// +build linux,amd64 + +package devmapper + +import ( + "encoding/xml" + "fmt" + "io" + "os/exec" + "strconv" +) + +type MetadataDecoder struct { + d *xml.Decoder + ranges *Ranges +} + +func NewMetadataDecoder(reader io.Reader) *MetadataDecoder { + m := &MetadataDecoder{ + d: xml.NewDecoder(reader), + ranges: NewRanges(), + } + + return m +} + +func (m *MetadataDecoder) parseRange(start *xml.StartElement) error { + var begin, length uint64 + var err error + for _, attr := range start.Attr { + switch attr.Name.Local { + case "data_begin": + begin, err = strconv.ParseUint(attr.Value, 10, 64) + if err != nil { + return err + } + case "length": + length, err = strconv.ParseUint(attr.Value, 10, 64) + if err != nil { + return err + } + } + } + + m.ranges.Add(begin, begin+length) + + m.d.Skip() + return nil +} + +func (m *MetadataDecoder) parseSingle(start *xml.StartElement) error { + for _, attr := range start.Attr { + switch attr.Name.Local { + case "data_block": + block, err := strconv.ParseUint(attr.Value, 10, 64) + if err != nil { + return err + } + m.ranges.Add(block, block+1) + } + } + + m.d.Skip() + + return nil +} + +func (m *MetadataDecoder) parseDevice(start *xml.StartElement) error { + for { + tok, err := m.d.Token() + if err != nil { + return err + } + switch tok := tok.(type) { + case xml.StartElement: + switch tok.Name.Local { + case "range_mapping": + if err := m.parseRange(&tok); err != nil { + return err + } + case "single_mapping": + if err := m.parseSingle(&tok); err != nil { + return err + } + default: + return fmt.Errorf("Unknown tag type %s\n", tok.Name) + } + case xml.EndElement: + return nil + } + } +} + +func (m *MetadataDecoder) readStart() (*xml.StartElement, error) { + for { + tok, err := m.d.Token() + if err != nil { + return nil, err + } + + switch tok := tok.(type) { + case xml.StartElement: + return &tok, nil + + case xml.EndElement: + return nil, fmt.Errorf("Unbalanced tags") + } + } +} + +func (m *MetadataDecoder) parseMetadata() error { + start, err := m.readStart() + if err != nil { + return err + } + if start.Name.Local != "superblock" { + return fmt.Errorf("Unexpected tag type %s", start.Name) + } + + for { + tok, err := m.d.Token() + if err != nil { + return err + } + switch tok := tok.(type) { + case xml.StartElement: + switch tok.Name.Local { + case "device": + m.parseDevice(&tok) + default: + return fmt.Errorf("Unknown tag type %s\n", tok.Name) + } + case xml.EndElement: + return nil + } + } +} + +func readMetadataRanges(file string) (*Ranges, error) { + cmd := exec.Command("thin_dump", file) + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + m := NewMetadataDecoder(stdout) + + errChan := make(chan error) + + go func() { + err = m.parseMetadata() + errChan <- err + }() + + if err := cmd.Run(); err != nil { + return nil, err + } + + err = <-errChan + if err != nil { + return nil, err + } + + return m.ranges, nil +} diff --git a/runtime/graphdriver/devmapper/ranges.go b/runtime/graphdriver/devmapper/ranges.go new file mode 100644 index 0000000000000..0015cc0296e84 --- /dev/null +++ b/runtime/graphdriver/devmapper/ranges.go @@ -0,0 +1,98 @@ +// +build linux,amd64 + +package devmapper + +import ( + "container/list" + "fmt" +) + +type Range struct { + begin uint64 + end uint64 +} + +type Ranges struct { + *list.List +} + +func NewRanges() *Ranges { + return &Ranges{list.New()} +} + +func (r *Ranges) ToString() string { + s := "" + for e := r.Front(); e != nil; e = e.Next() { + r := e.Value.(*Range) + if s != "" { + s = s + "," + } + s = fmt.Sprintf("%s%d-%d", s, r.begin, r.end) + } + return s +} + +func (r *Ranges) Clear() { + r.Init() +} + +func (r *Ranges) Add(begin, end uint64) { + var next *list.Element + for e := r.Front(); e != nil; e = next { + next = e.Next() + + existing := e.Value.(*Range) + + // If existing range is fully to the left, skip + if existing.end < begin { + continue + } + + // If new range is fully to the left, just insert + if end < existing.begin { + r.InsertBefore(&Range{begin, end}, e) + return + } + + // Now we know the two ranges somehow intersect (or at least touch) + + // Extend existing range with the new range + if begin < existing.begin { + existing.begin = begin + } + + // If the new range is completely covered by existing range, we're done + if end <= existing.end { + return + } + + // Otherwise strip r from new range + begin = existing.end + + // We're now touching r at the end, and so we need to either extend r + // or merge with next + + if next == nil { + // Nothing after, extend + existing.end = end + return + } + + nextR := next.Value.(*Range) + if end < nextR.begin { + // Fits, Just extend + existing.end = end + return + } + + // The new region overlaps the next, merge the two + nextR.begin = existing.begin + r.Remove(e) + } + + // nothing in list or everything to the left, just append the rest + if begin < end { + r.PushBack(&Range{begin, end}) + return + } +} diff --git a/runtime/graphdriver/devmapper/ranges_test.go b/runtime/graphdriver/devmapper/ranges_test.go new file mode 100644 index 0000000000000..71eb7902bba4e --- /dev/null +++ b/runtime/graphdriver/devmapper/ranges_test.go @@ -0,0 +1,54 @@ +// +build linux,amd64 + +package devmapper + +import ( + "fmt" + "testing" +) + +func assert(t *testing.T, r *Ranges, res string) { + s := r.ToString() + if s != res { + t.Fatalf(fmt.Sprintf("error: got %s, expecting %s\n", s, res)) + } +} + +func TestRanges(t *testing.T) { + r := NewRanges() + assert(t, r, "") + r.Clear() + assert(t, r, "") + r.Add(5, 6) + assert(t, r, "5-6") + r.Add(5, 6) + assert(t, r, "5-6") + r.Add(5, 7) + assert(t, r, "5-7") + r.Add(7, 8) + assert(t, r, "5-8") + r.Add(4, 6) + assert(t, r, "4-8") + r.Add(5, 6) + assert(t, r, "4-8") + r.Add(3, 4) + assert(t, r, "3-8") + r.Add(1, 2) + assert(t, r, "1-2,3-8") + r.Add(15, 20) + assert(t, r, "1-2,3-8,15-20") + r.Add(30, 40) + assert(t, r, "1-2,3-8,15-20,30-40") + r.Add(8, 9) + assert(t, r, "1-2,3-9,15-20,30-40") + r.Add(8, 10) + assert(t, r, "1-2,3-10,15-20,30-40") + r.Add(8, 25) + assert(t, r, "1-2,3-25,30-40") + r.Add(0, 27) + assert(t, r, "0-27,30-40") + r.Add(29, 41) + assert(t, r, "0-27,29-41") + r.Add(27, 29) + assert(t, r, "0-41") +}