|
| 1 | +/* |
| 2 | +Copyright 2015 The Kubernetes Authors All rights reserved. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package bandwidth |
| 18 | + |
| 19 | +import ( |
| 20 | + "bufio" |
| 21 | + "bytes" |
| 22 | + "encoding/hex" |
| 23 | + "fmt" |
| 24 | + "net" |
| 25 | + "strings" |
| 26 | + |
| 27 | + "k8s.io/kubernetes/pkg/api/resource" |
| 28 | + "k8s.io/kubernetes/pkg/util" |
| 29 | + "k8s.io/kubernetes/pkg/util/exec" |
| 30 | + |
| 31 | + "github.com/golang/glog" |
| 32 | +) |
| 33 | + |
| 34 | +// tcShaper provides an implementation of the BandwidthShaper interface on Linux using the 'tc' tool. |
| 35 | +// In general, using this requires that the caller posses the NET_CAP_ADMIN capability, though if you |
| 36 | +// do this within an container, it only requires the NS_CAPABLE capability for manipulations to that |
| 37 | +// container's network namespace. |
| 38 | +// Uses the hierarchical token bucket queueing discipline (htb), this requires Linux 2.4.20 or newer |
| 39 | +// or a custom kernel with that queueing discipline backported. |
| 40 | +type tcShaper struct { |
| 41 | + e exec.Interface |
| 42 | + iface string |
| 43 | +} |
| 44 | + |
| 45 | +func NewTCShaper(iface string) BandwidthShaper { |
| 46 | + shaper := &tcShaper{ |
| 47 | + e: exec.New(), |
| 48 | + iface: iface, |
| 49 | + } |
| 50 | + shaper.initializeInterface() |
| 51 | + return shaper |
| 52 | +} |
| 53 | + |
| 54 | +func (t *tcShaper) execAndLog(cmdStr string, args ...string) error { |
| 55 | + glog.V(6).Infof("Running: %s %s", cmdStr, strings.Join(args, " ")) |
| 56 | + cmd := t.e.Command(cmdStr, args...) |
| 57 | + out, err := cmd.CombinedOutput() |
| 58 | + glog.V(6).Infof("Output from tc: %s", string(out)) |
| 59 | + return err |
| 60 | +} |
| 61 | + |
| 62 | +func (t *tcShaper) nextClassID() (int, error) { |
| 63 | + data, err := t.e.Command("tc", "class", "show", "dev", t.iface).CombinedOutput() |
| 64 | + if err != nil { |
| 65 | + return -1, err |
| 66 | + } |
| 67 | + |
| 68 | + scanner := bufio.NewScanner(bytes.NewBuffer(data)) |
| 69 | + classes := util.StringSet{} |
| 70 | + for scanner.Scan() { |
| 71 | + line := strings.TrimSpace(scanner.Text()) |
| 72 | + // skip empty lines |
| 73 | + if len(line) == 0 { |
| 74 | + continue |
| 75 | + } |
| 76 | + parts := strings.Split(line, " ") |
| 77 | + // expected tc line: |
| 78 | + // class htb 1:1 root prio 0 rate 1000Kbit ceil 1000Kbit burst 1600b cburst 1600b |
| 79 | + if len(parts) != 14 { |
| 80 | + return -1, fmt.Errorf("unexpected output from tc: %s (%v)", scanner.Text(), parts) |
| 81 | + } |
| 82 | + classes.Insert(parts[2]) |
| 83 | + } |
| 84 | + |
| 85 | + // Make sure it doesn't go forever |
| 86 | + for nextClass := 1; nextClass < 10000; nextClass++ { |
| 87 | + if !classes.Has(fmt.Sprintf("1:%d", nextClass)) { |
| 88 | + return nextClass, nil |
| 89 | + } |
| 90 | + } |
| 91 | + // This should really never happen |
| 92 | + return -1, fmt.Errorf("exhausted class space, please try again") |
| 93 | +} |
| 94 | + |
| 95 | +// Convert a CIDR from text to a hex representation |
| 96 | +// Strips any masked parts of the IP, so 1.2.3.4/16 becomes hex(1.2.0.0)/ffffffff |
| 97 | +func hexCIDR(cidr string) (string, error) { |
| 98 | + ip, ipnet, err := net.ParseCIDR(cidr) |
| 99 | + if err != nil { |
| 100 | + return "", err |
| 101 | + } |
| 102 | + ip = ip.Mask(ipnet.Mask) |
| 103 | + hexIP := hex.EncodeToString([]byte(ip.To4())) |
| 104 | + hexMask := ipnet.Mask.String() |
| 105 | + return hexIP + "/" + hexMask, nil |
| 106 | +} |
| 107 | + |
| 108 | +func (t *tcShaper) findCIDRClass(cidr string) (class, handle string, err error) { |
| 109 | + data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput() |
| 110 | + if err != nil { |
| 111 | + return "", "", err |
| 112 | + } |
| 113 | + |
| 114 | + hex, err := hexCIDR(cidr) |
| 115 | + if err != nil { |
| 116 | + return "", "", err |
| 117 | + } |
| 118 | + spec := fmt.Sprintf("match %s", hex) |
| 119 | + |
| 120 | + scanner := bufio.NewScanner(bytes.NewBuffer(data)) |
| 121 | + filter := "" |
| 122 | + for scanner.Scan() { |
| 123 | + line := strings.TrimSpace(scanner.Text()) |
| 124 | + if len(line) == 0 { |
| 125 | + continue |
| 126 | + } |
| 127 | + if strings.HasPrefix(line, "filter") { |
| 128 | + filter = line |
| 129 | + continue |
| 130 | + } |
| 131 | + if strings.Contains(line, spec) { |
| 132 | + parts := strings.Split(filter, " ") |
| 133 | + // expected tc line: |
| 134 | + // filter parent 1: protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 |
| 135 | + if len(parts) != 19 { |
| 136 | + return "", "", fmt.Errorf("unexpected output from tc: %s %d (%v)", filter, len(parts), parts) |
| 137 | + } |
| 138 | + return parts[18], parts[9], nil |
| 139 | + } |
| 140 | + } |
| 141 | + return "", "", fmt.Errorf("Failed to find cidr: %s on interface: %s", cidr, t.iface) |
| 142 | +} |
| 143 | + |
| 144 | +func makeKBitString(rsrc resource.Quantity) string { |
| 145 | + return fmt.Sprintf("%dkbit", (rsrc.Value() / 1000)) |
| 146 | +} |
| 147 | + |
| 148 | +func (t *tcShaper) makeNewClass(rate string) (int, error) { |
| 149 | + class, err := t.nextClassID() |
| 150 | + if err != nil { |
| 151 | + return -1, err |
| 152 | + } |
| 153 | + if err := t.execAndLog("tc", "class", "add", "dev", t.iface, "parent", "1:", "classid", fmt.Sprintf("1:%d", class), "htb", "rate", rate); err != nil { |
| 154 | + return -1, err |
| 155 | + } |
| 156 | + return class, nil |
| 157 | +} |
| 158 | + |
| 159 | +func (t *tcShaper) Limit(cidr string, upload, download resource.Quantity) (err error) { |
| 160 | + var downloadClass, uploadClass int |
| 161 | + if downloadClass, err = t.makeNewClass(makeKBitString(download)); err != nil { |
| 162 | + return err |
| 163 | + } |
| 164 | + if uploadClass, err = t.makeNewClass(makeKBitString(upload)); err != nil { |
| 165 | + return err |
| 166 | + } |
| 167 | + |
| 168 | + if err := t.execAndLog("tc", "filter", "add", "dev", t.iface, "protocol", "ip", "parent", "1:0", "prio", "1", "u32", "match", "ip", "dst", cidr, "flowid", fmt.Sprintf("1:%d", downloadClass)); err != nil { |
| 169 | + return err |
| 170 | + } |
| 171 | + if err := t.execAndLog("tc", "filter", "add", "dev", t.iface, "protocol", "ip", "parent", "1:0", "prio", "1", "u32", "match", "ip", "src", cidr, "flowid", fmt.Sprintf("1:%d", uploadClass)); err != nil { |
| 172 | + return err |
| 173 | + } |
| 174 | + return nil |
| 175 | +} |
| 176 | + |
| 177 | +func (t *tcShaper) initializeInterface() error { |
| 178 | + return t.execAndLog("tc", "qdisc", "add", "dev", t.iface, "root", "handle", "1:", "htb", "default", "30") |
| 179 | +} |
| 180 | + |
| 181 | +func (t *tcShaper) Reset(cidr string) error { |
| 182 | + class, handle, err := t.findCIDRClass(cidr) |
| 183 | + if err != nil { |
| 184 | + return err |
| 185 | + } |
| 186 | + if err := t.execAndLog("tc", "filter", "del", "dev", t.iface, "parent", "1:", "proto", "ip", "prio", "1", "handle", handle, "u32"); err != nil { |
| 187 | + return err |
| 188 | + } |
| 189 | + return t.execAndLog("tc", "class", "del", "dev", t.iface, "parent", "1:", "classid", class) |
| 190 | +} |
0 commit comments