Source file src/internal/runtime/cgroup/cgroup_linux.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cgroup
     6  
     7  import (
     8  	"internal/runtime/syscall/linux"
     9  )
    10  
// Names of the cgroup control files that hold the CPU limit, written as
// suffixes to append to a cgroup directory path: v2MaxFile is cpu.max
// (cgroup v2), v1QuotaFile is cpu.cfs_quota_us and v1PeriodFile is
// cpu.cfs_period_us (both cgroup v1).
//
// Each name ends in an explicit NUL so that copying the constant into a byte
// slice also copies the terminator of the path that is handed to linux.Open.
const (
	v2MaxFile    = "/cpu.max\x00"
	v1QuotaFile  = "/cpu.cfs_quota_us\x00"
	v1PeriodFile = "/cpu.cfs_period_us\x00"
)
    17  
// CPU owns the FDs required to read the CPU limit from a cgroup.
//
// Values are created by OpenCPU and released with Close.
type CPU struct {
	// version selects the cgroup file layout, and therefore which of the
	// FDs below are in use.
	version Version

	// For cgroup v1, this is cpu.cfs_quota_us.
	// For cgroup v2, this is cpu.max.
	quotaFD int

	// For cgroup v1, this is cpu.cfs_period_us.
	// For cgroup v2, this is unused; OpenCPU stores -1 here.
	periodFD int
}
    30  
    31  func (c CPU) Close() {
    32  	switch c.version {
    33  	case V1:
    34  		linux.Close(c.quotaFD)
    35  		linux.Close(c.periodFD)
    36  	case V2:
    37  		linux.Close(c.quotaFD)
    38  	default:
    39  		throw("impossible cgroup version")
    40  	}
    41  }
    42  
    43  func checkBufferSize(s []byte, size int) {
    44  	if len(s) != size {
    45  		println("runtime: cgroup buffer length", len(s), "want", size)
    46  		throw("runtime: cgroup invalid buffer length")
    47  	}
    48  }
    49  
    50  // OpenCPU returns a CPU for the CPU cgroup containing the current process, or
    51  // ErrNoCgroup if the process is not in a CPU cgroup.
    52  //
    53  // scratch must have length ScratchSize.
    54  func OpenCPU(scratch []byte) (CPU, error) {
    55  	checkBufferSize(scratch, ScratchSize)
    56  
    57  	base := scratch[:PathSize]
    58  	scratch2 := scratch[PathSize:]
    59  
    60  	n, version, err := FindCPU(base, scratch2)
    61  	if err != nil {
    62  		return CPU{}, err
    63  	}
    64  
    65  	switch version {
    66  	case 1:
    67  		n2 := copy(base[n:], v1QuotaFile)
    68  		path := base[:n+n2]
    69  		quotaFD, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
    70  		if errno != 0 {
    71  			// This may fail if this process was migrated out of
    72  			// the cgroup found by FindCPU and that cgroup has been
    73  			// deleted.
    74  			return CPU{}, errSyscallFailed
    75  		}
    76  
    77  		n2 = copy(base[n:], v1PeriodFile)
    78  		path = base[:n+n2]
    79  		periodFD, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
    80  		if errno != 0 {
    81  			// This may fail if this process was migrated out of
    82  			// the cgroup found by FindCPU and that cgroup has been
    83  			// deleted.
    84  			return CPU{}, errSyscallFailed
    85  		}
    86  
    87  		c := CPU{
    88  			version:  1,
    89  			quotaFD:  quotaFD,
    90  			periodFD: periodFD,
    91  		}
    92  		return c, nil
    93  	case 2:
    94  		n2 := copy(base[n:], v2MaxFile)
    95  		path := base[:n+n2]
    96  		maxFD, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
    97  		if errno != 0 {
    98  			// This may fail if this process was migrated out of
    99  			// the cgroup found by FindCPU and that cgroup has been
   100  			// deleted.
   101  			return CPU{}, errSyscallFailed
   102  		}
   103  
   104  		c := CPU{
   105  			version:  2,
   106  			quotaFD:  maxFD,
   107  			periodFD: -1,
   108  		}
   109  		return c, nil
   110  	default:
   111  		throw("impossible cgroup version")
   112  		panic("unreachable")
   113  	}
   114  }
   115  
   116  // Returns average CPU throughput limit from the cgroup, or ok false if there
   117  // is no limit.
   118  func ReadCPULimit(c CPU) (float64, bool, error) {
   119  	switch c.version {
   120  	case 1:
   121  		quota, err := readV1Number(c.quotaFD)
   122  		if err != nil {
   123  			return 0, false, errMalformedFile
   124  		}
   125  
   126  		if quota < 0 {
   127  			// No limit.
   128  			return 0, false, nil
   129  		}
   130  
   131  		period, err := readV1Number(c.periodFD)
   132  		if err != nil {
   133  			return 0, false, errMalformedFile
   134  		}
   135  
   136  		return float64(quota) / float64(period), true, nil
   137  	case 2:
   138  		// quotaFD is the cpu.max FD.
   139  		return readV2Limit(c.quotaFD)
   140  	default:
   141  		throw("impossible cgroup version")
   142  		panic("unreachable")
   143  	}
   144  }
   145  
   146  // Returns the value from the quota/period file.
   147  func readV1Number(fd int) (int64, error) {
   148  	// The format of the file is "<value>\n" where the value is in
   149  	// int64 microseconds and, if quota, may be -1 to indicate no limit.
   150  	//
   151  	// MaxInt64 requires 19 bytes to display in base 10, thus the
   152  	// conservative max size of this file is 19 + 1 (newline) = 20 bytes.
   153  	// We'll provide a bit more for good measure.
   154  	//
   155  	// Always read from the beginning of the file to get a fresh value.
   156  	var b [64]byte
   157  	n, errno := linux.Pread(fd, b[:], 0)
   158  	if errno != 0 {
   159  		return 0, errSyscallFailed
   160  	}
   161  	if n == len(b) {
   162  		return 0, errMalformedFile
   163  	}
   164  
   165  	buf := b[:n]
   166  	return parseV1Number(buf)
   167  }
   168  
   169  // Returns CPU throughput limit, or ok false if there is no limit.
   170  func readV2Limit(fd int) (float64, bool, error) {
   171  	// The format of the file is "<quota> <period>\n" where quota and
   172  	// period are microseconds and quota may be "max" to indicate no limit.
   173  	//
   174  	// Note that the kernel is inconsistent about whether the values are
   175  	// uint64 or int64: values are parsed as uint64 but printed as int64.
   176  	// See kernel/sched/core.c:cpu_max_{show,write}.
   177  	//
   178  	// In practice, the kernel limits the period to 1s (1000000us) (see
   179  	// max_cfs_quota_period), and the quota to (1<<44)us (see
   180  	// max_cfs_runtime), so these values can't get large enough for the
   181  	// distinction to matter.
   182  	//
   183  	// MaxInt64 requires 19 bytes to display in base 10, thus the
   184  	// conservative max size of this file is 19 + 19 + 1 (space) + 1
   185  	// (newline) = 40 bytes. We'll provide a bit more for good measure.
   186  	//
   187  	// Always read from the beginning of the file to get a fresh value.
   188  	var b [64]byte
   189  	n, errno := linux.Pread(fd, b[:], 0)
   190  	if errno != 0 {
   191  		return 0, false, errSyscallFailed
   192  	}
   193  	if n == len(b) {
   194  		return 0, false, errMalformedFile
   195  	}
   196  
   197  	buf := b[:n]
   198  	return parseV2Limit(buf)
   199  }
   200  
   201  // FindCPU finds the path to the CPU cgroup that this process is a member of
   202  // and places it in out. scratch is a scratch buffer for internal use.
   203  //
   204  // out must have length PathSize. scratch must have length ParseSize.
   205  //
   206  // Returns the number of bytes written to out and the cgroup version (1 or 2).
   207  //
   208  // Returns ErrNoCgroup if the process is not in a CPU cgroup.
   209  func FindCPU(out []byte, scratch []byte) (int, Version, error) {
   210  	checkBufferSize(out, PathSize)
   211  	checkBufferSize(scratch, ParseSize)
   212  
   213  	// The cgroup path is <cgroup mount point> + <relative path>.
   214  	// relative path is the cgroup relative to the mount root.
   215  
   216  	n, version, err := FindCPUCgroup(out, scratch)
   217  	if err != nil {
   218  		return 0, 0, err
   219  	}
   220  
   221  	n, err = FindCPUMountPoint(out, out[:n], version, scratch)
   222  	return n, version, err
   223  }
   224  
   225  // FindCPUCgroup finds the path to the CPU cgroup that this process is a member of
   226  // and places it in out. scratch is a scratch buffer for internal use.
   227  //
   228  // out must have length PathSize. scratch must have length ParseSize.
   229  //
   230  // Returns the number of bytes written to out and the cgroup version (1 or 2).
   231  //
   232  // Returns ErrNoCgroup if the process is not in a CPU cgroup.
   233  func FindCPUCgroup(out []byte, scratch []byte) (int, Version, error) {
   234  	path := []byte("/proc/self/cgroup\x00")
   235  	fd, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
   236  	if errno == linux.ENOENT {
   237  		return 0, 0, ErrNoCgroup
   238  	} else if errno != 0 {
   239  		return 0, 0, errSyscallFailed
   240  	}
   241  
   242  	// The relative path always starts with /, so we can directly append it
   243  	// to the mount point.
   244  	n, version, err := parseCPUCgroup(fd, linux.Read, out[:], scratch)
   245  	if err != nil {
   246  		linux.Close(fd)
   247  		return 0, 0, err
   248  	}
   249  
   250  	linux.Close(fd)
   251  	return n, version, nil
   252  }
   253  
   254  // FindCPUMountPoint finds the mount point containing the specified cgroup and
   255  // version with cpu controller, and compose the full path to the cgroup in out.
   256  // scratch is a scratch buffer for internal use.
   257  //
   258  // out must have length PathSize, may overlap with cgroup.
   259  // scratch must have length ParseSize.
   260  //
   261  // Returns the number of bytes written to out.
   262  //
   263  // Returns ErrNoCgroup if no matching mount point is found.
   264  func FindCPUMountPoint(out, cgroup []byte, version Version, scratch []byte) (int, error) {
   265  	checkBufferSize(out, PathSize)
   266  	checkBufferSize(scratch, ParseSize)
   267  
   268  	path := []byte("/proc/self/mountinfo\x00")
   269  	fd, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
   270  	if errno == linux.ENOENT {
   271  		return 0, ErrNoCgroup
   272  	} else if errno != 0 {
   273  		return 0, errSyscallFailed
   274  	}
   275  
   276  	n, err := parseCPUMount(fd, linux.Read, out, cgroup, version, scratch)
   277  	if err != nil {
   278  		linux.Close(fd)
   279  		return 0, err
   280  	}
   281  	linux.Close(fd)
   282  
   283  	return n, nil
   284  }
   285  

View as plain text