< prev index next >

src/hotspot/os/linux/cgroupSubsystem_linux.cpp

Print this page
@  rev 56576 : 8230305: Cgroups v2: Container awareness
|  Summary: Implement Cgroups v2 container awareness in hotspot
|  Reviewed-by: bobv
o  rev 56575 : 8230848: OSContainer: Refactor container detection code
|  Summary: Move cgroups v1 implementation details out of osContainer_linux.cpp
~  Reviewed-by: bobv


  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include <string.h>
  26 #include <math.h>
  27 #include <errno.h>
  28 #include "cgroupSubsystem_linux.hpp"
  29 #include "cgroupV1Subsystem_linux.hpp"

  30 #include "logging/log.hpp"
  31 #include "memory/allocation.hpp"
  32 #include "runtime/globals.hpp"
  33 #include "runtime/os.hpp"
  34 #include "utilities/globalDefinitions.hpp"
  35 
  36 CgroupSubsystem* CgroupSubsystemFactory::create() {
  37   CgroupV1MemoryController* memory = NULL;
  38   CgroupV1Controller* cpuset = NULL;
  39   CgroupV1Controller* cpu = NULL;
  40   CgroupV1Controller* cpuacct = NULL;
  41   FILE *mntinfo = NULL;

  42   FILE *cgroup = NULL;
  43   char buf[MAXPATHLEN+1];
  44   char tmproot[MAXPATHLEN+1];
  45   char tmpmount[MAXPATHLEN+1];
  46   char *p;




































































































































































  47 
  48   /*
  49    * Find the cgroup mount point for memory and cpuset
  50    * by reading /proc/self/mountinfo
  51    *
  52    * Example for docker:
  53    * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
  54    *
  55    * Example for host:
  56    * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
  57    */
  58   mntinfo = fopen("/proc/self/mountinfo", "r");
  59   if (mntinfo == NULL) {
  60       log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
  61                                os::strerror(errno));
  62       return NULL;
  63   }
  64 
  65   while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
  66     char tmpcgroups[MAXPATHLEN+1];


  70     // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
  71     if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
  72       continue;
  73     }
  74     while ((token = strsep(&cptr, ",")) != NULL) {
  75       if (strcmp(token, "memory") == 0) {
  76         memory = new CgroupV1MemoryController(tmproot, tmpmount);
  77       } else if (strcmp(token, "cpuset") == 0) {
  78         cpuset = new CgroupV1Controller(tmproot, tmpmount);
  79       } else if (strcmp(token, "cpu") == 0) {
  80         cpu = new CgroupV1Controller(tmproot, tmpmount);
  81       } else if (strcmp(token, "cpuacct") == 0) {
  82         cpuacct= new CgroupV1Controller(tmproot, tmpmount);
  83       }
  84     }
  85   }
  86 
  87   fclose(mntinfo);
  88 
  89   if (memory == NULL) {
  90     log_debug(os, container)("Required cgroup memory subsystem not found");
  91     return NULL;
  92   }
  93   if (cpuset == NULL) {
  94     log_debug(os, container)("Required cgroup cpuset subsystem not found");
  95     return NULL;
  96   }
  97   if (cpu == NULL) {
  98     log_debug(os, container)("Required cgroup cpu subsystem not found");
  99     return NULL;
 100   }
 101   if (cpuacct == NULL) {
 102     log_debug(os, container)("Required cgroup cpuacct subsystem not found");
 103     return NULL;
 104   }
 105 
 106   /*
 107    * Read /proc/self/cgroup and map host mount point to

 108    * local one via /proc/self/mountinfo content above
 109    *
 110    * Docker example:
 111    * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
 112    *
 113    * Host example:
 114    * 5:memory:/user.slice
 115    *
 116    * Construct a path to the process specific memory and cpuset
 117    * cgroup directory.
 118    *
 119    * For a container running under Docker from memory example above
 120    * the paths would be:
 121    *
 122    * /sys/fs/cgroup/memory
 123    *
 124    * For a Host from memory example above the path would be:
 125    *
 126    * /sys/fs/cgroup/memory/user.slice
 127    *
 128    */
 129   cgroup = fopen("/proc/self/cgroup", "r");
 130   if (cgroup == NULL) {
 131     log_debug(os, container)("Can't open /proc/self/cgroup, %s",
 132                              os::strerror(errno));
 133     return NULL;
 134   }
 135 
 136   while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
 137     char *controllers;
 138     char *token;
 139     char *base;
 140 
 141     /* Skip cgroup number */
 142     strsep(&p, ":");
 143     /* Get controllers and base */
 144     controllers = strsep(&p, ":");
 145     base = strsep(&p, "\n");
 146 
 147     if (controllers == NULL) {
 148       continue;
 149     }
 150 
 151     while ((token = strsep(&controllers, ",")) != NULL) {
 152       if (strcmp(token, "memory") == 0) {
 153         memory->set_subsystem_path(base);
 154       } else if (strcmp(token, "cpuset") == 0) {
 155         cpuset->set_subsystem_path(base);
 156       } else if (strcmp(token, "cpu") == 0) {
 157         cpu->set_subsystem_path(base);
 158       } else if (strcmp(token, "cpuacct") == 0) {
 159         cpuacct->set_subsystem_path(base);
 160       }
 161     }
 162   }
 163 
 164   fclose(cgroup);
 165   return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
 166 }
 167 
 168 /* active_processor_count
 169  *
 170  * Calculate an appropriate number of active processors for the
 171  * VM to use based on these three inputs.
 172  *
 173  * cpu affinity
 174  * cgroup cpu quota & cpu period
 175  * cgroup cpu shares
 176  *
 177  * Algorithm:
 178  *
 179  * Determine the number of available CPUs from sched_getaffinity
 180  *
 181  * If user specified a quota (quota != -1), calculate the number of
 182  * required CPUs by dividing quota by period.
 183  *
 184  * If shares are in effect (shares != -1), calculate the number




  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include <string.h>
  26 #include <math.h>
  27 #include <errno.h>
  28 #include "cgroupSubsystem_linux.hpp"
  29 #include "cgroupV1Subsystem_linux.hpp"
  30 #include "cgroupV2Subsystem_linux.hpp"
  31 #include "logging/log.hpp"
  32 #include "memory/allocation.hpp"
  33 #include "runtime/globals.hpp"
  34 #include "runtime/os.hpp"
  35 #include "utilities/globalDefinitions.hpp"
  36 
  37 CgroupSubsystem* CgroupSubsystemFactory::create() {
  38   CgroupV1MemoryController* memory = NULL;
  39   CgroupV1Controller* cpuset = NULL;
  40   CgroupV1Controller* cpu = NULL;
  41   CgroupV1Controller* cpuacct = NULL;
  42   FILE *mntinfo = NULL;
  43   FILE *cgroups = NULL;
  44   FILE *cgroup = NULL;
  45   char buf[MAXPATHLEN+1];
  46   char tmproot[MAXPATHLEN+1];
  47   char tmpmount[MAXPATHLEN+1];
  48   char *p;
  49   bool is_cgroupsV2;
  50   // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
  51   // at the kernel level.
  52   bool all_controllers_enabled;
  53 
  54   CgroupInfo cg_infos[CG_INFO_LENGTH];
  55   int cpuset_idx  = 0;
  56   int cpu_idx     = 1;
  57   int cpuacct_idx = 2;
  58   int memory_idx  = 3;
  59 
  60   /*
  61    * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
  62    *
  63    * For cgroups v1 unified hierarchy, cpu, cpuacct, cpuset, memory controllers
  64    * must have non-zero for the hierarchy ID field.
  65    */
  66   cgroups = fopen("/proc/cgroups", "r");
  67   if (cgroups == NULL) {
  68       log_debug(os, container)("Can't open /proc/cgroups, %s",
  69                                os::strerror(errno));
  70       return NULL;
  71   }
  72 
  73   while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
  74     char name[MAXPATHLEN+1];
  75     int  hierarchy_id;
  76     int  enabled;
  77 
  78     // Format of /proc/cgroups documented via man 7 cgroups
  79     if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
  80       continue;
  81     }
  82     if (strcmp(name, "memory") == 0) {
  83       cg_infos[memory_idx]._name = os::strdup(name);
  84       cg_infos[memory_idx]._hierarchy_id = hierarchy_id;
  85       cg_infos[memory_idx]._enabled = (enabled == 1);
  86     } else if (strcmp(name, "cpuset") == 0) {
  87       cg_infos[cpuset_idx]._name = os::strdup(name);
  88       cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id;
  89       cg_infos[cpuset_idx]._enabled = (enabled == 1);
  90     } else if (strcmp(name, "cpu") == 0) {
  91       cg_infos[cpu_idx]._name = os::strdup(name);
  92       cg_infos[cpu_idx]._hierarchy_id = hierarchy_id;
  93       cg_infos[cpu_idx]._enabled = (enabled == 1);
  94     } else if (strcmp(name, "cpuacct") == 0) {
  95       cg_infos[cpuacct_idx]._name = os::strdup(name);
  96       cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id;
  97       cg_infos[cpuacct_idx]._enabled = (enabled == 1);
  98     }
  99   }
 100   fclose(cgroups);
 101 
 102   is_cgroupsV2 = true;
 103   all_controllers_enabled = true;
 104   for (int i = 0; i < CG_INFO_LENGTH; i++) {
 105     is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
 106     all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
 107   }
 108 
 109   if (!all_controllers_enabled) {
 110     // one or more controllers enabled, disable container support
 111     log_debug(os, container)("One or more required controllers not enabled at kernel level.");
 112     return NULL;
 113   }
 114 
 115   /*
 116    * Read /proc/self/cgroup and determine:
 117    *  - the cgroup path for cgroups v2 or
 118    *  - on a cgroups v1 system, collect info for mapping
 119    *    the host mount point to the local one via /proc/self/mountinfo below.
 120    */
 121   cgroup = fopen("/proc/self/cgroup", "r");
 122   if (cgroup == NULL) {
 123     log_debug(os, container)("Can't open /proc/self/cgroup, %s",
 124                              os::strerror(errno));
 125     return NULL;
 126   }
 127 
 128   while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
 129     char *controllers;
 130     char *token;
 131     char *hierarchy_id_str;
 132     int  hierarchy_id;
 133     char *cgroup_path;
 134 
 135     hierarchy_id_str = strsep(&p, ":");
 136     hierarchy_id = atoi(hierarchy_id_str);
 137     /* Get controllers and base */
 138     controllers = strsep(&p, ":");
 139     cgroup_path = strsep(&p, "\n");
 140 
 141     if (controllers == NULL) {
 142       continue;
 143     }
 144 
 145     while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
 146       if (strcmp(token, "memory") == 0) {
 147         assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
 148         cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path);
 149       } else if (strcmp(token, "cpuset") == 0) {
 150         assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
 151         cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path);
 152       } else if (strcmp(token, "cpu") == 0) {
 153         assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
 154         cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path);
 155       } else if (strcmp(token, "cpuacct") == 0) {
 156         assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
 157         cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path);
 158       }
 159     }
 160     if (is_cgroupsV2) {
 161       for (int i = 0; i < CG_INFO_LENGTH; i++) {
 162         cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
 163       }
 164     }
 165   }
 166   fclose(cgroup);
 167 
 168   if (is_cgroupsV2) {
 169     // Find the cgroup2 mount point by reading /proc/self/mountinfo
 170     mntinfo = fopen("/proc/self/mountinfo", "r");
 171     if (mntinfo == NULL) {
 172         log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
 173                                  os::strerror(errno));
 174         return NULL;
 175     }
 176 
 177     char cgroupv2_mount[MAXPATHLEN+1];
 178     char fstype[MAXPATHLEN+1];
 179     bool mount_point_found = false;
 180     while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
 181       char *tmp_mount_point = cgroupv2_mount;
 182       char *tmp_fs_type = fstype;
 183 
 184       // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
 185       if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
 186         // we likely have an early match return, be sure we have cgroup2 as fstype
 187         if (strcmp("cgroup2", tmp_fs_type) == 0) {
 188           mount_point_found = true;
 189           break;
 190         }
 191       }
 192     }
 193     fclose(mntinfo);
 194     if (!mount_point_found) {
 195       log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
 196       return NULL;
 197     }
 198     // Cgroups v2 case, we have all the info we need.
 199     // Construct the subsystem, free resources and return
 200     // Note: any index in cg_infos will do as the path is the same for
 201     //       all controllers.
 202     CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path);
 203     for (int i = 0; i < CG_INFO_LENGTH; i++) {
 204       os::free(cg_infos[i]._name);
 205       os::free(cg_infos[i]._cgroup_path);
 206     }
 207     log_debug(os, container)("Detected cgroups v2 unified hierarchy");
 208     return new CgroupV2Subsystem(unified);
 209   }
 210 
 211   // What follows is cgroups v1
 212   log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");
 213 
 214   /*
 215    * Find the cgroup mount point for memory and cpuset
 216    * by reading /proc/self/mountinfo
 217    *
 218    * Example for docker:
 219    * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
 220    *
 221    * Example for host:
 222    * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
 223    */
 224   mntinfo = fopen("/proc/self/mountinfo", "r");
 225   if (mntinfo == NULL) {
 226       log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
 227                                os::strerror(errno));
 228       return NULL;
 229   }
 230 
 231   while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
 232     char tmpcgroups[MAXPATHLEN+1];


 236     // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
 237     if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
 238       continue;
 239     }
 240     while ((token = strsep(&cptr, ",")) != NULL) {
 241       if (strcmp(token, "memory") == 0) {
 242         memory = new CgroupV1MemoryController(tmproot, tmpmount);
 243       } else if (strcmp(token, "cpuset") == 0) {
 244         cpuset = new CgroupV1Controller(tmproot, tmpmount);
 245       } else if (strcmp(token, "cpu") == 0) {
 246         cpu = new CgroupV1Controller(tmproot, tmpmount);
 247       } else if (strcmp(token, "cpuacct") == 0) {
 248         cpuacct= new CgroupV1Controller(tmproot, tmpmount);
 249       }
 250     }
 251   }
 252 
 253   fclose(mntinfo);
 254 
 255   if (memory == NULL) {
 256     log_debug(os, container)("Required cgroup v1 memory subsystem not found");
 257     return NULL;
 258   }
 259   if (cpuset == NULL) {
 260     log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
 261     return NULL;
 262   }
 263   if (cpu == NULL) {
 264     log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
 265     return NULL;
 266   }
 267   if (cpuacct == NULL) {
 268     log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
 269     return NULL;
 270   }
 271 
 272   /*
 273    * Use info gathered previously from /proc/self/cgroup
 274    * and map host mount point to
 275    * local one via /proc/self/mountinfo content above
 276    *
 277    * Docker example:
 278    * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
 279    *
 280    * Host example:
 281    * 5:memory:/user.slice
 282    *
 283    * Construct a path to the process specific memory and cpuset
 284    * cgroup directory.
 285    *
 286    * For a container running under Docker from memory example above
 287    * the paths would be:
 288    *
 289    * /sys/fs/cgroup/memory
 290    *
 291    * For a Host from memory example above the path would be:
 292    *
 293    * /sys/fs/cgroup/memory/user.slice
 294    *
 295    */
 296   for (int i = 0; i < CG_INFO_LENGTH; i++) {
 297     CgroupInfo info = cg_infos[i];
 298     if (strcmp(info._name, "memory") == 0) {
 299       memory->set_subsystem_path(info._cgroup_path);
 300     } else if (strcmp(info._name, "cpuset") == 0) {
 301       cpuset->set_subsystem_path(info._cgroup_path);
 302     } else if (strcmp(info._name, "cpu") == 0) {
 303       cpu->set_subsystem_path(info._cgroup_path);
 304     } else if (strcmp(info._name, "cpuacct") == 0) {
 305       cpuacct->set_subsystem_path(info._cgroup_path);






















 306     }
 307   }


 308   return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
 309 }
 310 
 311 /* active_processor_count
 312  *
 313  * Calculate an appropriate number of active processors for the
 314  * VM to use based on these three inputs.
 315  *
 316  * cpu affinity
 317  * cgroup cpu quota & cpu period
 318  * cgroup cpu shares
 319  *
 320  * Algorithm:
 321  *
 322  * Determine the number of available CPUs from sched_getaffinity
 323  *
 324  * If user specified a quota (quota != -1), calculate the number of
 325  * required CPUs by dividing quota by period.
 326  *
 327  * If shares are in effect (shares != -1), calculate the number


< prev index next >