< prev index next >

src/hotspot/os/linux/cgroupSubsystem_linux.cpp

Print this page
@  rev 56576 : 8230305: Cgroups v2: Container awareness
|  Summary: Implement Cgroups v2 container awareness in hotspot
|  Reviewed-by: bobv
o  rev 56575 : 8230848: OSContainer: Refactor container detection code
|  Summary: Move cgroups v1 implementation details out of osContainer_linux.cpp
~  Reviewed-by: bobv

*** 25,34 **** --- 25,35 ---- #include <string.h> #include <math.h> #include <errno.h> #include "cgroupSubsystem_linux.hpp" #include "cgroupV1Subsystem_linux.hpp" + #include "cgroupV2Subsystem_linux.hpp" #include "logging/log.hpp" #include "memory/allocation.hpp" #include "runtime/globals.hpp" #include "runtime/os.hpp" #include "utilities/globalDefinitions.hpp"
*** 37,51 **** --- 38,217 ---- CgroupV1MemoryController* memory = NULL; CgroupV1Controller* cpuset = NULL; CgroupV1Controller* cpu = NULL; CgroupV1Controller* cpuacct = NULL; FILE *mntinfo = NULL; + FILE *cgroups = NULL; FILE *cgroup = NULL; char buf[MAXPATHLEN+1]; char tmproot[MAXPATHLEN+1]; char tmpmount[MAXPATHLEN+1]; char *p; + bool is_cgroupsV2; + // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled + // at the kernel level. + bool all_controllers_enabled; + + CgroupInfo cg_infos[CG_INFO_LENGTH]; + int cpuset_idx = 0; + int cpu_idx = 1; + int cpuacct_idx = 2; + int memory_idx = 3; + + /* + * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1. + * + * For cgroups v1 (hybrid or legacy hierarchy), cpu, cpuacct, cpuset, memory controllers + * must have a non-zero hierarchy ID field; for cgroups v2 (unified hierarchy) it is 0. + */ + cgroups = fopen("/proc/cgroups", "r"); + if (cgroups == NULL) { + log_debug(os, container)("Can't open /proc/cgroups, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) { + char name[MAXPATHLEN+1]; + int hierarchy_id; + int enabled; + + // Format of /proc/cgroups documented via man 7 cgroups + if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) { + continue; + } + if (strcmp(name, "memory") == 0) { + cg_infos[memory_idx]._name = os::strdup(name); + cg_infos[memory_idx]._hierarchy_id = hierarchy_id; + cg_infos[memory_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpuset") == 0) { + cg_infos[cpuset_idx]._name = os::strdup(name); + cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpuset_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpu") == 0) { + cg_infos[cpu_idx]._name = os::strdup(name); + cg_infos[cpu_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpu_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpuacct") == 0) { + cg_infos[cpuacct_idx]._name = os::strdup(name); + cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id; + 
cg_infos[cpuacct_idx]._enabled = (enabled == 1); + } + } + fclose(cgroups); + + is_cgroupsV2 = true; + all_controllers_enabled = true; + for (int i = 0; i < CG_INFO_LENGTH; i++) { + is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0; + all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled; + } + + if (!all_controllers_enabled) { + // one or more controllers disabled, disable container support + log_debug(os, container)("One or more required controllers not enabled at kernel level."); + return NULL; + } + + /* + * Read /proc/self/cgroup and determine: + * - the cgroup path for cgroups v2 or + * - on a cgroups v1 system, collect info for mapping + * the host mount point to the local one via /proc/self/mountinfo below. + */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + log_debug(os, container)("Can't open /proc/self/cgroup, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { + char *controllers; + char *token; + char *hierarchy_id_str; + int hierarchy_id; + char *cgroup_path; + + hierarchy_id_str = strsep(&p, ":"); + hierarchy_id = atoi(hierarchy_id_str); + /* Get controllers and base */ + controllers = strsep(&p, ":"); + cgroup_path = strsep(&p, "\n"); + + if (controllers == NULL) { + continue; + } + + while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) { + if (strcmp(token, "memory") == 0) { + assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpuset") == 0) { + assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpu") == 0) { + assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy 
mismatch"); + cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpuacct") == 0) { + assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path); + } + } + if (is_cgroupsV2) { + for (int i = 0; i < CG_INFO_LENGTH; i++) { + cg_infos[i]._cgroup_path = os::strdup(cgroup_path); + } + } + } + fclose(cgroup); + + if (is_cgroupsV2) { + // Find the cgroup2 mount point by reading /proc/self/mountinfo + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_debug(os, container)("Can't open /proc/self/mountinfo, %s", + os::strerror(errno)); + return NULL; + } + + char cgroupv2_mount[MAXPATHLEN+1]; + char fstype[MAXPATHLEN+1]; + bool mount_point_found = false; + while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + char *tmp_mount_point = cgroupv2_mount; + char *tmp_fs_type = fstype; + + // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt + if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) { + // we likely have an early match return, be sure we have cgroup2 as fstype + if (strcmp("cgroup2", tmp_fs_type) == 0) { + mount_point_found = true; + break; + } + } + } + fclose(mntinfo); + if (!mount_point_found) { + log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo"); + return NULL; + } + // Cgroups v2 case, we have all the info we need. + // Construct the subsystem, free resources and return + // Note: any index in cg_infos will do as the path is the same for + // all controllers. 
+ CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path); + for (int i = 0; i < CG_INFO_LENGTH; i++) { + os::free(cg_infos[i]._name); + os::free(cg_infos[i]._cgroup_path); + } + log_debug(os, container)("Detected cgroups v2 unified hierarchy"); + return new CgroupV2Subsystem(unified); + } + + // What follows is cgroups v1 + log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers"); /* * Find the cgroup mount point for memory and cpuset * by reading /proc/self/mountinfo *
*** 85,112 **** } fclose(mntinfo); if (memory == NULL) { ! log_debug(os, container)("Required cgroup memory subsystem not found"); return NULL; } if (cpuset == NULL) { ! log_debug(os, container)("Required cgroup cpuset subsystem not found"); return NULL; } if (cpu == NULL) { ! log_debug(os, container)("Required cgroup cpu subsystem not found"); return NULL; } if (cpuacct == NULL) { ! log_debug(os, container)("Required cgroup cpuacct subsystem not found"); return NULL; } /* ! * Read /proc/self/cgroup and map host mount point to * local one via /proc/self/mountinfo content above * * Docker example: * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 * --- 251,279 ---- } fclose(mntinfo); if (memory == NULL) { ! log_debug(os, container)("Required cgroup v1 memory subsystem not found"); return NULL; } if (cpuset == NULL) { ! log_debug(os, container)("Required cgroup v1 cpuset subsystem not found"); return NULL; } if (cpu == NULL) { ! log_debug(os, container)("Required cgroup v1 cpu subsystem not found"); return NULL; } if (cpuacct == NULL) { ! log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found"); return NULL; } /* ! * Use info gathered previously from /proc/self/cgroup ! * and map host mount point to * local one via /proc/self/mountinfo content above * * Docker example: * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 *
*** 124,169 **** * For a Host from memory example above the path would be: * * /sys/fs/cgroup/memory/user.slice * */ ! cgroup = fopen("/proc/self/cgroup", "r"); ! if (cgroup == NULL) { ! log_debug(os, container)("Can't open /proc/self/cgroup, %s", ! os::strerror(errno)); ! return NULL; ! } ! ! while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { ! char *controllers; ! char *token; ! char *base; ! ! /* Skip cgroup number */ ! strsep(&p, ":"); ! /* Get controllers and base */ ! controllers = strsep(&p, ":"); ! base = strsep(&p, "\n"); ! ! if (controllers == NULL) { ! continue; ! } ! ! while ((token = strsep(&controllers, ",")) != NULL) { ! if (strcmp(token, "memory") == 0) { ! memory->set_subsystem_path(base); ! } else if (strcmp(token, "cpuset") == 0) { ! cpuset->set_subsystem_path(base); ! } else if (strcmp(token, "cpu") == 0) { ! cpu->set_subsystem_path(base); ! } else if (strcmp(token, "cpuacct") == 0) { ! cpuacct->set_subsystem_path(base); ! } } } - - fclose(cgroup); return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory); } /* active_processor_count * --- 291,312 ---- * For a Host from memory example above the path would be: * * /sys/fs/cgroup/memory/user.slice * */ ! for (int i = 0; i < CG_INFO_LENGTH; i++) { ! CgroupInfo info = cg_infos[i]; ! if (strcmp(info._name, "memory") == 0) { ! memory->set_subsystem_path(info._cgroup_path); ! } else if (strcmp(info._name, "cpuset") == 0) { ! cpuset->set_subsystem_path(info._cgroup_path); ! } else if (strcmp(info._name, "cpu") == 0) { ! cpu->set_subsystem_path(info._cgroup_path); ! } else if (strcmp(info._name, "cpuacct") == 0) { ! cpuacct->set_subsystem_path(info._cgroup_path); } } return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory); } /* active_processor_count *
< prev index next >