--- old/src/hotspot/os/linux/cgroupSubsystem_linux.cpp 2019-10-14 20:38:38.123929959 +0200 +++ new/src/hotspot/os/linux/cgroupSubsystem_linux.cpp 2019-10-14 20:38:37.997929696 +0200 @@ -27,6 +27,7 @@ #include #include "cgroupSubsystem_linux.hpp" #include "cgroupV1Subsystem_linux.hpp" +#include "cgroupV2Subsystem_linux.hpp" #include "logging/log.hpp" #include "memory/allocation.hpp" #include "runtime/globals.hpp" @@ -39,11 +40,176 @@ CgroupV1Controller* cpu = NULL; CgroupV1Controller* cpuacct = NULL; FILE *mntinfo = NULL; + FILE *cgroups = NULL; FILE *cgroup = NULL; char buf[MAXPATHLEN+1]; char tmproot[MAXPATHLEN+1]; char tmpmount[MAXPATHLEN+1]; char *p; + bool is_cgroupsV2; + // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled + // at the kernel level. + bool all_controllers_enabled; + + CgroupInfo cg_infos[CG_INFO_LENGTH]; + int cpuset_idx = 0; + int cpu_idx = 1; + int cpuacct_idx = 2; + int memory_idx = 3; + + /* + * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1. + * + * For cgroups v1 (hybrid or legacy hierarchy), the cpu, cpuacct, cpuset and memory controllers + * must have a non-zero hierarchy ID field; all of them being zero is treated as cgroups v2. 
+ */ + cgroups = fopen("/proc/cgroups", "r"); + if (cgroups == NULL) { + log_debug(os, container)("Can't open /proc/cgroups, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) { + char name[MAXPATHLEN+1]; + int hierarchy_id; + int enabled; + + // Format of /proc/cgroups documented via man 7 cgroups + if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) { + continue; + } + if (strcmp(name, "memory") == 0) { + cg_infos[memory_idx]._name = os::strdup(name); + cg_infos[memory_idx]._hierarchy_id = hierarchy_id; + cg_infos[memory_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpuset") == 0) { + cg_infos[cpuset_idx]._name = os::strdup(name); + cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpuset_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpu") == 0) { + cg_infos[cpu_idx]._name = os::strdup(name); + cg_infos[cpu_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpu_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpuacct") == 0) { + cg_infos[cpuacct_idx]._name = os::strdup(name); + cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpuacct_idx]._enabled = (enabled == 1); + } + } + fclose(cgroups); + + is_cgroupsV2 = true; + all_controllers_enabled = true; + for (int i = 0; i < CG_INFO_LENGTH; i++) { + is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0; + all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled; + } + + if (!all_controllers_enabled) { + // one or more required controllers are not enabled, disable container support + log_debug(os, container)("One or more required controllers not enabled at kernel level."); + return NULL; + } + + /* + * Read /proc/self/cgroup and determine: + * - the cgroup path for cgroups v2 or + * - on a cgroups v1 system, collect info for mapping + * the host mount point to the local one via /proc/self/mountinfo below. 
+ */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + log_debug(os, container)("Can't open /proc/self/cgroup, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { + char *controllers; + char *token; + char *hierarchy_id_str; + int hierarchy_id; + char *cgroup_path; + + hierarchy_id_str = strsep(&p, ":"); + hierarchy_id = atoi(hierarchy_id_str); + /* Get controllers and base */ + controllers = strsep(&p, ":"); + cgroup_path = strsep(&p, "\n"); + + if (controllers == NULL) { + continue; + } + + while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) { + if (strcmp(token, "memory") == 0) { + assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpuset") == 0) { + assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpu") == 0) { + assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpuacct") == 0) { + assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path); + } + } + if (is_cgroupsV2) { + for (int i = 0; i < CG_INFO_LENGTH; i++) { + cg_infos[i]._cgroup_path = os::strdup(cgroup_path); + } + } + } + fclose(cgroup); + + if (is_cgroupsV2) { + // Find the cgroup2 mount point by reading /proc/self/mountinfo + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_debug(os, container)("Can't open /proc/self/mountinfo, %s", + os::strerror(errno)); + return NULL; + } + + char 
cgroupv2_mount[MAXPATHLEN+1]; + char fstype[MAXPATHLEN+1]; + bool mount_point_found = false; + while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + char *tmp_mount_point = cgroupv2_mount; + char *tmp_fs_type = fstype; + + // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt + if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) { + // we likely have an early match return, be sure we have cgroup2 as fstype + if (strcmp("cgroup2", tmp_fs_type) == 0) { + mount_point_found = true; + break; + } + } + } + fclose(mntinfo); + if (!mount_point_found) { + log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo"); + return NULL; + } + // Cgroups v2 case, we have all the info we need. + // Construct the subsystem, free resources and return + // Note: any index in cg_infos will do as the path is the same for + // all controllers. + CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path); + for (int i = 0; i < CG_INFO_LENGTH; i++) { + os::free(cg_infos[i]._name); + os::free(cg_infos[i]._cgroup_path); + } + log_debug(os, container)("Detected cgroups v2 unified hierarchy"); + return new CgroupV2Subsystem(unified); + } + + // What follows is cgroups v1 + log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers"); /* * Find the cgroup mount point for memory and cpuset @@ -87,24 +253,25 @@ fclose(mntinfo); if (memory == NULL) { - log_debug(os, container)("Required cgroup memory subsystem not found"); + log_debug(os, container)("Required cgroup v1 memory subsystem not found"); return NULL; } if (cpuset == NULL) { - log_debug(os, container)("Required cgroup cpuset subsystem not found"); + log_debug(os, container)("Required cgroup v1 cpuset subsystem not found"); return NULL; } if (cpu == NULL) { - log_debug(os, container)("Required cgroup cpu subsystem not found"); + 
log_debug(os, container)("Required cgroup v1 cpu subsystem not found"); return NULL; } if (cpuacct == NULL) { - log_debug(os, container)("Required cgroup cpuacct subsystem not found"); + log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found"); return NULL; } /* - * Read /proc/self/cgroup and map host mount point to + * Use info gathered previously from /proc/self/cgroup + * and map host mount point to * local one via /proc/self/mountinfo content above * * Docker example: @@ -126,42 +293,18 @@ * /sys/fs/cgroup/memory/user.slice * */ - cgroup = fopen("/proc/self/cgroup", "r"); - if (cgroup == NULL) { - log_debug(os, container)("Can't open /proc/self/cgroup, %s", - os::strerror(errno)); - return NULL; - } - - while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { - char *controllers; - char *token; - char *base; - - /* Skip cgroup number */ - strsep(&p, ":"); - /* Get controllers and base */ - controllers = strsep(&p, ":"); - base = strsep(&p, "\n"); - - if (controllers == NULL) { - continue; - } - - while ((token = strsep(&controllers, ",")) != NULL) { - if (strcmp(token, "memory") == 0) { - memory->set_subsystem_path(base); - } else if (strcmp(token, "cpuset") == 0) { - cpuset->set_subsystem_path(base); - } else if (strcmp(token, "cpu") == 0) { - cpu->set_subsystem_path(base); - } else if (strcmp(token, "cpuacct") == 0) { - cpuacct->set_subsystem_path(base); - } + for (int i = 0; i < CG_INFO_LENGTH; i++) { + CgroupInfo info = cg_infos[i]; + if (strcmp(info._name, "memory") == 0) { + memory->set_subsystem_path(info._cgroup_path); + } else if (strcmp(info._name, "cpuset") == 0) { + cpuset->set_subsystem_path(info._cgroup_path); + } else if (strcmp(info._name, "cpu") == 0) { + cpu->set_subsystem_path(info._cgroup_path); + } else if (strcmp(info._name, "cpuacct") == 0) { + cpuacct->set_subsystem_path(info._cgroup_path); } } - - fclose(cgroup); return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory); }