< prev index next >
src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Print this page
@ rev 56576 : 8230305: Cgroups v2: Container awareness
| Summary: Implement Cgroups v2 container awareness in hotspot
| Reviewed-by: bobv
o rev 56575 : 8230848: OSContainer: Refactor container detection code
| Summary: Move cgroups v1 implementation details out of osContainer_linux.cpp
~ Reviewed-by: bobv
*** 25,34 ****
--- 25,35 ----
#include <string.h>
#include <math.h>
#include <errno.h>
#include "cgroupSubsystem_linux.hpp"
#include "cgroupV1Subsystem_linux.hpp"
+ #include "cgroupV2Subsystem_linux.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "runtime/globals.hpp"
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"
*** 37,51 ****
--- 38,217 ----
CgroupV1MemoryController* memory = NULL;
CgroupV1Controller* cpuset = NULL;
CgroupV1Controller* cpu = NULL;
CgroupV1Controller* cpuacct = NULL;
FILE *mntinfo = NULL;
+ FILE *cgroups = NULL;
FILE *cgroup = NULL;
char buf[MAXPATHLEN+1];
char tmproot[MAXPATHLEN+1];
char tmpmount[MAXPATHLEN+1];
char *p;
+ bool is_cgroupsV2;
+ // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
+ // at the kernel level.
+ bool all_controllers_enabled;
+
+ CgroupInfo cg_infos[CG_INFO_LENGTH];
+ int cpuset_idx = 0;
+ int cpu_idx = 1;
+ int cpuacct_idx = 2;
+ int memory_idx = 3;
+
+ /*
+ * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
+ *
+ * For cgroups v1 (hybrid or legacy hierarchy), the cpu, cpuacct, cpuset and
+ * memory controllers must have a non-zero hierarchy ID field. A hierarchy ID
+ * of 0 for all of them indicates the cgroups v2 unified hierarchy.
+ */
+ cgroups = fopen("/proc/cgroups", "r");
+ if (cgroups == NULL) {
+ log_debug(os, container)("Can't open /proc/cgroups, %s",
+ os::strerror(errno));
+ return NULL;
+ }
+
+ while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
+ char name[MAXPATHLEN+1];
+ int hierarchy_id;
+ int enabled;
+
+ // Format of /proc/cgroups documented via man 7 cgroups
+ if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
+ continue;
+ }
+ if (strcmp(name, "memory") == 0) {
+ cg_infos[memory_idx]._name = os::strdup(name);
+ cg_infos[memory_idx]._hierarchy_id = hierarchy_id;
+ cg_infos[memory_idx]._enabled = (enabled == 1);
+ } else if (strcmp(name, "cpuset") == 0) {
+ cg_infos[cpuset_idx]._name = os::strdup(name);
+ cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id;
+ cg_infos[cpuset_idx]._enabled = (enabled == 1);
+ } else if (strcmp(name, "cpu") == 0) {
+ cg_infos[cpu_idx]._name = os::strdup(name);
+ cg_infos[cpu_idx]._hierarchy_id = hierarchy_id;
+ cg_infos[cpu_idx]._enabled = (enabled == 1);
+ } else if (strcmp(name, "cpuacct") == 0) {
+ cg_infos[cpuacct_idx]._name = os::strdup(name);
+ cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id;
+ cg_infos[cpuacct_idx]._enabled = (enabled == 1);
+ }
+ }
+ fclose(cgroups);
+
+ is_cgroupsV2 = true;
+ all_controllers_enabled = true;
+ for (int i = 0; i < CG_INFO_LENGTH; i++) {
+ is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
+ all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
+ }
+
+ if (!all_controllers_enabled) {
+ // one or more required controllers are not enabled, disable container support
+ log_debug(os, container)("One or more required controllers not enabled at kernel level.");
+ return NULL;
+ }
+
+ /*
+ * Read /proc/self/cgroup and determine:
+ * - the cgroup path for cgroups v2 or
+ * - on a cgroups v1 system, collect info for mapping
+ * the host mount point to the local one via /proc/self/mountinfo below.
+ */
+ cgroup = fopen("/proc/self/cgroup", "r");
+ if (cgroup == NULL) {
+ log_debug(os, container)("Can't open /proc/self/cgroup, %s",
+ os::strerror(errno));
+ return NULL;
+ }
+
+ while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
+ char *controllers;
+ char *token;
+ char *hierarchy_id_str;
+ int hierarchy_id;
+ char *cgroup_path;
+
+ hierarchy_id_str = strsep(&p, ":");
+ hierarchy_id = atoi(hierarchy_id_str);
+ /* Get controllers and base */
+ controllers = strsep(&p, ":");
+ cgroup_path = strsep(&p, "\n");
+
+ if (controllers == NULL) {
+ continue;
+ }
+
+ while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
+ if (strcmp(token, "memory") == 0) {
+ assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+ cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path);
+ } else if (strcmp(token, "cpuset") == 0) {
+ assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+ cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path);
+ } else if (strcmp(token, "cpu") == 0) {
+ assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+ cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path);
+ } else if (strcmp(token, "cpuacct") == 0) {
+ assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+ cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path);
+ }
+ }
+ if (is_cgroupsV2) {
+ for (int i = 0; i < CG_INFO_LENGTH; i++) {
+ cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
+ }
+ }
+ }
+ fclose(cgroup);
+
+ if (is_cgroupsV2) {
+ // Find the cgroup2 mount point by reading /proc/self/mountinfo
+ mntinfo = fopen("/proc/self/mountinfo", "r");
+ if (mntinfo == NULL) {
+ log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
+ os::strerror(errno));
+ return NULL;
+ }
+
+ char cgroupv2_mount[MAXPATHLEN+1];
+ char fstype[MAXPATHLEN+1];
+ bool mount_point_found = false;
+ while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
+ char *tmp_mount_point = cgroupv2_mount;
+ char *tmp_fs_type = fstype;
+
+ // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+ if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
+ // sscanf returns 2 as soon as both fields are assigned, even if the rest of
+ // the format did not match, so verify that the fstype really is cgroup2
+ if (strcmp("cgroup2", tmp_fs_type) == 0) {
+ mount_point_found = true;
+ break;
+ }
+ }
+ }
+ fclose(mntinfo);
+ if (!mount_point_found) {
+ log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
+ return NULL;
+ }
+ // Cgroups v2 case, we have all the info we need.
+ // Construct the subsystem, free resources and return
+ // Note: any index in cg_infos will do as the path is the same for
+ // all controllers.
+ CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path);
+ for (int i = 0; i < CG_INFO_LENGTH; i++) {
+ os::free(cg_infos[i]._name);
+ os::free(cg_infos[i]._cgroup_path);
+ }
+ log_debug(os, container)("Detected cgroups v2 unified hierarchy");
+ return new CgroupV2Subsystem(unified);
+ }
+
+ // What follows is cgroups v1
+ log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");
/*
* Find the cgroup mount point for memory and cpuset
* by reading /proc/self/mountinfo
*
*** 85,112 ****
}
fclose(mntinfo);
if (memory == NULL) {
! log_debug(os, container)("Required cgroup memory subsystem not found");
return NULL;
}
if (cpuset == NULL) {
! log_debug(os, container)("Required cgroup cpuset subsystem not found");
return NULL;
}
if (cpu == NULL) {
! log_debug(os, container)("Required cgroup cpu subsystem not found");
return NULL;
}
if (cpuacct == NULL) {
! log_debug(os, container)("Required cgroup cpuacct subsystem not found");
return NULL;
}
/*
! * Read /proc/self/cgroup and map host mount point to
* local one via /proc/self/mountinfo content above
*
* Docker example:
* 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
*
--- 251,279 ----
}
fclose(mntinfo);
if (memory == NULL) {
! log_debug(os, container)("Required cgroup v1 memory subsystem not found");
return NULL;
}
if (cpuset == NULL) {
! log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
return NULL;
}
if (cpu == NULL) {
! log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
return NULL;
}
if (cpuacct == NULL) {
! log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
return NULL;
}
/*
! * Use info gathered previously from /proc/self/cgroup
! * and map host mount point to
* local one via /proc/self/mountinfo content above
*
* Docker example:
* 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
*
*** 124,169 ****
* For a Host from memory example above the path would be:
*
* /sys/fs/cgroup/memory/user.slice
*
*/
! cgroup = fopen("/proc/self/cgroup", "r");
! if (cgroup == NULL) {
! log_debug(os, container)("Can't open /proc/self/cgroup, %s",
! os::strerror(errno));
! return NULL;
! }
!
! while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
! char *controllers;
! char *token;
! char *base;
!
! /* Skip cgroup number */
! strsep(&p, ":");
! /* Get controllers and base */
! controllers = strsep(&p, ":");
! base = strsep(&p, "\n");
!
! if (controllers == NULL) {
! continue;
! }
!
! while ((token = strsep(&controllers, ",")) != NULL) {
! if (strcmp(token, "memory") == 0) {
! memory->set_subsystem_path(base);
! } else if (strcmp(token, "cpuset") == 0) {
! cpuset->set_subsystem_path(base);
! } else if (strcmp(token, "cpu") == 0) {
! cpu->set_subsystem_path(base);
! } else if (strcmp(token, "cpuacct") == 0) {
! cpuacct->set_subsystem_path(base);
! }
}
}
-
- fclose(cgroup);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
}
/* active_processor_count
*
--- 291,312 ----
* For a Host from memory example above the path would be:
*
* /sys/fs/cgroup/memory/user.slice
*
*/
! for (int i = 0; i < CG_INFO_LENGTH; i++) {
! CgroupInfo info = cg_infos[i];
! if (strcmp(info._name, "memory") == 0) {
! memory->set_subsystem_path(info._cgroup_path);
! } else if (strcmp(info._name, "cpuset") == 0) {
! cpuset->set_subsystem_path(info._cgroup_path);
! } else if (strcmp(info._name, "cpu") == 0) {
! cpu->set_subsystem_path(info._cgroup_path);
! } else if (strcmp(info._name, "cpuacct") == 0) {
! cpuacct->set_subsystem_path(info._cgroup_path);
}
}
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
}
/* active_processor_count
*
< prev index next >