10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include <string.h>
26 #include <math.h>
27 #include <errno.h>
28 #include "cgroupSubsystem_linux.hpp"
29 #include "cgroupV1Subsystem_linux.hpp"
30 #include "logging/log.hpp"
31 #include "memory/allocation.hpp"
32 #include "runtime/globals.hpp"
33 #include "runtime/os.hpp"
34 #include "utilities/globalDefinitions.hpp"
35
36 CgroupSubsystem* CgroupSubsystemFactory::create() {
37 CgroupV1MemoryController* memory = NULL;
38 CgroupV1Controller* cpuset = NULL;
39 CgroupV1Controller* cpu = NULL;
40 CgroupV1Controller* cpuacct = NULL;
41 FILE *mntinfo = NULL;
42 FILE *cgroup = NULL;
43 char buf[MAXPATHLEN+1];
44 char tmproot[MAXPATHLEN+1];
45 char tmpmount[MAXPATHLEN+1];
46 char *p;
47
48 /*
49 * Find the cgroup mount point for memory and cpuset
50 * by reading /proc/self/mountinfo
51 *
52 * Example for docker:
53 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
54 *
55 * Example for host:
56 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
57 */
58 mntinfo = fopen("/proc/self/mountinfo", "r");
59 if (mntinfo == NULL) {
60 log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
61 os::strerror(errno));
62 return NULL;
63 }
64
65 while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
66 char tmpcgroups[MAXPATHLEN+1];
70 // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
71 if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
72 continue;
73 }
74 while ((token = strsep(&cptr, ",")) != NULL) {
75 if (strcmp(token, "memory") == 0) {
76 memory = new CgroupV1MemoryController(tmproot, tmpmount);
77 } else if (strcmp(token, "cpuset") == 0) {
78 cpuset = new CgroupV1Controller(tmproot, tmpmount);
79 } else if (strcmp(token, "cpu") == 0) {
80 cpu = new CgroupV1Controller(tmproot, tmpmount);
81 } else if (strcmp(token, "cpuacct") == 0) {
82 cpuacct= new CgroupV1Controller(tmproot, tmpmount);
83 }
84 }
85 }
86
87 fclose(mntinfo);
88
89 if (memory == NULL) {
90 log_debug(os, container)("Required cgroup memory subsystem not found");
91 return NULL;
92 }
93 if (cpuset == NULL) {
94 log_debug(os, container)("Required cgroup cpuset subsystem not found");
95 return NULL;
96 }
97 if (cpu == NULL) {
98 log_debug(os, container)("Required cgroup cpu subsystem not found");
99 return NULL;
100 }
101 if (cpuacct == NULL) {
102 log_debug(os, container)("Required cgroup cpuacct subsystem not found");
103 return NULL;
104 }
105
106 /*
107 * Read /proc/self/cgroup and map host mount point to
108 * local one via /proc/self/mountinfo content above
109 *
110 * Docker example:
111 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
112 *
113 * Host example:
114 * 5:memory:/user.slice
115 *
116 * Construct a path to the process specific memory and cpuset
117 * cgroup directory.
118 *
119 * For a container running under Docker from memory example above
120 * the paths would be:
121 *
122 * /sys/fs/cgroup/memory
123 *
124 * For a Host from memory example above the path would be:
125 *
126 * /sys/fs/cgroup/memory/user.slice
127 *
128 */
129 cgroup = fopen("/proc/self/cgroup", "r");
130 if (cgroup == NULL) {
131 log_debug(os, container)("Can't open /proc/self/cgroup, %s",
132 os::strerror(errno));
133 return NULL;
134 }
135
136 while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
137 char *controllers;
138 char *token;
139 char *base;
140
141 /* Skip cgroup number */
142 strsep(&p, ":");
143 /* Get controllers and base */
144 controllers = strsep(&p, ":");
145 base = strsep(&p, "\n");
146
147 if (controllers == NULL) {
148 continue;
149 }
150
151 while ((token = strsep(&controllers, ",")) != NULL) {
152 if (strcmp(token, "memory") == 0) {
153 memory->set_subsystem_path(base);
154 } else if (strcmp(token, "cpuset") == 0) {
155 cpuset->set_subsystem_path(base);
156 } else if (strcmp(token, "cpu") == 0) {
157 cpu->set_subsystem_path(base);
158 } else if (strcmp(token, "cpuacct") == 0) {
159 cpuacct->set_subsystem_path(base);
160 }
161 }
162 }
163
164 fclose(cgroup);
165 return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
166 }
167
168 /* active_processor_count
169 *
170 * Calculate an appropriate number of active processors for the
171 * VM to use based on these three inputs.
172 *
173 * cpu affinity
174 * cgroup cpu quota & cpu period
175 * cgroup cpu shares
176 *
177 * Algorithm:
178 *
179 * Determine the number of available CPUs from sched_getaffinity
180 *
181 * If user specified a quota (quota != -1), calculate the number of
182 * required CPUs by dividing quota by period.
183 *
184 * If shares are in effect (shares != -1), calculate the number
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include <string.h>
26 #include <math.h>
27 #include <errno.h>
28 #include "cgroupSubsystem_linux.hpp"
29 #include "cgroupV1Subsystem_linux.hpp"
30 #include "cgroupV2Subsystem_linux.hpp"
31 #include "logging/log.hpp"
32 #include "memory/allocation.hpp"
33 #include "runtime/globals.hpp"
34 #include "runtime/os.hpp"
35 #include "utilities/globalDefinitions.hpp"
36
37 CgroupSubsystem* CgroupSubsystemFactory::create() {
38 CgroupV1MemoryController* memory = NULL;
39 CgroupV1Controller* cpuset = NULL;
40 CgroupV1Controller* cpu = NULL;
41 CgroupV1Controller* cpuacct = NULL;
42 FILE *mntinfo = NULL;
43 FILE *cgroups = NULL;
44 FILE *cgroup = NULL;
45 char buf[MAXPATHLEN+1];
46 char tmproot[MAXPATHLEN+1];
47 char tmpmount[MAXPATHLEN+1];
48 char *p;
49 bool is_cgroupsV2;
50 // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
51 // at the kernel level.
52 bool all_controllers_enabled;
53
54 CgroupInfo cg_infos[CG_INFO_LENGTH];
55 int cpuset_idx = 0;
56 int cpu_idx = 1;
57 int cpuacct_idx = 2;
58 int memory_idx = 3;
59
60 /*
61 * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
62 *
63 * For cgroups v1 unified hierarchy, cpu, cpuacct, cpuset, memory controllers
64 * must have non-zero for the hierarchy ID field.
65 */
66 cgroups = fopen("/proc/cgroups", "r");
67 if (cgroups == NULL) {
68 log_debug(os, container)("Can't open /proc/cgroups, %s",
69 os::strerror(errno));
70 return NULL;
71 }
72
73 while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
74 char name[MAXPATHLEN+1];
75 int hierarchy_id;
76 int enabled;
77
78 // Format of /proc/cgroups documented via man 7 cgroups
79 if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
80 continue;
81 }
82 if (strcmp(name, "memory") == 0) {
83 cg_infos[memory_idx]._name = os::strdup(name);
84 cg_infos[memory_idx]._hierarchy_id = hierarchy_id;
85 cg_infos[memory_idx]._enabled = (enabled == 1);
86 } else if (strcmp(name, "cpuset") == 0) {
87 cg_infos[cpuset_idx]._name = os::strdup(name);
88 cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id;
89 cg_infos[cpuset_idx]._enabled = (enabled == 1);
90 } else if (strcmp(name, "cpu") == 0) {
91 cg_infos[cpu_idx]._name = os::strdup(name);
92 cg_infos[cpu_idx]._hierarchy_id = hierarchy_id;
93 cg_infos[cpu_idx]._enabled = (enabled == 1);
94 } else if (strcmp(name, "cpuacct") == 0) {
95 cg_infos[cpuacct_idx]._name = os::strdup(name);
96 cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id;
97 cg_infos[cpuacct_idx]._enabled = (enabled == 1);
98 }
99 }
100 fclose(cgroups);
101
102 is_cgroupsV2 = true;
103 all_controllers_enabled = true;
104 for (int i = 0; i < CG_INFO_LENGTH; i++) {
105 is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
106 all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
107 }
108
109 if (!all_controllers_enabled) {
110 // one or more controllers enabled, disable container support
111 log_debug(os, container)("One or more required controllers not enabled at kernel level.");
112 return NULL;
113 }
114
115 /*
116 * Read /proc/self/cgroup and determine:
117 * - the cgroup path for cgroups v2 or
118 * - on a cgroups v1 system, collect info for mapping
119 * the host mount point to the local one via /proc/self/mountinfo below.
120 */
121 cgroup = fopen("/proc/self/cgroup", "r");
122 if (cgroup == NULL) {
123 log_debug(os, container)("Can't open /proc/self/cgroup, %s",
124 os::strerror(errno));
125 return NULL;
126 }
127
128 while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
129 char *controllers;
130 char *token;
131 char *hierarchy_id_str;
132 int hierarchy_id;
133 char *cgroup_path;
134
135 hierarchy_id_str = strsep(&p, ":");
136 hierarchy_id = atoi(hierarchy_id_str);
137 /* Get controllers and base */
138 controllers = strsep(&p, ":");
139 cgroup_path = strsep(&p, "\n");
140
141 if (controllers == NULL) {
142 continue;
143 }
144
145 while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
146 if (strcmp(token, "memory") == 0) {
147 assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
148 cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path);
149 } else if (strcmp(token, "cpuset") == 0) {
150 assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
151 cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path);
152 } else if (strcmp(token, "cpu") == 0) {
153 assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
154 cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path);
155 } else if (strcmp(token, "cpuacct") == 0) {
156 assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
157 cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path);
158 }
159 }
160 if (is_cgroupsV2) {
161 for (int i = 0; i < CG_INFO_LENGTH; i++) {
162 cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
163 }
164 }
165 }
166 fclose(cgroup);
167
168 if (is_cgroupsV2) {
169 // Find the cgroup2 mount point by reading /proc/self/mountinfo
170 mntinfo = fopen("/proc/self/mountinfo", "r");
171 if (mntinfo == NULL) {
172 log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
173 os::strerror(errno));
174 return NULL;
175 }
176
177 char cgroupv2_mount[MAXPATHLEN+1];
178 char fstype[MAXPATHLEN+1];
179 bool mount_point_found = false;
180 while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
181 char *tmp_mount_point = cgroupv2_mount;
182 char *tmp_fs_type = fstype;
183
184 // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
185 if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
186 // we likely have an early match return, be sure we have cgroup2 as fstype
187 if (strcmp("cgroup2", tmp_fs_type) == 0) {
188 mount_point_found = true;
189 break;
190 }
191 }
192 }
193 fclose(mntinfo);
194 if (!mount_point_found) {
195 log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
196 return NULL;
197 }
198 // Cgroups v2 case, we have all the info we need.
199 // Construct the subsystem, free resources and return
200 // Note: any index in cg_infos will do as the path is the same for
201 // all controllers.
202 CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path);
203 for (int i = 0; i < CG_INFO_LENGTH; i++) {
204 os::free(cg_infos[i]._name);
205 os::free(cg_infos[i]._cgroup_path);
206 }
207 log_debug(os, container)("Detected cgroups v2 unified hierarchy");
208 return new CgroupV2Subsystem(unified);
209 }
210
211 // What follows is cgroups v1
212 log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");
213
214 /*
215 * Find the cgroup mount point for memory and cpuset
216 * by reading /proc/self/mountinfo
217 *
218 * Example for docker:
219 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
220 *
221 * Example for host:
222 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
223 */
224 mntinfo = fopen("/proc/self/mountinfo", "r");
225 if (mntinfo == NULL) {
226 log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
227 os::strerror(errno));
228 return NULL;
229 }
230
231 while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
232 char tmpcgroups[MAXPATHLEN+1];
236 // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
237 if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
238 continue;
239 }
240 while ((token = strsep(&cptr, ",")) != NULL) {
241 if (strcmp(token, "memory") == 0) {
242 memory = new CgroupV1MemoryController(tmproot, tmpmount);
243 } else if (strcmp(token, "cpuset") == 0) {
244 cpuset = new CgroupV1Controller(tmproot, tmpmount);
245 } else if (strcmp(token, "cpu") == 0) {
246 cpu = new CgroupV1Controller(tmproot, tmpmount);
247 } else if (strcmp(token, "cpuacct") == 0) {
248 cpuacct= new CgroupV1Controller(tmproot, tmpmount);
249 }
250 }
251 }
252
253 fclose(mntinfo);
254
255 if (memory == NULL) {
256 log_debug(os, container)("Required cgroup v1 memory subsystem not found");
257 return NULL;
258 }
259 if (cpuset == NULL) {
260 log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
261 return NULL;
262 }
263 if (cpu == NULL) {
264 log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
265 return NULL;
266 }
267 if (cpuacct == NULL) {
268 log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
269 return NULL;
270 }
271
272 /*
273 * Use info gathered previously from /proc/self/cgroup
274 * and map host mount point to
275 * local one via /proc/self/mountinfo content above
276 *
277 * Docker example:
278 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
279 *
280 * Host example:
281 * 5:memory:/user.slice
282 *
283 * Construct a path to the process specific memory and cpuset
284 * cgroup directory.
285 *
286 * For a container running under Docker from memory example above
287 * the paths would be:
288 *
289 * /sys/fs/cgroup/memory
290 *
291 * For a Host from memory example above the path would be:
292 *
293 * /sys/fs/cgroup/memory/user.slice
294 *
295 */
296 for (int i = 0; i < CG_INFO_LENGTH; i++) {
297 CgroupInfo info = cg_infos[i];
298 if (strcmp(info._name, "memory") == 0) {
299 memory->set_subsystem_path(info._cgroup_path);
300 } else if (strcmp(info._name, "cpuset") == 0) {
301 cpuset->set_subsystem_path(info._cgroup_path);
302 } else if (strcmp(info._name, "cpu") == 0) {
303 cpu->set_subsystem_path(info._cgroup_path);
304 } else if (strcmp(info._name, "cpuacct") == 0) {
305 cpuacct->set_subsystem_path(info._cgroup_path);
306 }
307 }
308 return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
309 }
310
311 /* active_processor_count
312 *
313 * Calculate an appropriate number of active processors for the
314 * VM to use based on these three inputs.
315 *
316 * cpu affinity
317 * cgroup cpu quota & cpu period
318 * cgroup cpu shares
319 *
320 * Algorithm:
321 *
322 * Determine the number of available CPUs from sched_getaffinity
323 *
324 * If user specified a quota (quota != -1), calculate the number of
325 * required CPUs by dividing quota by period.
326 *
327 * If shares are in effect (shares != -1), calculate the number
|