1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include <string.h> 26 #include <math.h> 27 #include <errno.h> 28 #include "utilities/globalDefinitions.hpp" 29 #include "memory/allocation.hpp" 30 #include "runtime/os.hpp" 31 #include "logging/log.hpp" 32 #include "osContainer_linux.hpp" 33 34 /* 35 * Warning: Some linux distros use 0x7FFFFFFFFFFFF000 36 * and others use 0x7FFFFFFFFFFFFFFF for unlimited. 37 */ 38 #define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFFF000) 39 40 #define PER_CPU_SHARES 1024 41 42 bool OSContainer::_is_initialized = false; 43 bool OSContainer::_is_containerized = false; 44 45 class CgroupSubsystem: CHeapObj<mtInternal> { 46 friend class OSContainer; 47 48 private: 49 /* mountinfo contents */ 50 char *_root; 51 char *_mount_point; 52 53 /* Constructed subsystem directory */ 54 char *_path; 55 56 public: 57 CgroupSubsystem(char *root, char *mountpoint) { 58 _root = os::strdup(root); 59 _mount_point = os::strdup(mountpoint); 60 _path = NULL; 61 } 62 63 /* 64 * Set directory to subsystem specific files based 65 * on the contents of the mountinfo and cgroup files. 66 */ 67 void set_subsystem_path(char *cgroup_path) { 68 char buf[MAXPATHLEN+1]; 69 if (_root != NULL && cgroup_path != NULL) { 70 if (strcmp(_root, "/") == 0) { 71 int buflen; 72 strncpy(buf, _mount_point, MAXPATHLEN); 73 buf[MAXPATHLEN-1] = '\0'; 74 if (strcmp(cgroup_path,"/") != 0) { 75 buflen = strlen(buf); 76 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 77 return; 78 } 79 strncat(buf, cgroup_path, MAXPATHLEN-buflen); 80 buf[MAXPATHLEN-1] = '\0'; 81 } 82 _path = os::strdup(buf); 83 } else { 84 if (strcmp(_root, cgroup_path) == 0) { 85 strncpy(buf, _mount_point, MAXPATHLEN); 86 buf[MAXPATHLEN-1] = '\0'; 87 _path = os::strdup(buf); 88 } else { 89 char *p = strstr(_root, cgroup_path); 90 if (p != NULL && p == _root) { 91 if (strlen(cgroup_path) > strlen(_root)) { 92 int buflen; 93 strncpy(buf, _mount_point, MAXPATHLEN); 94 buf[MAXPATHLEN-1] = '\0'; 95 buflen = strlen(buf); 96 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 97 return; 98 } 99 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); 100 buf[MAXPATHLEN-1] = '\0'; 101 _path = os::strdup(buf); 102 } 103 } 104 } 105 } 106 } 107 } 108 109 char *subsystem_path() { return _path; } 110 }; 111 112 CgroupSubsystem* memory = NULL; 113 CgroupSubsystem* cpuset = NULL; 114 CgroupSubsystem* cpu = NULL; 115 CgroupSubsystem* cpuacct = NULL; 116 117 typedef char * cptr; 118 119 PRAGMA_DIAG_PUSH 120 PRAGMA_FORMAT_NONLITERAL_IGNORED 121 template <typename T> int subsystem_file_contents(CgroupSubsystem* c, 122 const char *filename, 123 const char *scan_fmt, 124 T returnval) { 125 FILE *fp = NULL; 126 char *p; 127 char file[MAXPATHLEN+1]; 128 char buf[MAXPATHLEN+1]; 129 130 if (c != NULL && c->subsystem_path() != NULL) { 131 strncpy(file, c->subsystem_path(), MAXPATHLEN); 132 file[MAXPATHLEN-1] = '\0'; 133 int filelen = strlen(file); 134 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { 135 log_debug(os, container)("File path too long %s, %s", file, filename); 136 return OSCONTAINER_ERROR; 137 } 138 strncat(file, filename, MAXPATHLEN-filelen); 139 log_trace(os, container)("Path to %s is %s", filename, file); 140 fp = fopen(file, "r"); 141 if (fp != NULL) { 142 p = fgets(buf, MAXPATHLEN, fp); 143 if (p != NULL) { 144 int matched = sscanf(p, scan_fmt, returnval); 145 if (matched == 1) { 146 fclose(fp); 147 return 0; 148 } else { 149 log_debug(os, container)("Type %s not found in file %s", 150 scan_fmt , file); 151 } 152 } else { 153 log_debug(os, container)("Empty file %s", file); 154 } 155 } else { 156 log_debug(os, container)("Open of file %s failed, %s", file, 157 os::strerror(errno)); 158 } 159 } 160 if (fp != NULL) 161 fclose(fp); 162 return OSCONTAINER_ERROR; 163 } 164 PRAGMA_DIAG_POP 165 166 #define GET_CONTAINER_INFO(return_type, subsystem, filename, \ 167 logstring, scan_fmt, variable) \ 168 return_type variable; \ 169 { \ 170 int err; \ 171 err = subsystem_file_contents(subsystem, \ 172 filename, \ 173 scan_fmt, \ 174 &variable); \ 175 if (err != 0) \ 176 return (return_type) OSCONTAINER_ERROR; \ 177 \ 178 log_trace(os, container)(logstring, variable); \ 179 } 180 181 #define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ 182 logstring, scan_fmt, variable, bufsize) \ 183 char variable[bufsize]; \ 184 { \ 185 int err; \ 186 err = subsystem_file_contents(subsystem, \ 187 filename, \ 188 scan_fmt, \ 189 variable); \ 190 if (err != 0) \ 191 return (return_type) NULL; \ 192 \ 193 log_trace(os, container)(logstring, variable); \ 194 } 195 196 /* init 197 * 198 * Initialize the container support and determine if 199 * we are running under cgroup control. 200 */ 201 void OSContainer::init() { 202 int mountid; 203 int parentid; 204 int major; 205 int minor; 206 FILE *mntinfo = NULL; 207 FILE *cgroup = NULL; 208 char buf[MAXPATHLEN+1]; 209 char tmproot[MAXPATHLEN+1]; 210 char tmpmount[MAXPATHLEN+1]; 211 char tmpbase[MAXPATHLEN+1]; 212 char *p; 213 jlong mem_limit; 214 215 assert(!_is_initialized, "Initializing OSContainer more than once"); 216 217 _is_initialized = true; 218 _is_containerized = false; 219 220 log_trace(os, container)("OSContainer::init: Initializing Container Support"); 221 if (!UseContainerSupport) { 222 log_trace(os, container)("Container Support not enabled"); 223 return; 224 } 225 226 /* 227 * Find the cgroup mount point for memory and cpuset 228 * by reading /proc/self/mountinfo 229 * 230 * Example for docker: 231 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 232 * 233 * Example for host: 234 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory 235 */ 236 mntinfo = fopen("/proc/self/mountinfo", "r"); 237 if (mntinfo == NULL) { 238 log_debug(os, container)("Can't open /proc/self/mountinfo, %s", 239 os::strerror(errno)); 240 return; 241 } 242 243 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 244 // Look for the filesystem type and see if it's cgroup 245 char fstype[MAXPATHLEN+1]; 246 fstype[0] = '\0'; 247 char *s = strstr(p, " - "); 248 if (s != NULL && 249 sscanf(s, " - %s", fstype) == 1 && 250 strcmp(fstype, "cgroup") == 0) { 251 252 if (strstr(p, "memory") != NULL) { 253 int matched = sscanf(p, "%d %d %d:%d %s %s", 254 &mountid, 255 &parentid, 256 &major, 257 &minor, 258 tmproot, 259 tmpmount); 260 if (matched == 6) { 261 memory = new CgroupSubsystem(tmproot, tmpmount); 262 } 263 else 264 log_debug(os, container)("Incompatible str containing cgroup and memory: %s", p); 265 } else if (strstr(p, "cpuset") != NULL) { 266 int matched = sscanf(p, "%d %d %d:%d %s %s", 267 &mountid, 268 &parentid, 269 &major, 270 &minor, 271 tmproot, 272 tmpmount); 273 if (matched == 6) { 274 cpuset = new CgroupSubsystem(tmproot, tmpmount); 275 } 276 else { 277 log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s", p); 278 } 279 } else if (strstr(p, "cpu,cpuacct") != NULL) { 280 int matched = sscanf(p, "%d %d %d:%d %s %s", 281 &mountid, 282 &parentid, 283 &major, 284 &minor, 285 tmproot, 286 tmpmount); 287 if (matched == 6) { 288 cpu = new CgroupSubsystem(tmproot, tmpmount); 289 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 290 } 291 else { 292 log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s", p); 293 } 294 } else if (strstr(p, "cpuacct") != NULL) { 295 int matched = sscanf(p, "%d %d %d:%d %s %s", 296 &mountid, 297 &parentid, 298 &major, 299 &minor, 300 tmproot, 301 tmpmount); 302 if (matched == 6) { 303 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 304 } 305 else { 306 log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s", p); 307 } 308 } else if (strstr(p, "cpu") != NULL) { 309 int matched = sscanf(p, "%d %d %d:%d %s %s", 310 &mountid, 311 &parentid, 312 &major, 313 &minor, 314 tmproot, 315 tmpmount); 316 if (matched == 6) { 317 cpu = new CgroupSubsystem(tmproot, tmpmount); 318 } 319 else { 320 log_debug(os, container)("Incompatible str containing cgroup and cpu: %s", p); 321 } 322 } 323 } 324 } 325 326 fclose(mntinfo); 327 328 if (memory == NULL || cpuset == NULL || cpu == NULL || cpuacct == NULL) { 329 log_debug(os, container)("Required cgroup subsystems not found"); 330 return; 331 } 332 333 /* 334 * Read /proc/self/cgroup and map host mount point to 335 * local one via /proc/self/mountinfo content above 336 * 337 * Docker example: 338 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 339 * 340 * Host example: 341 * 5:memory:/user.slice 342 * 343 * Construct a path to the process specific memory and cpuset 344 * cgroup directory. 345 * 346 * For a container running under Docker from memory example above 347 * the paths would be: 348 * 349 * /sys/fs/cgroup/memory 350 * 351 * For a Host from memory example above the path would be: 352 * 353 * /sys/fs/cgroup/memory/user.slice 354 * 355 */ 356 cgroup = fopen("/proc/self/cgroup", "r"); 357 if (cgroup == NULL) { 358 log_debug(os, container)("Can't open /proc/self/cgroup, %s", 359 os::strerror(errno)); 360 return; 361 } 362 363 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { 364 int cgno; 365 int matched; 366 char *controller; 367 char *base; 368 369 /* Skip cgroup number */ 370 strsep(&p, ":"); 371 /* Get controller and base */ 372 controller = strsep(&p, ":"); 373 base = strsep(&p, "\n"); 374 375 if (controller != NULL) { 376 if (strstr(controller, "memory") != NULL) { 377 memory->set_subsystem_path(base); 378 } else if (strstr(controller, "cpuset") != NULL) { 379 cpuset->set_subsystem_path(base); 380 } else if (strstr(controller, "cpu,cpuacct") != NULL) { 381 cpu->set_subsystem_path(base); 382 cpuacct->set_subsystem_path(base); 383 } else if (strstr(controller, "cpuacct") != NULL) { 384 cpuacct->set_subsystem_path(base); 385 } else if (strstr(controller, "cpu") != NULL) { 386 cpu->set_subsystem_path(base); 387 } 388 } 389 } 390 391 fclose(cgroup); 392 393 // We need to update the amount of physical memory now that 394 // command line arguments have been processed. 395 if ((mem_limit = memory_limit_in_bytes()) > 0) { 396 os::Linux::set_physical_memory(mem_limit); 397 } 398 399 _is_containerized = true; 400 401 } 402 403 char * OSContainer::container_type() { 404 if (is_containerized()) { 405 return (char *)"cgroupv1"; 406 } else { 407 return NULL; 408 } 409 } 410 411 412 /* memory_limit_in_bytes 413 * 414 * Return the limit of available memory for this process. 415 * 416 * return: 417 * memory limit in bytes or 418 * -1 for unlimited 419 * OSCONTAINER_ERROR for not supported 420 */ 421 jlong OSContainer::memory_limit_in_bytes() { 422 GET_CONTAINER_INFO(jlong, memory, "/memory.limit_in_bytes", 423 "Memory Limit is: " JLONG_FORMAT, JLONG_FORMAT, memlimit); 424 425 if (memlimit >= UNLIMITED_MEM) { 426 log_trace(os, container)("Memory Limit is: Unlimited"); 427 return (jlong)-1; 428 } 429 else { 430 return memlimit; 431 } 432 } 433 434 jlong OSContainer::memory_and_swap_limit_in_bytes() { 435 GET_CONTAINER_INFO(jlong, memory, "/memory.memsw.limit_in_bytes", 436 "Memory and Swap Limit is: " JLONG_FORMAT, JLONG_FORMAT, memswlimit); 437 if (memswlimit >= UNLIMITED_MEM) { 438 log_trace(os, container)("Memory and Swap Limit is: Unlimited"); 439 return (jlong)-1; 440 } else { 441 return memswlimit; 442 } 443 } 444 445 jlong OSContainer::memory_soft_limit_in_bytes() { 446 GET_CONTAINER_INFO(jlong, memory, "/memory.soft_limit_in_bytes", 447 "Memory Soft Limit is: " JLONG_FORMAT, JLONG_FORMAT, memsoftlimit); 448 if (memsoftlimit >= UNLIMITED_MEM) { 449 log_trace(os, container)("Memory Soft Limit is: Unlimited"); 450 return (jlong)-1; 451 } else { 452 return memsoftlimit; 453 } 454 } 455 456 /* memory_usage_in_bytes 457 * 458 * Return the amount of used memory for this process. 459 * 460 * return: 461 * memory usage in bytes or 462 * -1 for unlimited 463 * OSCONTAINER_ERROR for not supported 464 */ 465 jlong OSContainer::memory_usage_in_bytes() { 466 GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes", 467 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); 468 return memusage; 469 } 470 471 /* memory_max_usage_in_bytes 472 * 473 * Return the maximum amount of used memory for this process. 474 * 475 * return: 476 * max memory usage in bytes or 477 * OSCONTAINER_ERROR for not supported 478 */ 479 jlong OSContainer::memory_max_usage_in_bytes() { 480 GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes", 481 "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); 482 return memmaxusage; 483 } 484 485 /* active_processor_count 486 * 487 * Calculate an appropriate number of active processors for the 488 * VM to use based on these three cgroup options. 489 * 490 * cpu affinity 491 * cpu quota & cpu period 492 * cpu shares 493 * 494 * Algorithm: 495 * 496 * Determine the number of available CPUs from sched_getaffinity 497 * 498 * If user specified a quota (quota != -1), calculate the number of 499 * required CPUs by dividing quota by period. 500 * 501 * If shares are in effect (shares != -1), calculate the number 502 * of cpus required for the shares by dividing the share value 503 * by PER_CPU_SHARES. 504 * 505 * All results of division are rounded up to the next whole number. 506 * 507 * Return the smaller number from the three different settings. 508 * 509 * return: 510 * number of cpus 511 * OSCONTAINER_ERROR if failure occured during extract of cpuset info 512 */ 513 int OSContainer::active_processor_count() { 514 int cpu_count, share_count, quota_count; 515 int share, quota, period; 516 int result; 517 518 cpu_count = os::Linux::active_processor_count(); 519 520 share = cpu_shares(); 521 if (share > -1) { 522 share_count = ceilf((float)share / (float)PER_CPU_SHARES); 523 log_trace(os, container)("cpu_share count: %d", share_count); 524 } else { 525 share_count = cpu_count; 526 } 527 528 quota = cpu_quota(); 529 period = cpu_period(); 530 if (quota > -1 && period > 0) { 531 quota_count = ceilf((float)quota / (float)period); 532 log_trace(os, container)("quota_count: %d", quota_count); 533 } else { 534 quota_count = cpu_count; 535 } 536 537 result = MIN2(cpu_count, MIN2(share_count, quota_count)); 538 log_trace(os, container)("OSContainer::active_processor_count: %d", result); 539 return result; 540 } 541 542 char * OSContainer::cpu_cpuset_cpus() { 543 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus", 544 "cpuset.cpus is: %s", "%1023s", cpus, 1024); 545 return os::strdup(cpus); 546 } 547 548 char * OSContainer::cpu_cpuset_memory_nodes() { 549 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems", 550 "cpuset.mems is: %s", "%1023s", mems, 1024); 551 return os::strdup(mems); 552 } 553 554 /* cpu_quota 555 * 556 * Return the number of milliseconds per period 557 * process is guaranteed to run. 558 * 559 * return: 560 * quota time in milliseconds 561 * -1 for no quota 562 * OSCONTAINER_ERROR for not supported 563 */ 564 int OSContainer::cpu_quota() { 565 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us", 566 "CPU Quota is: %d", "%d", quota); 567 return quota; 568 } 569 570 int OSContainer::cpu_period() { 571 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us", 572 "CPU Period is: %d", "%d", period); 573 return period; 574 } 575 576 /* cpu_shares 577 * 578 * Return the amount of cpu shares available to the process 579 * 580 * return: 581 * Share number (typically a number relative to 1024) 582 * (2048 typically expresses 2 CPUs worth of processing) 583 * -1 for no share setup 584 * OSCONTAINER_ERROR for not supported 585 */ 586 int OSContainer::cpu_shares() { 587 GET_CONTAINER_INFO(int, cpu, "/cpu.shares", 588 "CPU Shares is: %d", "%d", shares); 589 // Convert 1024 to no shares setup 590 if (shares == 1024) return -1; 591 592 return shares; 593 } 594