rev 48382 : 8194232: Container memory not properly recognized.
1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include <string.h> 26 #include <math.h> 27 #include <errno.h> 28 #include "utilities/globalDefinitions.hpp" 29 #include "memory/allocation.hpp" 30 #include "runtime/os.hpp" 31 #include "logging/log.hpp" 32 #include "osContainer_linux.hpp" 33 34 /* 35 * Warning: Some linux distros use 0x7FFFFFFFFFFFF000, 36 * some (SLES 12.1 ppc64le) 0x7FFFFFFFFFFF0000, 37 * and others use 0x7FFFFFFFFFFFFFFF for unlimited. 38 */ 39 #define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFF0000) 40 41 #define PER_CPU_SHARES 1024 42 43 bool OSContainer::_is_initialized = false; 44 bool OSContainer::_is_containerized = false; 45 46 class CgroupSubsystem: CHeapObj<mtInternal> { 47 friend class OSContainer; 48 49 private: 50 /* mountinfo contents */ 51 char *_root; 52 char *_mount_point; 53 54 /* Constructed subsystem directory */ 55 char *_path; 56 57 public: 58 CgroupSubsystem(char *root, char *mountpoint) { 59 _root = os::strdup(root); 60 _mount_point = os::strdup(mountpoint); 61 _path = NULL; 62 } 63 64 /* 65 * Set directory to subsystem specific files based 66 * on the contents of the mountinfo and cgroup files. 67 */ 68 void set_subsystem_path(char *cgroup_path) { 69 char buf[MAXPATHLEN+1]; 70 if (_root != NULL && cgroup_path != NULL) { 71 if (strcmp(_root, "/") == 0) { 72 int buflen; 73 strncpy(buf, _mount_point, MAXPATHLEN); 74 buf[MAXPATHLEN-1] = '\0'; 75 if (strcmp(cgroup_path,"/") != 0) { 76 buflen = strlen(buf); 77 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 78 return; 79 } 80 strncat(buf, cgroup_path, MAXPATHLEN-buflen); 81 buf[MAXPATHLEN-1] = '\0'; 82 } 83 _path = os::strdup(buf); 84 } else { 85 if (strcmp(_root, cgroup_path) == 0) { 86 strncpy(buf, _mount_point, MAXPATHLEN); 87 buf[MAXPATHLEN-1] = '\0'; 88 _path = os::strdup(buf); 89 } else { 90 char *p = strstr(_root, cgroup_path); 91 if (p != NULL && p == _root) { 92 if (strlen(cgroup_path) > strlen(_root)) { 93 int buflen; 94 strncpy(buf, _mount_point, MAXPATHLEN); 95 buf[MAXPATHLEN-1] = '\0'; 96 buflen = strlen(buf); 97 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 98 return; 99 } 100 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); 101 buf[MAXPATHLEN-1] = '\0'; 102 _path = os::strdup(buf); 103 } 104 } 105 } 106 } 107 } 108 } 109 110 char *subsystem_path() { return _path; } 111 }; 112 113 CgroupSubsystem* memory = NULL; 114 CgroupSubsystem* cpuset = NULL; 115 CgroupSubsystem* cpu = NULL; 116 CgroupSubsystem* cpuacct = NULL; 117 118 typedef char * cptr; 119 120 PRAGMA_DIAG_PUSH 121 PRAGMA_FORMAT_NONLITERAL_IGNORED 122 template <typename T> int subsystem_file_contents(CgroupSubsystem* c, 123 const char *filename, 124 const char *scan_fmt, 125 T returnval) { 126 FILE *fp = NULL; 127 char *p; 128 char file[MAXPATHLEN+1]; 129 char buf[MAXPATHLEN+1]; 130 131 if (c != NULL && c->subsystem_path() != NULL) { 132 strncpy(file, c->subsystem_path(), MAXPATHLEN); 133 file[MAXPATHLEN-1] = '\0'; 134 int filelen = strlen(file); 135 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { 136 log_debug(os, container)("File path too long %s, %s", file, filename); 137 return OSCONTAINER_ERROR; 138 } 139 strncat(file, filename, MAXPATHLEN-filelen); 140 log_trace(os, container)("Path to %s is %s", filename, file); 141 fp = fopen(file, "r"); 142 if (fp != NULL) { 143 p = fgets(buf, MAXPATHLEN, fp); 144 if (p != NULL) { 145 int matched = sscanf(p, scan_fmt, returnval); 146 if (matched == 1) { 147 fclose(fp); 148 return 0; 149 } else { 150 log_debug(os, container)("Type %s not found in file %s", 151 scan_fmt , file); 152 } 153 } else { 154 log_debug(os, container)("Empty file %s", file); 155 } 156 } else { 157 log_debug(os, container)("Open of file %s failed, %s", file, 158 os::strerror(errno)); 159 } 160 } 161 if (fp != NULL) 162 fclose(fp); 163 return OSCONTAINER_ERROR; 164 } 165 PRAGMA_DIAG_POP 166 167 #define GET_CONTAINER_INFO(return_type, subsystem, filename, \ 168 logstring, scan_fmt, variable) \ 169 return_type variable; \ 170 { \ 171 int err; \ 172 err = subsystem_file_contents(subsystem, \ 173 filename, \ 174 scan_fmt, \ 175 &variable); \ 176 if (err != 0) \ 177 return (return_type) OSCONTAINER_ERROR; \ 178 \ 179 log_trace(os, container)(logstring, variable); \ 180 } 181 182 #define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ 183 logstring, scan_fmt, variable, bufsize) \ 184 char variable[bufsize]; \ 185 { \ 186 int err; \ 187 err = subsystem_file_contents(subsystem, \ 188 filename, \ 189 scan_fmt, \ 190 variable); \ 191 if (err != 0) \ 192 return (return_type) NULL; \ 193 \ 194 log_trace(os, container)(logstring, variable); \ 195 } 196 197 /* init 198 * 199 * Initialize the container support and determine if 200 * we are running under cgroup control. 201 */ 202 void OSContainer::init() { 203 int mountid; 204 int parentid; 205 int major; 206 int minor; 207 FILE *mntinfo = NULL; 208 FILE *cgroup = NULL; 209 char buf[MAXPATHLEN+1]; 210 char tmproot[MAXPATHLEN+1]; 211 char tmpmount[MAXPATHLEN+1]; 212 char tmpbase[MAXPATHLEN+1]; 213 char *p; 214 jlong mem_limit; 215 216 assert(!_is_initialized, "Initializing OSContainer more than once"); 217 218 _is_initialized = true; 219 _is_containerized = false; 220 221 log_trace(os, container)("OSContainer::init: Initializing Container Support"); 222 if (!UseContainerSupport) { 223 log_trace(os, container)("Container Support not enabled"); 224 return; 225 } 226 227 /* 228 * Find the cgroup mount point for memory and cpuset 229 * by reading /proc/self/mountinfo 230 * 231 * Example for docker: 232 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 233 * 234 * Example for host: 235 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory 236 */ 237 mntinfo = fopen("/proc/self/mountinfo", "r"); 238 if (mntinfo == NULL) { 239 log_debug(os, container)("Can't open /proc/self/mountinfo, %s", 240 os::strerror(errno)); 241 return; 242 } 243 244 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 245 // Look for the filesystem type and see if it's cgroup 246 char fstype[MAXPATHLEN+1]; 247 fstype[0] = '\0'; 248 char *s = strstr(p, " - "); 249 if (s != NULL && 250 sscanf(s, " - %s", fstype) == 1 && 251 strcmp(fstype, "cgroup") == 0) { 252 253 if (strstr(p, "memory") != NULL) { 254 int matched = sscanf(p, "%d %d %d:%d %s %s", 255 &mountid, 256 &parentid, 257 &major, 258 &minor, 259 tmproot, 260 tmpmount); 261 if (matched == 6) { 262 memory = new CgroupSubsystem(tmproot, tmpmount); 263 } 264 else 265 log_debug(os, container)("Incompatible str containing cgroup and memory: %s", p); 266 } else if (strstr(p, "cpuset") != NULL) { 267 int matched = sscanf(p, "%d %d %d:%d %s %s", 268 &mountid, 269 &parentid, 270 &major, 271 &minor, 272 tmproot, 273 tmpmount); 274 if (matched == 6) { 275 cpuset = new CgroupSubsystem(tmproot, tmpmount); 276 } 277 else { 278 log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s", p); 279 } 280 } else if (strstr(p, "cpu,cpuacct") != NULL) { 281 int matched = sscanf(p, "%d %d %d:%d %s %s", 282 &mountid, 283 &parentid, 284 &major, 285 &minor, 286 tmproot, 287 tmpmount); 288 if (matched == 6) { 289 cpu = new CgroupSubsystem(tmproot, tmpmount); 290 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 291 } 292 else { 293 log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s", p); 294 } 295 } else if (strstr(p, "cpuacct") != NULL) { 296 int matched = sscanf(p, "%d %d %d:%d %s %s", 297 &mountid, 298 &parentid, 299 &major, 300 &minor, 301 tmproot, 302 tmpmount); 303 if (matched == 6) { 304 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 305 } 306 else { 307 log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s", p); 308 } 309 } else if (strstr(p, "cpu") != NULL) { 310 int matched = sscanf(p, "%d %d %d:%d %s %s", 311 &mountid, 312 &parentid, 313 &major, 314 &minor, 315 tmproot, 316 tmpmount); 317 if (matched == 6) { 318 cpu = new CgroupSubsystem(tmproot, tmpmount); 319 } 320 else { 321 log_debug(os, container)("Incompatible str containing cgroup and cpu: %s", p); 322 } 323 } 324 } 325 } 326 327 fclose(mntinfo); 328 329 if (memory == NULL || cpuset == NULL || cpu == NULL || cpuacct == NULL) { 330 log_debug(os, container)("Required cgroup subsystems not found"); 331 return; 332 } 333 334 /* 335 * Read /proc/self/cgroup and map host mount point to 336 * local one via /proc/self/mountinfo content above 337 * 338 * Docker example: 339 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 340 * 341 * Host example: 342 * 5:memory:/user.slice 343 * 344 * Construct a path to the process specific memory and cpuset 345 * cgroup directory. 346 * 347 * For a container running under Docker from memory example above 348 * the paths would be: 349 * 350 * /sys/fs/cgroup/memory 351 * 352 * For a Host from memory example above the path would be: 353 * 354 * /sys/fs/cgroup/memory/user.slice 355 * 356 */ 357 cgroup = fopen("/proc/self/cgroup", "r"); 358 if (cgroup == NULL) { 359 log_debug(os, container)("Can't open /proc/self/cgroup, %s", 360 os::strerror(errno)); 361 return; 362 } 363 364 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { 365 int cgno; 366 int matched; 367 char *controller; 368 char *base; 369 370 /* Skip cgroup number */ 371 strsep(&p, ":"); 372 /* Get controller and base */ 373 controller = strsep(&p, ":"); 374 base = strsep(&p, "\n"); 375 376 if (controller != NULL) { 377 if (strstr(controller, "memory") != NULL) { 378 memory->set_subsystem_path(base); 379 } else if (strstr(controller, "cpuset") != NULL) { 380 cpuset->set_subsystem_path(base); 381 } else if (strstr(controller, "cpu,cpuacct") != NULL) { 382 cpu->set_subsystem_path(base); 383 cpuacct->set_subsystem_path(base); 384 } else if (strstr(controller, "cpuacct") != NULL) { 385 cpuacct->set_subsystem_path(base); 386 } else if (strstr(controller, "cpu") != NULL) { 387 cpu->set_subsystem_path(base); 388 } 389 } 390 } 391 392 fclose(cgroup); 393 394 // We need to update the amount of physical memory now that 395 // command line arguments have been processed. 396 if ((mem_limit = memory_limit_in_bytes()) > 0) { 397 os::Linux::set_physical_memory(mem_limit); 398 } 399 400 _is_containerized = true; 401 402 } 403 404 char * OSContainer::container_type() { 405 if (is_containerized()) { 406 return (char *)"cgroupv1"; 407 } else { 408 return NULL; 409 } 410 } 411 412 413 /* memory_limit_in_bytes 414 * 415 * Return the limit of available memory for this process. 416 * 417 * return: 418 * memory limit in bytes or 419 * -1 for unlimited 420 * OSCONTAINER_ERROR for not supported 421 */ 422 jlong OSContainer::memory_limit_in_bytes() { 423 GET_CONTAINER_INFO(jlong, memory, "/memory.limit_in_bytes", 424 "Memory Limit is: " JLONG_FORMAT, JLONG_FORMAT, memlimit); 425 426 if (memlimit >= UNLIMITED_MEM) { 427 log_trace(os, container)("Memory Limit is: Unlimited"); 428 return (jlong)-1; 429 } 430 else { 431 return memlimit; 432 } 433 } 434 435 jlong OSContainer::memory_and_swap_limit_in_bytes() { 436 GET_CONTAINER_INFO(jlong, memory, "/memory.memsw.limit_in_bytes", 437 "Memory and Swap Limit is: " JLONG_FORMAT, JLONG_FORMAT, memswlimit); 438 if (memswlimit >= UNLIMITED_MEM) { 439 log_trace(os, container)("Memory and Swap Limit is: Unlimited"); 440 return (jlong)-1; 441 } else { 442 return memswlimit; 443 } 444 } 445 446 jlong OSContainer::memory_soft_limit_in_bytes() { 447 GET_CONTAINER_INFO(jlong, memory, "/memory.soft_limit_in_bytes", 448 "Memory Soft Limit is: " JLONG_FORMAT, JLONG_FORMAT, memsoftlimit); 449 if (memsoftlimit >= UNLIMITED_MEM) { 450 log_trace(os, container)("Memory Soft Limit is: Unlimited"); 451 return (jlong)-1; 452 } else { 453 return memsoftlimit; 454 } 455 } 456 457 /* memory_usage_in_bytes 458 * 459 * Return the amount of used memory for this process. 460 * 461 * return: 462 * memory usage in bytes or 463 * -1 for unlimited 464 * OSCONTAINER_ERROR for not supported 465 */ 466 jlong OSContainer::memory_usage_in_bytes() { 467 GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes", 468 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); 469 return memusage; 470 } 471 472 /* memory_max_usage_in_bytes 473 * 474 * Return the maximum amount of used memory for this process. 475 * 476 * return: 477 * max memory usage in bytes or 478 * OSCONTAINER_ERROR for not supported 479 */ 480 jlong OSContainer::memory_max_usage_in_bytes() { 481 GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes", 482 "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); 483 return memmaxusage; 484 } 485 486 /* active_processor_count 487 * 488 * Calculate an appropriate number of active processors for the 489 * VM to use based on these three cgroup options. 490 * 491 * cpu affinity 492 * cpu quota & cpu period 493 * cpu shares 494 * 495 * Algorithm: 496 * 497 * Determine the number of available CPUs from sched_getaffinity 498 * 499 * If user specified a quota (quota != -1), calculate the number of 500 * required CPUs by dividing quota by period. 501 * 502 * If shares are in effect (shares != -1), calculate the number 503 * of cpus required for the shares by dividing the share value 504 * by PER_CPU_SHARES. 505 * 506 * All results of division are rounded up to the next whole number. 507 * 508 * Return the smaller number from the three different settings. 509 * 510 * return: 511 * number of cpus 512 * OSCONTAINER_ERROR if failure occured during extract of cpuset info 513 */ 514 int OSContainer::active_processor_count() { 515 int cpu_count, share_count, quota_count; 516 int share, quota, period; 517 int result; 518 519 cpu_count = os::Linux::active_processor_count(); 520 521 share = cpu_shares(); 522 if (share > -1) { 523 share_count = ceilf((float)share / (float)PER_CPU_SHARES); 524 log_trace(os, container)("cpu_share count: %d", share_count); 525 } else { 526 share_count = cpu_count; 527 } 528 529 quota = cpu_quota(); 530 period = cpu_period(); 531 if (quota > -1 && period > 0) { 532 quota_count = ceilf((float)quota / (float)period); 533 log_trace(os, container)("quota_count: %d", quota_count); 534 } else { 535 quota_count = cpu_count; 536 } 537 538 result = MIN2(cpu_count, MIN2(share_count, quota_count)); 539 log_trace(os, container)("OSContainer::active_processor_count: %d", result); 540 return result; 541 } 542 543 char * OSContainer::cpu_cpuset_cpus() { 544 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus", 545 "cpuset.cpus is: %s", "%1023s", cpus, 1024); 546 return os::strdup(cpus); 547 } 548 549 char * OSContainer::cpu_cpuset_memory_nodes() { 550 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems", 551 "cpuset.mems is: %s", "%1023s", mems, 1024); 552 return os::strdup(mems); 553 } 554 555 /* cpu_quota 556 * 557 * Return the number of milliseconds per period 558 * process is guaranteed to run. 559 * 560 * return: 561 * quota time in milliseconds 562 * -1 for no quota 563 * OSCONTAINER_ERROR for not supported 564 */ 565 int OSContainer::cpu_quota() { 566 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us", 567 "CPU Quota is: %d", "%d", quota); 568 return quota; 569 } 570 571 int OSContainer::cpu_period() { 572 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us", 573 "CPU Period is: %d", "%d", period); 574 return period; 575 } 576 577 /* cpu_shares 578 * 579 * Return the amount of cpu shares available to the process 580 * 581 * return: 582 * Share number (typically a number relative to 1024) 583 * (2048 typically expresses 2 CPUs worth of processing) 584 * -1 for no share setup 585 * OSCONTAINER_ERROR for not supported 586 */ 587 int OSContainer::cpu_shares() { 588 GET_CONTAINER_INFO(int, cpu, "/cpu.shares", 589 "CPU Shares is: %d", "%d", shares); 590 // Convert 1024 to no shares setup 591 if (shares == 1024) return -1; 592 593 return shares; 594 } 595 --- EOF ---