< prev index next >

src/hotspot/os/linux/osContainer_linux.cpp

Print this page




  37  * is commonly used in cloud frameworks like Kubernetes[1],
  38  * AWS[2] and Mesos[3] in a similar way. They spawn containers with
  39  * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
  40  * the inverse for determining the number of possible available
  41  * CPUs to the JVM inside a container. See JDK-8216366.
  42  *
  43  * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
  44  *     In particular:
  45  *        When using Docker:
  46  *          The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
  47  *          fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
  48  *          --cpu-shares flag in the docker run command.
  49  * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
  50  * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
  51  *     https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
  52  */
  53 #define PER_CPU_SHARES 1024
  54 
     // Both flags start out false; the code that updates them is not part
     // of this excerpt.
  55 bool  OSContainer::_is_initialized   = false;
  56 bool  OSContainer::_is_containerized = false;

     // Threshold used by read_memory_limit_in_bytes(): a limit read from the
     // cgroup files that is >= this value is logged as "Unlimited".
     // NOTE(review): the value is assigned elsewhere -- not visible here.
  57 julong _unlimited_memory;
  58 
  59 class CgroupSubsystem: CHeapObj<mtInternal> {
  60  friend class OSContainer;
  61 

  62  private:


  63     /* mountinfo contents */
  64     char *_root;
  65     char *_mount_point;
  66 
  67     /* Constructed subsystem directory */
  68     char *_path;
  69 
  70  public:
  71     CgroupSubsystem(char *root, char *mountpoint) {
  72       _root = os::strdup(root);
  73       _mount_point = os::strdup(mountpoint);
  74       _path = NULL;

  75     }
  76 
  77     /*
  78      * Set directory to subsystem specific files based
  79      * on the contents of the mountinfo and cgroup files.
  80      */
  81     void set_subsystem_path(char *cgroup_path) {
  82       char buf[MAXPATHLEN+1];
  83       if (_root != NULL && cgroup_path != NULL) {
  84         if (strcmp(_root, "/") == 0) {
  85           int buflen;
  86           strncpy(buf, _mount_point, MAXPATHLEN);
  87           buf[MAXPATHLEN-1] = '\0';
  88           if (strcmp(cgroup_path,"/") != 0) {
  89             buflen = strlen(buf);
  90             if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
  91               return;
  92             }
  93             strncat(buf, cgroup_path, MAXPATHLEN-buflen);
  94             buf[MAXPATHLEN-1] = '\0';


 104             if (p != NULL && p == _root) {
 105               if (strlen(cgroup_path) > strlen(_root)) {
 106                 int buflen;
 107                 strncpy(buf, _mount_point, MAXPATHLEN);
 108                 buf[MAXPATHLEN-1] = '\0';
 109                 buflen = strlen(buf);
 110                 if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) {
 111                   return;
 112                 }
 113                 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
 114                 buf[MAXPATHLEN-1] = '\0';
 115                 _path = os::strdup(buf);
 116               }
 117             }
 118           }
 119         }
 120       }
 121     }
 122 
 123     char *subsystem_path() { return _path; }









 124 };
 125 
 126 class CgroupMemorySubsystem: CgroupSubsystem {
 127  friend class OSContainer;
 128 
 129  private:
 130     /* Some container runtimes set limits via cgroup
 131      * hierarchy. If set to true consider also memory.stat
 132      * file if everything else seems unlimited */
 133     bool _uses_mem_hierarchy;
 134     volatile jlong _memory_limit_in_bytes;
 135     volatile jlong _next_check_counter;
 136 
 137  public:
 138     CgroupMemorySubsystem(char *root, char *mountpoint) : CgroupSubsystem::CgroupSubsystem(root, mountpoint) {
 139       _uses_mem_hierarchy = false;
 140       _memory_limit_in_bytes = -1;
 141       _next_check_counter = min_jlong;
 142 
 143     }
 144 
 145     bool is_hierarchical() { return _uses_mem_hierarchy; }
 146     void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
 147 
 148     bool should_check_memory_limit() {
 149       return os::elapsed_counter() > _next_check_counter;
 150     }
 151     jlong memory_limit_in_bytes() { return _memory_limit_in_bytes; }
 152     void set_memory_limit_in_bytes(jlong value) {
 153       _memory_limit_in_bytes = value;
 154       // max memory limit is unlikely to change, but we want to remain
 155       // responsive to configuration changes. A very short (20ms) grace time
 156       // between re-read avoids excessive overhead during startup without
 157       // significantly reducing the VMs ability to promptly react to reduced
 158       // memory availability
 159       _next_check_counter = os::elapsed_counter() + (NANOSECS_PER_SEC/50);
 160     }
 161 
 162 };
 163 
 164 CgroupMemorySubsystem* memory = NULL;  // used below for the "/memory.*" files
 165 CgroupSubsystem* cpuset = NULL;         // used below for the "/cpuset.*" files
 166 CgroupSubsystem* cpu = NULL;            // NOTE(review): users not visible in this excerpt
 167 CgroupSubsystem* cpuacct = NULL;        // NOTE(review): users not visible in this excerpt
 168 
 169 typedef char * cptr;                    // passed as the type argument of GET_CONTAINER_INFO_CPTR
 170 
 171 PRAGMA_DIAG_PUSH
 172 PRAGMA_FORMAT_NONLITERAL_IGNORED
 173 template <typename T> int subsystem_file_line_contents(CgroupSubsystem* c,
 174                                               const char *filename,
 175                                               const char *matchline,
 176                                               const char *scan_fmt,
 177                                               T returnval) {
 178   FILE *fp = NULL;
 179   char *p;


 464  *    A number > 0 if true, or
 465  *    OSCONTAINER_ERROR for not supported
 466  */
 467 jlong OSContainer::uses_mem_hierarchy() {
       // GET_CONTAINER_INFO is a macro defined outside this excerpt. Judging
       // by its arguments it reads "/memory.use_hierarchy" from the memory
       // subsystem into a local jlong named use_hierarchy -- TODO confirm,
       // including what happens when the file cannot be read.
 468   GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy",
 469                     "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);
 470   return use_hierarchy;
 471 }
 472 
 473 
 474 /* memory_limit_in_bytes
 475  *
 476  * Return the limit of available memory for this process.
 477  *
 478  * return:
 479  *    memory limit in bytes or
 480  *    -1 for unlimited
 481  *    OSCONTAINER_ERROR for not supported
 482  */
 483 jlong OSContainer::memory_limit_in_bytes() {
 484   if (!memory->should_check_memory_limit()) {
 485     return memory->memory_limit_in_bytes();
 486   }
 487   jlong memory_limit = read_memory_limit_in_bytes();
 488   // Update CgroupMemorySubsystem to avoid re-reading container settings too often
 489   memory->set_memory_limit_in_bytes(memory_limit);
 490   return memory_limit;
 491 }
 492 
 493 jlong OSContainer::read_memory_limit_in_bytes() {
 494   GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",
 495                      "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);
 496 
 497   if (memlimit >= _unlimited_memory) {
 498     log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
 499     if (memory->is_hierarchical()) {
 500       const char* matchline = "hierarchical_memory_limit";
 501       const char* format = "%s " JULONG_FORMAT;
 502       GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
 503                              "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)
 504       if (hier_memlimit >= _unlimited_memory) {


 600  *
 601  * If neither shares nor quotas have been specified, return the
 602  * number of active processors in the system.
 603  *
 604  * If both shares and quotas have been specified, the results are
 605  * based on the flag PreferContainerQuotaForCPUCount.  If true,
 606  * return the quota value.  If false return the smallest value
 607  * between shares or quotas.
 608  *
 609  * If shares and/or quotas have been specified, the resulting number
 610  * returned will never exceed the number of active processors.
 611  *
 612  * return:
 613  *    number of CPUs
 614  */
 615 int OSContainer::active_processor_count() {
 616   int quota_count = 0, share_count = 0;
 617   int cpu_count, limit_count;
 618   int result;
 619 








 620   cpu_count = limit_count = os::Linux::active_processor_count();
 621   int quota  = cpu_quota();
 622   int period = cpu_period();
 623   int share  = cpu_shares();
 624 
 625   if (quota > -1 && period > 0) {
 626     quota_count = ceilf((float)quota / (float)period);
 627     log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
 628   }
 629   if (share > -1) {
 630     share_count = ceilf((float)share / (float)PER_CPU_SHARES);
 631     log_trace(os, container)("CPU Share count based on shares: %d", share_count);
 632   }
 633 
 634   // If both shares and quotas are setup results depend
 635   // on flag PreferContainerQuotaForCPUCount.
 636   // If true, limit CPU count to quota
 637   // If false, use minimum of shares and quotas
 638   if (quota_count !=0 && share_count != 0) {
 639     if (PreferContainerQuotaForCPUCount) {
 640       limit_count = quota_count;
 641     } else {
 642       limit_count = MIN2(quota_count, share_count);
 643     }
 644   } else if (quota_count != 0) {
 645     limit_count = quota_count;
 646   } else if (share_count != 0) {
 647     limit_count = share_count;
 648   }
 649 
 650   result = MIN2(cpu_count, limit_count);
 651   log_trace(os, container)("OSContainer::active_processor_count: %d", result);





 652   return result;
 653 }
 654 
 655 char * OSContainer::cpu_cpuset_cpus() {
       // GET_CONTAINER_INFO_CPTR is a macro defined outside this excerpt.
       // Judging by its arguments it scans "/cpuset.cpus" of the cpuset
       // subsystem with "%1023s" into a 1024-byte buffer named cpus --
       // TODO confirm, including the behavior on read failure.
 656   GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus",
 657                      "cpuset.cpus is: %s", "%1023s", cpus, 1024);
       // The result is a copy made with os::strdup().
       // NOTE(review): releasing it looks like the caller's job -- confirm.
 658   return os::strdup(cpus);
 659 }
 660 
 661 char * OSContainer::cpu_cpuset_memory_nodes() {
       // GET_CONTAINER_INFO_CPTR is a macro defined outside this excerpt.
       // Judging by its arguments it scans "/cpuset.mems" of the cpuset
       // subsystem with "%1023s" into a 1024-byte buffer named mems --
       // TODO confirm, including the behavior on read failure.
 662   GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems",
 663                      "cpuset.mems is: %s", "%1023s", mems, 1024);
       // The result is a copy made with os::strdup().
       // NOTE(review): releasing it looks like the caller's job -- confirm.
 664   return os::strdup(mems);
 665 }
 666 
 667 /* cpu_quota
 668  *
 669  * Return the number of milliseconds per period
 670  * process is guaranteed to run.
 671  *




  37  * is commonly used in cloud frameworks like Kubernetes[1],
  38  * AWS[2] and Mesos[3] in a similar way. They spawn containers with
  39  * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
  40  * the inverse for determining the number of possible available
  41  * CPUs to the JVM inside a container. See JDK-8216366.
  42  *
  43  * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
  44  *     In particular:
  45  *        When using Docker:
  46  *          The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
  47  *          fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
  48  *          --cpu-shares flag in the docker run command.
  49  * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
  50  * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
  51  *     https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
  52  */
  53 #define PER_CPU_SHARES 1024
  54 
     // Both flags start out false; the code that updates them is not part
     // of this excerpt.
  55 bool  OSContainer::_is_initialized   = false;
  56 bool  OSContainer::_is_containerized = false;
     // Most recent result of active_processor_count(); that function returns
     // this value directly while the cpu subsystem's cache timeout has not
     // expired.
  57 int   OSContainer::_active_processor_count = 1;
     // Threshold used by read_memory_limit_in_bytes(): a limit read from the
     // cgroup files that is >= this value is logged as "Unlimited".
     // NOTE(review): the value is assigned elsewhere -- not visible here.
  58 julong _unlimited_memory;
  59 
  60 class CgroupSubsystem: CHeapObj<mtInternal> {
  61  friend class OSContainer;
  62 
  63 
  64  private:
  65     volatile jlong _next_check_counter;
  66 
  67     /* mountinfo contents */
  68     char *_root;
  69     char *_mount_point;
  70 
  71     /* Constructed subsystem directory */
  72     char *_path;
  73 
  74  public:
  75     CgroupSubsystem(char *root, char *mountpoint) {
  76       _root = os::strdup(root);
  77       _mount_point = os::strdup(mountpoint);
  78       _path = NULL;
  79       _next_check_counter = min_jlong;
  80     }
  81 
  82     /*
  83      * Set directory to subsystem specific files based
  84      * on the contents of the mountinfo and cgroup files.
  85      */
  86     void set_subsystem_path(char *cgroup_path) {
  87       char buf[MAXPATHLEN+1];
  88       if (_root != NULL && cgroup_path != NULL) {
  89         if (strcmp(_root, "/") == 0) {
  90           int buflen;
  91           strncpy(buf, _mount_point, MAXPATHLEN);
  92           buf[MAXPATHLEN-1] = '\0';
  93           if (strcmp(cgroup_path,"/") != 0) {
  94             buflen = strlen(buf);
  95             if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
  96               return;
  97             }
  98             strncat(buf, cgroup_path, MAXPATHLEN-buflen);
  99             buf[MAXPATHLEN-1] = '\0';


 109             if (p != NULL && p == _root) {
 110               if (strlen(cgroup_path) > strlen(_root)) {
 111                 int buflen;
 112                 strncpy(buf, _mount_point, MAXPATHLEN);
 113                 buf[MAXPATHLEN-1] = '\0';
 114                 buflen = strlen(buf);
 115                 if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) {
 116                   return;
 117                 }
 118                 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
 119                 buf[MAXPATHLEN-1] = '\0';
 120                 _path = os::strdup(buf);
 121               }
 122             }
 123           }
 124         }
 125       }
 126     }
 127 
 128     char *subsystem_path() { return _path; }
 129 
 130     bool check_cache_timeout() {
 131       return os::elapsed_counter() > _next_check_counter;
 132     }
 133 
 134     void set_cache_timeout(jlong timeout) {
 135       _next_check_counter = os::elapsed_counter() + timeout;
 136     }
 137     
 138 };
 139 
 140 class CgroupMemorySubsystem: CgroupSubsystem {
 141  friend class OSContainer;
 142 
 143  private:
 144     /* Some container runtimes set limits via cgroup
 145      * hierarchy. If set to true consider also memory.stat
 146      * file if everything else seems unlimited */
 147     bool _uses_mem_hierarchy;
 148     volatile jlong _memory_limit_in_bytes;

 149 
 150  public:
 151     CgroupMemorySubsystem(char *root, char *mountpoint) : CgroupSubsystem::CgroupSubsystem(root, mountpoint) {
 152       _uses_mem_hierarchy = false;
 153       _memory_limit_in_bytes = -1;

 154 
 155     }
 156 
 157     bool is_hierarchical() { return _uses_mem_hierarchy; }
 158     void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
 159 



 160     jlong memory_limit_in_bytes() { return _memory_limit_in_bytes; }
 161     void set_memory_limit_in_bytes(jlong value) {
 162       _memory_limit_in_bytes = value;
 163       // max memory limit is unlikely to change, but we want to remain
 164       // responsive to configuration changes. A very short (20ms) grace time
 165       // between re-read avoids excessive overhead during startup without
 166       // significantly reducing the VMs ability to promptly react to reduced
 167       // memory availability
 168       set_cache_timeout(OSCONTAINER_CACHE_TIMEOUT);
 169     }
 170 
 171 };
 172 
 173 CgroupMemorySubsystem* memory = NULL;  // used below for the "/memory.*" files and the memory-limit cache
 174 CgroupSubsystem* cpuset = NULL;         // used below for the "/cpuset.*" files
 175 CgroupSubsystem* cpu = NULL;            // its cache timeout guards active_processor_count()
 176 CgroupSubsystem* cpuacct = NULL;        // NOTE(review): users not visible in this excerpt
 177 
 178 typedef char * cptr;                    // passed as the type argument of GET_CONTAINER_INFO_CPTR
 179 
 180 PRAGMA_DIAG_PUSH
 181 PRAGMA_FORMAT_NONLITERAL_IGNORED
 182 template <typename T> int subsystem_file_line_contents(CgroupSubsystem* c,
 183                                               const char *filename,
 184                                               const char *matchline,
 185                                               const char *scan_fmt,
 186                                               T returnval) {
 187   FILE *fp = NULL;
 188   char *p;


 473  *    A number > 0 if true, or
 474  *    OSCONTAINER_ERROR for not supported
 475  */
 476 jlong OSContainer::uses_mem_hierarchy() {
       // GET_CONTAINER_INFO is a macro defined outside this excerpt. Judging
       // by its arguments it reads "/memory.use_hierarchy" from the memory
       // subsystem into a local jlong named use_hierarchy -- TODO confirm,
       // including what happens when the file cannot be read.
 477   GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy",
 478                     "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);
 479   return use_hierarchy;
 480 }
 481 
 482 
 483 /* memory_limit_in_bytes
 484  *
 485  * Return the limit of available memory for this process.
 486  *
 487  * return:
 488  *    memory limit in bytes or
 489  *    -1 for unlimited
 490  *    OSCONTAINER_ERROR for not supported
 491  */
 492 jlong OSContainer::memory_limit_in_bytes() {
 493   if (!memory->check_cache_timeout()) {
 494     return memory->memory_limit_in_bytes();
 495   }
 496   jlong memory_limit = read_memory_limit_in_bytes();
 497   // Update CgroupMemorySubsystem to avoid re-reading container settings too often
 498   memory->set_memory_limit_in_bytes(memory_limit);
 499   return memory_limit;
 500 }
 501 
 502 jlong OSContainer::read_memory_limit_in_bytes() {
 503   GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",
 504                      "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);
 505 
 506   if (memlimit >= _unlimited_memory) {
 507     log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
 508     if (memory->is_hierarchical()) {
 509       const char* matchline = "hierarchical_memory_limit";
 510       const char* format = "%s " JULONG_FORMAT;
 511       GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,
 512                              "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)
 513       if (hier_memlimit >= _unlimited_memory) {


 609  *
 610  * If neither shares nor quotas have been specified, return the
 611  * number of active processors in the system.
 612  *
 613  * If both shares and quotas have been specified, the results are
 614  * based on the flag PreferContainerQuotaForCPUCount.  If true,
 615  * return the quota value.  If false return the smallest value
 616  * between shares or quotas.
 617  *
 618  * If shares and/or quotas have been specified, the resulting number
 619  * returned will never exceed the number of active processors.
 620  *
 621  * return:
 622  *    number of CPUs
 623  */
 624 int OSContainer::active_processor_count() {
 625   int quota_count = 0, share_count = 0;
 626   int cpu_count, limit_count;
 627   int result;
 628 
 629   // We use a cache with a timeout to avoid performing expensive
 630   // computations in the event this function is called frequently.
 631   // [See 8227006].
 632   if (!cpu->check_cache_timeout()) {
 633     log_trace(os, container)("OSContainer::active_processor_count (cached): %d", OSContainer::_active_processor_count);
 634     return OSContainer::_active_processor_count;
 635   }
 636 
 637   cpu_count = limit_count = os::Linux::active_processor_count();
 638   int quota  = cpu_quota();
 639   int period = cpu_period();
 640   int share  = cpu_shares();
 641 
 642   if (quota > -1 && period > 0) {
 643     quota_count = ceilf((float)quota / (float)period);
 644     log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
 645   }
 646   if (share > -1) {
 647     share_count = ceilf((float)share / (float)PER_CPU_SHARES);
 648     log_trace(os, container)("CPU Share count based on shares: %d", share_count);
 649   }
 650 
 651   // If both shares and quotas are setup results depend
 652   // on flag PreferContainerQuotaForCPUCount.
 653   // If true, limit CPU count to quota
 654   // If false, use minimum of shares and quotas
 655   if (quota_count !=0 && share_count != 0) {
 656     if (PreferContainerQuotaForCPUCount) {
 657       limit_count = quota_count;
 658     } else {
 659       limit_count = MIN2(quota_count, share_count);
 660     }
 661   } else if (quota_count != 0) {
 662     limit_count = quota_count;
 663   } else if (share_count != 0) {
 664     limit_count = share_count;
 665   }
 666 
 667   result = MIN2(cpu_count, limit_count);
 668   log_trace(os, container)("OSContainer::active_processor_count: %d", result);
 669 
 670   // Update the value and set the cache timeout to 20ms.
 671   OSContainer::_active_processor_count = result;
 672   cpu->set_cache_timeout(OSCONTAINER_CACHE_TIMEOUT);
 673 
 674   return result;
 675 }
 676 
 677 char * OSContainer::cpu_cpuset_cpus() {
       // GET_CONTAINER_INFO_CPTR is a macro defined outside this excerpt.
       // Judging by its arguments it scans "/cpuset.cpus" of the cpuset
       // subsystem with "%1023s" into a 1024-byte buffer named cpus --
       // TODO confirm, including the behavior on read failure.
 678   GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus",
 679                      "cpuset.cpus is: %s", "%1023s", cpus, 1024);
       // The result is a copy made with os::strdup().
       // NOTE(review): releasing it looks like the caller's job -- confirm.
 680   return os::strdup(cpus);
 681 }
 682 
 683 char * OSContainer::cpu_cpuset_memory_nodes() {
       // GET_CONTAINER_INFO_CPTR is a macro defined outside this excerpt.
       // Judging by its arguments it scans "/cpuset.mems" of the cpuset
       // subsystem with "%1023s" into a 1024-byte buffer named mems --
       // TODO confirm, including the behavior on read failure.
 684   GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems",
 685                      "cpuset.mems is: %s", "%1023s", mems, 1024);
       // The result is a copy made with os::strdup().
       // NOTE(review): releasing it looks like the caller's job -- confirm.
 686   return os::strdup(mems);
 687 }
 688 
 689 /* cpu_quota
 690  *
 691  * Return the number of milliseconds per period
 692  * process is guaranteed to run.
 693  *


< prev index next >