--- old/src/hotspot/share/utilities/stringUtils.cpp	2020-02-21 02:14:50.193917067 -0800
+++ new/src/hotspot/share/utilities/stringUtils.cpp	2020-02-21 02:14:49.804739820 -0800
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "utilities/debug.hpp"
+#include "utilities/ostream.hpp"
 #include "utilities/stringUtils.hpp"
 
 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
@@ -65,3 +66,327 @@
 
   return 2.0 * (double) hit / (double) total;
 }
+
+// A small, non-backtracking matcher for lists of shell-style wildcard patterns.
+//
+// Pattern and subject string are both read through "getc" callbacks.  Each
+// callback returns the next logical character of [source, limit) and advances
+// source past it.  Regular characters are positive; the negative codes in the
+// enum below mark a list separator, a wildcard, and the end of input.
+class StringMatcher {
+ public:
+  typedef int getc_function_t(const char* &source, const char* limit);
+  const getc_function_t* _pattern_getc;
+  const getc_function_t* _string_getc;
+
+  StringMatcher(getc_function_t pattern_getc,
+                getc_function_t string_getc)
+    : _pattern_getc(pattern_getc),
+      _string_getc(string_getc)
+  { }
+
+  enum {  // special results from _pattern_getc
+    string_match_comma  = -0x100 + ',',
+    string_match_star   = -0x100 + '*',
+    string_match_eos    = -0x100 + '\0'
+  };
+
+ private:
+  // Search [match, match_end) for the first occurrence of the anchor word
+  // [pattern, pattern_end), which consists of anchor_length regular characters.
+  // Returns the position just past the occurrence, or NULL if there is none.
+  const char*
+  skip_anchor_word(const char* match,
+                   const char* match_end,
+                   int anchor_length,
+                   const char* pattern,
+                   const char* pattern_end) {
+    assert(pattern < pattern_end && anchor_length > 0, "");
+    if (match_end - match < anchor_length) {
+      return NULL;  // Too short to contain the anchor.
+    }
+    const char* begp = pattern;
+    int ch1 = _pattern_getc(begp, pattern_end);
+    // note that begp is now advanced over ch1
+    assert(ch1 > 0, "regular char only");
+    const char* matchp = match;
+    const char* limitp = match_end - anchor_length;
+    while (matchp <= limitp) {
+      int mch = _string_getc(matchp, match_end);
+      if (mch == ch1) {
+        const char* patp = begp;
+        const char* anchorp = matchp;
+        while (patp < pattern_end) {
+          // Keep the full int results; narrowing to char could alias the
+          // negative special codes with ordinary characters.
+          int pch = _pattern_getc(patp, pattern_end);
+          int sch = _string_getc(anchorp, match_end);
+          if (sch != pch) {
+            anchorp = NULL;
+            break;
+          }
+        }
+        if (anchorp != NULL) {
+          return anchorp;  // Found a full copy of the anchor.
+        }
+        // That did not work, so restart the search for ch1.
+      }
+    }
+    return NULL;
+  }
+
+ public:
+  // Match a NUL-terminated pattern list against a NUL-terminated string.
+  bool string_match(const char* pattern,
+                    const char* string) {
+    return string_match(pattern, pattern + strlen(pattern),
+                        string, string + strlen(string));
+  }
+
+  // Match the pattern list [pattern, pattern_end) against the whole of
+  // [string, string_end).  Returns true if any list item matches.  An empty
+  // pattern never matches; a lone wildcard always matches.
+  bool string_match(const char* pattern, const char* pattern_end,
+                    const char* string, const char* string_end) {
+    const char* patp = pattern;
+    switch (_pattern_getc(patp, pattern_end)) {
+    case string_match_eos:
+      return false;  // Empty pattern is always false.
+    case string_match_star:
+      if (patp == pattern_end) {
+        return true;   // Lone star pattern is always true.
+      }
+      break;
+    }
+    patp = pattern;  // Reset after lookahead.
+    const char* matchp = string;  // NULL if failing
+    for (;;) {
+      int ch = _pattern_getc(patp, pattern_end);
+      switch (ch) {
+      case string_match_eos:
+      case string_match_comma:
+        // End of a list item; see if it's a match.
+        if (matchp == string_end) {
+          return true;
+        }
+        if (ch == string_match_comma) {
+          // Get ready to match the next item.
+          matchp = string;
+          continue;
+        }
+        return false;  // End of all items.
+
+      case string_match_star:
+        if (matchp != NULL) {
+          // Wildcard:  Parse out following anchor word and look for it.
+          const char* begp = patp;
+          const char* endp = patp;
+          int anchor_len = 0;
+          for (;;) {
+            // get as many following regular characters as possible
+            endp = patp;
+            ch = _pattern_getc(patp, pattern_end);
+            if (ch <= 0) {
+              break;
+            }
+            anchor_len += 1;
+          }
+          // Anchor word [begp..endp) does not contain ch, so back up.
+          // Now do an eager match to the anchor word, and commit to it.
+          patp = endp;
+          if (ch == string_match_eos ||
+              ch == string_match_comma) {
+            // Anchor word is at end of pattern, so treat it as a fixed pattern
+            // that can only match the last anchor_len chars of the string.
+            // Measure from string_end (the string need not be NUL-terminated)
+            // and fail if that tail would start before the current position.
+            if (string_end - matchp < anchor_len) {
+              matchp = NULL;
+            } else {
+              matchp = string_end - anchor_len;
+            }
+            patp = begp;
+            // Resume normal scanning at the only possible match position.
+            continue;
+          }
+          if (anchor_len == 0) {
+            continue;  // No anchor between two wildcards; nothing to find.
+          }
+          // Find a floating occurrence of the anchor and continue matching.
+          // Note:  This is greedy; there is no backtrack here.  Good enough.
+          matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
+        }
+        continue;
+      }
+      // Normal character.
+      if (matchp != NULL) {
+        int mch = _string_getc(matchp, string_end);
+        if (mch != ch) {
+          matchp = NULL;
+        }
+      }
+    }
+  }
+};
+
+// Match a wildcarded class list to a proposed class name (in internal form).
+// Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
+class ClassListMatcher : public StringMatcher {
+ public:
+  ClassListMatcher()
+    : StringMatcher(pattern_list_getc, class_name_getc)
+  { }
+
+ private:
+  // Commas and whitespace both terminate a list item.
+  static bool is_item_separator(char c) {
+    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == ',';
+  }
+
+  // Read the next logical pattern character, folding separator runs and star
+  // runs into one special code each, mapping '.' to '/', and honoring
+  // backslash escapes.  Never reads at or beyond pattern_end.
+  static int pattern_list_getc(const char* &pattern_ptr,
+                               const char* pattern_end) {
+    if (pattern_ptr == pattern_end) {
+      return string_match_eos;
+    }
+    int ch = (unsigned char) *pattern_ptr++;
+    switch (ch) {
+    case ' ':  case '\t':  case '\n':  case '\r':
+    case ',':
+      // End of list item.  Collapse multiple commas or spaces.
+      while (pattern_ptr < pattern_end && is_item_separator(*pattern_ptr)) {
+        pattern_ptr += 1;
+      }
+      return string_match_comma;
+
+    case '*':
+      // Wildcard, matching any number of chars.  Collapse multiple stars.
+      while (pattern_ptr < pattern_end && *pattern_ptr == '*') {
+        pattern_ptr += 1;
+      }
+      return string_match_star;
+
+    case '.':
+      ch = '/';  // Look for internal form of package separator
+      break;
+
+    case '\\':
+      // Superquote in pattern escapes * , whitespace, and itself.
+      if (pattern_ptr < pattern_end) {
+        ch = (unsigned char) *pattern_ptr++;
+      }
+      break;
+    }
+
+    assert(ch > 0, "regular char only");
+    return ch;
+  }
+
+  // Read the next class-name character, normalizing '.' to '/'.
+  static int class_name_getc(const char* &name_ptr,
+                             const char* name_end) {
+    if (name_ptr == name_end) {
+      return string_match_eos;
+    }
+    int ch = (unsigned char) *name_ptr++;
+    if (ch == '.') {
+      ch = '/';  // Normalize to internal form of package separator
+    }
+    return ch;  // plain character
+  }
+};
+
+#ifdef ASSERT
+// Only referenced from assert(), so only declared where it is also defined.
+static bool class_list_match_sane();
+#endif //ASSERT
+
+// Returns true if class_name matches any item of class_pattern_list.
+// A NULL pattern list, NULL name, or empty name never matches.
+bool StringUtils::class_list_match(const char* class_pattern_list,
+                                   const char* class_name) {
+  assert(class_list_match_sane(), "");
+  if (class_pattern_list == NULL || class_name == NULL || class_name[0] == '\0')
+    return false;
+  ClassListMatcher clm;
+  return clm.string_match(class_pattern_list, class_name);
+}
+
+#ifdef ASSERT
+// Assert that matching str against pat yields the expected result.
+static void
+class_list_match_sane(const char* pat, const char* str, bool result = true) {
+  if (result) {
+    assert(StringUtils::class_list_match(pat, str), "%s ~ %s", pat, str);
+  } else {
+    assert(!StringUtils::class_list_match(pat, str), "%s !~ %s", pat, str);
+  }
+}
+
+// One-time self-test of class_list_match, run from its first call when ASSERT is defined.
+static bool
+class_list_match_sane() {
+  static bool done = false;
+  if (done)  return true;
+  done = true;  // Set first: the checks below re-enter class_list_match.
+  class_list_match_sane("foo", "foo");
+  class_list_match_sane("foo,", "foo");
+  class_list_match_sane(",foo,", "foo");
+  class_list_match_sane("bar,foo", "foo");
+  class_list_match_sane("bar,foo,", "foo");
+  class_list_match_sane("*", "foo");
+  class_list_match_sane("foo.bar", "foo/bar");
+  class_list_match_sane("foo/bar", "foo.bar");
+  class_list_match_sane("\\foo", "foo");
+  class_list_match_sane("\\*foo", "*foo");
+  // A trailing anchor must not overlap text consumed before the wildcard,
+  // and must not be longer than the whole name.
+  class_list_match_sane("foo*oo", "foooo");
+  class_list_match_sane("foo*oo", "foo", false);
+  class_list_match_sane("*foobar", "bar", false);
+  const char* foo = "foo!";
+  char buf[100], buf2[100];
+  const int m = (int) strlen(foo);
+  // Try every pattern of the form  <prefix>*<middle>*<suffix>  cut from foo.
+  // n=1 appends a char that forces a mismatch; a<0 prepends a failing list
+  // item and a>0 appends one, to exercise list handling.
+  for (int n = 0; n <= 1; n++) {
+    for (int a = -1; a <= 1; a++) {
+      for (int i = 0; i <= m; i++) {
+        for (int j = i; j <= m; j++) {
+          if (j == i && j > 0)  continue;
+          for (int k = j; k <= m; k++) {
+            if (k == j && k > i)  continue;
+            for (int l = k; l <= m; l++) {
+              if (l == k && l > j)  continue;
+              char* bp = &buf[0];
+              strncpy(bp, foo + 0, i - 0);  bp += i - 0;
+              *bp++ = '*';
+              strncpy(bp, foo + j, k - j);  bp += k - j;
+              *bp++ = '*';
+              strncpy(bp, foo + l, m - l);  bp += m - l;
+              if (n) {
+                *bp++ = 'N';  // make it fail
+              }
+              *bp++ = '\0';
+              if (a != 0) {
+                if (a < 0) {
+                  strcpy(buf2, buf);
+                  strcat(buf, "X*, ");
+                  strcat(buf, buf2);
+                } else {
+                  strcat(buf, ", Y");
+                }
+              }
+              class_list_match_sane(buf, foo, !n);
+            }
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+#endif //ASSERT