7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "utilities/debug.hpp"
27 #include "utilities/stringUtils.hpp"
28
29 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
30 int replace_count = 0;
31 size_t from_len = strlen(from);
32 size_t to_len = strlen(to);
33 assert(from_len >= to_len, "must not expand input");
34
35 for (char* dst = string; *dst && (dst = strstr(dst, from)) != NULL;) {
36 char* left_over = dst + from_len;
37 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
38 dst += to_len; // skip over the replacement.
39 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
40 ++ replace_count;
41 }
42
43 return replace_count;
44 }
45
46 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
48
49 // filter out zero-length strings else we will underflow on len-1 below
50 if (len1 == 0 || len2 == 0) {
51 return 0.0;
52 }
53
54 size_t total = len1 + len2;
55 size_t hit = 0;
56
57 for (size_t i = 0; i < len1 - 1; i++) {
58 for (size_t j = 0; j < len2 - 1; j++) {
59 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
60 ++hit;
61 break;
62 }
63 }
64 }
65
66 return 2.0 * (double) hit / (double) total;
67 }
|
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "utilities/debug.hpp"
27 #include "utilities/ostream.hpp"
28 #include "utilities/stringUtils.hpp"
29
30 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
31 int replace_count = 0;
32 size_t from_len = strlen(from);
33 size_t to_len = strlen(to);
34 assert(from_len >= to_len, "must not expand input");
35
36 for (char* dst = string; *dst && (dst = strstr(dst, from)) != NULL;) {
37 char* left_over = dst + from_len;
38 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
39 dst += to_len; // skip over the replacement.
40 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
41 ++ replace_count;
42 }
43
44 return replace_count;
45 }
46
47 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
49
50 // filter out zero-length strings else we will underflow on len-1 below
51 if (len1 == 0 || len2 == 0) {
52 return 0.0;
53 }
54
55 size_t total = len1 + len2;
56 size_t hit = 0;
57
58 for (size_t i = 0; i < len1 - 1; i++) {
59 for (size_t j = 0; j < len2 - 1; j++) {
60 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
61 ++hit;
62 break;
63 }
64 }
65 }
66
67 return 2.0 * (double) hit / (double) total;
68 }
69
70 class StringMatcher {
71 public:
72 typedef int getc_function_t(const char* &source, const char* limit);
73 const getc_function_t* _pattern_getc;
74 const getc_function_t* _string_getc;
75
76 StringMatcher(getc_function_t pattern_getc,
77 getc_function_t string_getc)
78 : _pattern_getc(pattern_getc),
79 _string_getc(string_getc)
80 { }
81
82 enum { // special results from _pattern_getc
83 string_match_comma = -0x100 + ',',
84 string_match_star = -0x100 + '*',
85 string_match_eos = -0x100 + '\0'
86 };
87
88 private:
89 const char*
90 skip_anchor_word(const char* match,
91 const char* match_end,
92 int anchor_length,
93 const char* pattern,
94 const char* pattern_end) {
95 assert(pattern < pattern_end && anchor_length > 0, "");
96 const char* begp = pattern;
97 int ch1 = _pattern_getc(begp, pattern_end);
98 // note that begp is now advanced over ch1
99 assert(ch1 > 0, "regular char only");
100 const char* matchp = match;
101 const char* limitp = match_end - anchor_length;
102 while (matchp <= limitp) {
103 int mch = _string_getc(matchp, match_end);
104 if (mch == ch1) {
105 const char* patp = begp;
106 const char* anchorp = matchp;
107 while (patp < pattern_end) {
108 char ch = _pattern_getc(patp, pattern_end);
109 char mch = _string_getc(anchorp, match_end);
110 if (mch != ch) {
111 anchorp = NULL;
112 break;
113 }
114 }
115 if (anchorp != NULL) {
116 return anchorp; // Found a full copy of the anchor.
117 }
118 // That did not work, so restart the search for ch1.
119 }
120 }
121 return NULL;
122 }
123
124 public:
125 bool string_match(const char* pattern,
126 const char* string) {
127 return string_match(pattern, pattern + strlen(pattern),
128 string, string + strlen(string));
129 }
130 bool string_match(const char* pattern, const char* pattern_end,
131 const char* string, const char* string_end) {
132 const char* patp = pattern;
133 switch (_pattern_getc(patp, pattern_end)) {
134 case string_match_eos:
135 return false; // Empty pattern is always false.
136 case string_match_star:
137 if (patp == pattern_end) {
138 return true; // Lone star pattern is always true.
139 }
140 break;
141 }
142 patp = pattern; // Reset after lookahead.
143 const char* matchp = string; // NULL if failing
144 for (;;) {
145 int ch = _pattern_getc(patp, pattern_end);
146 switch (ch) {
147 case string_match_eos:
148 case string_match_comma:
149 // End of a list item; see if it's a match.
150 if (matchp == string_end) {
151 return true;
152 }
153 if (ch == string_match_comma) {
154 // Get ready to match the next item.
155 matchp = string;
156 continue;
157 }
158 return false; // End of all items.
159
160 case string_match_star:
161 if (matchp != NULL) {
162 // Wildcard: Parse out following anchor word and look for it.
163 const char* begp = patp;
164 const char* endp = patp;
165 int anchor_len = 0;
166 for (;;) {
167 // get as many following regular characters as possible
168 endp = patp;
169 ch = _pattern_getc(patp, pattern_end);
170 if (ch <= 0) {
171 break;
172 }
173 anchor_len += 1;
174 }
175 // Anchor word [begp..endp) does not contain ch, so back up.
176 // Now do an eager match to the anchor word, and commit to it.
177 patp = endp;
178 if (ch == string_match_eos ||
179 ch == string_match_comma) {
180 // Anchor word is at end of pattern, so treat it as a fixed pattern.
181 const char* limitp = (matchp + strlen(matchp)) - anchor_len;
182 matchp = limitp;
183 patp = begp;
184 // Resume normal scanning at the only possible match position.
185 continue;
186 }
187 // Find a floating occurrence of the anchor and continue matching.
188 // Note: This is greedy; there is no backtrack here. Good enough.
189 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
190 }
191 continue;
192 }
193 // Normal character.
194 if (matchp != NULL) {
195 int mch = _string_getc(matchp, string_end);
196 if (mch != ch) {
197 matchp = NULL;
198 }
199 }
200 }
201 }
202 };
203
204 // Match a wildcarded class list to a proposed class name (in internal form).
205 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
206 class ClassListMatcher : public StringMatcher {
207 public:
208 ClassListMatcher()
209 : StringMatcher(pattern_list_getc, class_name_getc)
210 { }
211
212 private:
213 static int pattern_list_getc(const char* &pattern_ptr,
214 const char* pattern_end) {
215 if (pattern_ptr == pattern_end) {
216 return string_match_eos;
217 }
218 int ch = (unsigned char) *pattern_ptr++;
219 switch (ch) {
220 case ' ': case '\t': case '\n': case '\r':
221 case ',':
222 // End of list item.
223 for (;;) {
224 switch (*pattern_ptr) {
225 case ' ': case '\t': case '\n': case '\r':
226 case ',':
227 pattern_ptr += 1; // Collapse multiple commas or spaces.
228 continue;
229 }
230 break;
231 }
232 return string_match_comma;
233
234 case '*':
235 // Wildcard, matching any number of chars.
236 while (*pattern_ptr == '*') {
237 pattern_ptr += 1; // Collapse multiple stars.
238 }
239 return string_match_star;
240
241 case '.':
242 ch = '/'; // Look for internal form of package separator
243 break;
244
245 case '\\':
246 // Superquote in pattern escapes * , whitespace, and itself.
247 if (pattern_ptr < pattern_end) {
248 ch = (unsigned char) *pattern_ptr++;
249 }
250 break;
251 }
252
253 assert(ch > 0, "regular char only");
254 return ch;
255 }
256
257 static int class_name_getc(const char* &name_ptr,
258 const char* name_end) {
259 if (name_ptr == name_end) {
260 return string_match_eos;
261 }
262 int ch = (unsigned char) *name_ptr++;
263 if (ch == '.') {
264 ch = '/'; // Normalize to internal form of package separator
265 }
266 return ch; // plain character
267 }
268 };
269
270 static bool class_list_match_sane();
271
272 bool StringUtils::class_list_match(const char* class_pattern_list,
273 const char* class_name) {
274 assert(class_list_match_sane(), "");
275 if (class_pattern_list == NULL || class_name == NULL || class_name[0] == '\0')
276 return false;
277 ClassListMatcher clm;
278 return clm.string_match(class_pattern_list, class_name);
279 }
280
281 #ifdef ASSERT
282 static void
283 class_list_match_sane(const char* pat, const char* str, bool result = true) {
284 if (result) {
285 assert(StringUtils::class_list_match(pat, str), "%s ~ %s", pat, str);
286 } else {
287 assert(!StringUtils::class_list_match(pat, str), "%s !~ %s", pat, str);
288 }
289 }
290
291 static bool
292 class_list_match_sane() {
293 static bool done = false;
294 if (done) return true;
295 done = true;
296 class_list_match_sane("foo", "foo");
297 class_list_match_sane("foo,", "foo");
298 class_list_match_sane(",foo,", "foo");
299 class_list_match_sane("bar,foo", "foo");
300 class_list_match_sane("bar,foo,", "foo");
301 class_list_match_sane("*", "foo");
302 class_list_match_sane("foo.bar", "foo/bar");
303 class_list_match_sane("foo/bar", "foo.bar");
304 class_list_match_sane("\\foo", "foo");
305 class_list_match_sane("\\*foo", "*foo");
306 const char* foo = "foo!";
307 char buf[100], buf2[100];
308 const int m = strlen(foo);
309 for (int n = 0; n <= 1; n++) {
310 for (int a = -1; a <= 1; a++) {
311 for (int i = 0; i <= m; i++) {
312 for (int j = i; j <= m; j++) {
313 if (j == i && j > 0) continue;
314 for (int k = j; k <= m; k++) {
315 if (k == j && k > i) continue;
316 for (int l = k; l <= m; l++) {
317 if (l == k && l > j) continue;
318 char* bp = &buf[0];
319 strncpy(bp, foo + 0, i - 0); bp += i - 0;
320 *bp++ = '*';
321 strncpy(bp, foo + j, k - j); bp += k - j;
322 *bp++ = '*';
323 strncpy(bp, foo + l, m - l); bp += m - l;
324 if (n) {
325 *bp++ = 'N'; // make it fail
326 }
327 *bp++ = '\0';
328 if (a != 0) {
329 if (a < 0) {
330 strcpy(buf2, buf);
331 strcat(buf, "X*, ");
332 strcat(buf, buf2);
333 } else {
334 strcat(buf, ", Y");
335 }
336 }
337 class_list_match_sane(buf, foo, !n);
338 }
339 }
340 }
341 }
342 }
343 }
344 return true;
345 }
346 #endif //ASSERT
|