rev 57965 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes
1 /*
2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034
39 *
40 * @library /test/lib
41 * @library /lib/testlibrary/java/lang
42 * @build jdk.test.lib.RandomFactory
43 * @run main RegExTest
44 * @key randomness
45 */
46
47 import java.io.BufferedReader;
48 import java.io.ByteArrayInputStream;
49 import java.io.ByteArrayOutputStream;
50 import java.io.File;
51 import java.io.FileInputStream;
52 import java.io.InputStreamReader;
53 import java.io.ObjectInputStream;
54 import java.io.ObjectOutputStream;
55 import java.math.BigInteger;
56 import java.nio.CharBuffer;
57 import java.nio.file.Files;
58 import java.util.ArrayList;
59 import java.util.Arrays;
60 import java.util.List;
61 import java.util.Random;
62 import java.util.Scanner;
63 import java.util.function.Function;
64 import java.util.function.Predicate;
65 import java.util.regex.Matcher;
66 import java.util.regex.MatchResult;
67 import java.util.regex.Pattern;
68 import java.util.regex.PatternSyntaxException;
69 import jdk.test.lib.RandomFactory;
70
71 /**
72 * This is a test class created to check the operation of
73 * the Pattern and Matcher classes.
74 */
75 public class RegExTest {
76
77 private static Random generator = RandomFactory.getRandom();
78 private static boolean failure = false;
79 private static int failCount = 0;
80 private static String firstFailure = null;
81
82 /**
83 * Main to interpret arguments and run several tests.
84 *
85 */
86 public static void main(String[] args) throws Exception {
87 // Most of the tests are in a file
88 processFile("TestCases.txt");
89 //processFile("PerlCases.txt");
90 processFile("BMPTestCases.txt");
91 processFile("SupplementaryTestCases.txt");
92
93 // These test many randomly generated char patterns
94 bm();
95 slice();
96
97 // These are hard to put into the file
98 escapes();
99 blankInput();
100
101 // Substitition tests on randomly generated sequences
102 globalSubstitute();
103 stringbufferSubstitute();
104 stringbuilderSubstitute();
105
106 substitutionBasher();
107 substitutionBasher2();
108
109 // Canonical Equivalence
110 ceTest();
111
112 // Anchors
113 anchorTest();
114
115 // boolean match calls
116 matchesTest();
117 lookingAtTest();
118
119 // Pattern API
120 patternMatchesTest();
121
122 // Misc
123 lookbehindTest();
124 nullArgumentTest();
125 backRefTest();
126 groupCaptureTest();
127 caretTest();
128 charClassTest();
129 emptyPatternTest();
130 findIntTest();
131 group0Test();
132 longPatternTest();
133 octalTest();
134 ampersandTest();
135 negationTest();
136 splitTest();
137 appendTest();
138 caseFoldingTest();
139 commentsTest();
140 unixLinesTest();
141 replaceFirstTest();
142 gTest();
143 zTest();
144 serializeTest();
145 reluctantRepetitionTest();
146 multilineDollarTest();
147 dollarAtEndTest();
148 caretBetweenTerminatorsTest();
149 // This RFE rejected in Tiger numOccurrencesTest();
150 javaCharClassTest();
151 nonCaptureRepetitionTest();
152 notCapturedGroupCurlyMatchTest();
153 escapedSegmentTest();
154 literalPatternTest();
155 literalReplacementTest();
156 regionTest();
157 toStringTest();
158 negatedCharClassTest();
159 findFromTest();
160 boundsTest();
161 unicodeWordBoundsTest();
162 caretAtEndTest();
163 wordSearchTest();
164 hitEndTest();
165 toMatchResultTest();
166 toMatchResultTest2();
167 surrogatesInClassTest();
168 removeQEQuotingTest();
169 namedGroupCaptureTest();
170 nonBmpClassComplementTest();
171 unicodePropertiesTest();
172 unicodeHexNotationTest();
173 unicodeClassesTest();
174 unicodeCharacterNameTest();
175 horizontalAndVerticalWSTest();
176 linebreakTest();
177 branchTest();
178 groupCurlyNotFoundSuppTest();
179 groupCurlyBackoffTest();
180 patternAsPredicate();
181 patternAsMatchPredicate();
182 invalidFlags();
183 embeddedFlags();
184 grapheme();
185 expoBacktracking();
186 invalidGroupName();
187 illegalRepetitionRange();
188 surrogatePairWithCanonEq();
189
190 if (failure) {
191 throw new
192 RuntimeException("RegExTest failed, 1st failure: " +
193 firstFailure);
194 } else {
195 System.err.println("OKAY: All tests passed.");
196 }
197 }
198
199 // Utility functions
200
201 private static String getRandomAlphaString(int length) {
202 StringBuffer buf = new StringBuffer(length);
203 for (int i=0; i<length; i++) {
204 char randChar = (char)(97 + generator.nextInt(26));
205 buf.append(randChar);
206 }
207 return buf.toString();
208 }
209
210 private static void check(Matcher m, String expected) {
211 m.find();
212 if (!m.group().equals(expected))
213 failCount++;
214 }
215
216 private static void check(Matcher m, String result, boolean expected) {
217 m.find();
218 if (m.group().equals(result) != expected)
219 failCount++;
220 }
221
222 private static void check(Pattern p, String s, boolean expected) {
223 if (p.matcher(s).find() != expected)
224 failCount++;
225 }
226
227 private static void check(String p, String s, boolean expected) {
228 Matcher matcher = Pattern.compile(p).matcher(s);
229 if (matcher.find() != expected)
230 failCount++;
231 }
232
233 private static void check(String p, char c, boolean expected) {
234 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
235 Pattern pattern = Pattern.compile(propertyPattern);
236 char[] ca = new char[1]; ca[0] = c;
237 Matcher matcher = pattern.matcher(new String(ca));
238 if (!matcher.find())
239 failCount++;
240 }
241
242 private static void check(String p, int codePoint, boolean expected) {
243 String propertyPattern = expected ? "\\p" + p : "\\P" + p;
244 Pattern pattern = Pattern.compile(propertyPattern);
245 char[] ca = Character.toChars(codePoint);
246 Matcher matcher = pattern.matcher(new String(ca));
247 if (!matcher.find())
248 failCount++;
249 }
250
251 private static void check(String p, int flag, String input, String s,
252 boolean expected)
253 {
254 Pattern pattern = Pattern.compile(p, flag);
255 Matcher matcher = pattern.matcher(input);
256 if (expected)
257 check(matcher, s, expected);
258 else
259 check(pattern, input, false);
260 }
261
262 private static void report(String testName) {
263 int spacesToAdd = 30 - testName.length();
264 StringBuffer paddedNameBuffer = new StringBuffer(testName);
265 for (int i=0; i<spacesToAdd; i++)
266 paddedNameBuffer.append(" ");
267 String paddedName = paddedNameBuffer.toString();
268 System.err.println(paddedName + ": " +
269 (failCount==0 ? "Passed":"Failed("+failCount+")"));
270 if (failCount > 0) {
271 failure = true;
272
273 if (firstFailure == null) {
274 firstFailure = testName;
275 }
276 }
277
278 failCount = 0;
279 }
280
281 /**
282 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
283 * supplementary characters. This method does NOT fully take care
284 * of the regex syntax.
285 */
286 private static String toSupplementaries(String s) {
287 int length = s.length();
288 StringBuffer sb = new StringBuffer(length * 2);
289
290 for (int i = 0; i < length; ) {
291 char c = s.charAt(i++);
292 if (c == '\\') {
293 sb.append(c);
294 if (i < length) {
295 c = s.charAt(i++);
296 sb.append(c);
297 if (c == 'u') {
298 // assume no syntax error
299 sb.append(s.charAt(i++));
300 sb.append(s.charAt(i++));
301 sb.append(s.charAt(i++));
302 sb.append(s.charAt(i++));
303 }
304 }
305 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
306 sb.append('\ud800').append((char)('\udc00'+c));
307 } else {
308 sb.append(c);
309 }
310 }
311 return sb.toString();
312 }
313
314 // Regular expression tests
315
316 // This is for bug 6178785
317 // Test if an expected NPE gets thrown when passing in a null argument
318 private static boolean check(Runnable test) {
319 try {
320 test.run();
321 failCount++;
322 return false;
323 } catch (NullPointerException npe) {
324 return true;
325 }
326 }
327
328 private static void nullArgumentTest() {
329 check(() -> Pattern.compile(null));
330 check(() -> Pattern.matches(null, null));
331 check(() -> Pattern.matches("xyz", null));
332 check(() -> Pattern.quote(null));
333 check(() -> Pattern.compile("xyz").split(null));
334 check(() -> Pattern.compile("xyz").matcher(null));
335
336 final Matcher m = Pattern.compile("xyz").matcher("xyz");
337 m.matches();
338 check(() -> m.appendTail((StringBuffer) null));
339 check(() -> m.appendTail((StringBuilder)null));
340 check(() -> m.replaceAll((String) null));
341 check(() -> m.replaceAll((Function<MatchResult, String>)null));
342 check(() -> m.replaceFirst((String)null));
343 check(() -> m.replaceFirst((Function<MatchResult, String>) null));
344 check(() -> m.appendReplacement((StringBuffer)null, null));
345 check(() -> m.appendReplacement((StringBuilder)null, null));
346 check(() -> m.reset(null));
347 check(() -> Matcher.quoteReplacement(null));
348 //check(() -> m.usePattern(null));
349
350 report("Null Argument");
351 }
352
353 // This is for bug6635133
354 // Test if surrogate pair in Unicode escapes can be handled correctly.
355 private static void surrogatesInClassTest() throws Exception {
356 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
357 Matcher matcher = pattern.matcher("\ud834\udd22");
358 if (!matcher.find())
359 failCount++;
360
361 report("Surrogate pair in Unicode escape");
362 }
363
364 // This is for bug6990617
365 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
366 // char encoding is only 2 or 3 digits instead of 4 and the first quoted
367 // char is an octal digit.
368 private static void removeQEQuotingTest() throws Exception {
369 Pattern pattern =
370 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
371 Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
372 if (!matcher.find())
373 failCount++;
374
375 report("Remove Q/E Quoting");
376 }
377
378 // This is for bug 4988891
379 // Test toMatchResult to see that it is a copy of the Matcher
380 // that is not affected by subsequent operations on the original
381 private static void toMatchResultTest() throws Exception {
382 Pattern pattern = Pattern.compile("squid");
383 Matcher matcher = pattern.matcher(
384 "agiantsquidofdestinyasmallsquidoffate");
385 matcher.find();
386 int matcherStart1 = matcher.start();
387 MatchResult mr = matcher.toMatchResult();
388 if (mr == matcher)
389 failCount++;
390 int resultStart1 = mr.start();
391 if (matcherStart1 != resultStart1)
392 failCount++;
393 matcher.find();
394 int matcherStart2 = matcher.start();
395 int resultStart2 = mr.start();
396 if (matcherStart2 == resultStart2)
397 failCount++;
398 if (resultStart1 != resultStart2)
399 failCount++;
400 MatchResult mr2 = matcher.toMatchResult();
401 if (mr == mr2)
402 failCount++;
403 if (mr2.start() != matcherStart2)
404 failCount++;
405 report("toMatchResult is a copy");
406 }
407
408 private static void checkExpectedISE(Runnable test) {
409 try {
410 test.run();
411 failCount++;
412 } catch (IllegalStateException x) {
413 } catch (IndexOutOfBoundsException xx) {
414 failCount++;
415 }
416 }
417
418 private static void checkExpectedIOOE(Runnable test) {
419 try {
420 test.run();
421 failCount++;
422 } catch (IndexOutOfBoundsException x) {}
423 }
424
425 // This is for bug 8074678
426 // Test the result of toMatchResult throws ISE if no match is availble
427 private static void toMatchResultTest2() throws Exception {
428 Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
429 matcher.find();
430 MatchResult mr = matcher.toMatchResult();
431
432 checkExpectedISE(() -> mr.start());
433 checkExpectedISE(() -> mr.start(2));
434 checkExpectedISE(() -> mr.end());
435 checkExpectedISE(() -> mr.end(2));
436 checkExpectedISE(() -> mr.group());
437 checkExpectedISE(() -> mr.group(2));
438
439 matcher = Pattern.compile("(match)").matcher("there is a match");
440 matcher.find();
441 MatchResult mr2 = matcher.toMatchResult();
442 checkExpectedIOOE(() -> mr2.start(2));
443 checkExpectedIOOE(() -> mr2.end(2));
444 checkExpectedIOOE(() -> mr2.group(2));
445
446 report("toMatchResult2 appropriate exceptions");
447 }
448
449 // This is for bug 5013885
450 // Must test a slice to see if it reports hitEnd correctly
451 private static void hitEndTest() throws Exception {
452 // Basic test of Slice node
453 Pattern p = Pattern.compile("^squidattack");
454 Matcher m = p.matcher("squack");
455 m.find();
456 if (m.hitEnd())
457 failCount++;
458 m.reset("squid");
459 m.find();
460 if (!m.hitEnd())
461 failCount++;
462
463 // Test Slice, SliceA and SliceU nodes
464 for (int i=0; i<3; i++) {
465 int flags = 0;
466 if (i==1) flags = Pattern.CASE_INSENSITIVE;
467 if (i==2) flags = Pattern.UNICODE_CASE;
468 p = Pattern.compile("^abc", flags);
469 m = p.matcher("ad");
470 m.find();
471 if (m.hitEnd())
472 failCount++;
473 m.reset("ab");
474 m.find();
475 if (!m.hitEnd())
476 failCount++;
477 }
478
479 // Test Boyer-Moore node
480 p = Pattern.compile("catattack");
481 m = p.matcher("attack");
482 m.find();
483 if (!m.hitEnd())
484 failCount++;
485
486 p = Pattern.compile("catattack");
487 m = p.matcher("attackattackattackcatatta");
488 m.find();
489 if (!m.hitEnd())
490 failCount++;
491
492 // 8184706: Matching u+0d at EOL against \R should hit-end
493 p = Pattern.compile("...\\R");
494 m = p.matcher("cat" + (char)0x0a);
495 m.find();
496 if (m.hitEnd())
497 failCount++;
498
499 m = p.matcher("cat" + (char)0x0d);
500 m.find();
501 if (!m.hitEnd())
502 failCount++;
503
504 m = p.matcher("cat" + (char)0x0d + (char)0x0a);
505 m.find();
506 if (m.hitEnd())
507 failCount++;
508
509 report("hitEnd");
510 }
511
512 // This is for bug 4997476
513 // It is weird code submitted by customer demonstrating a regression
514 private static void wordSearchTest() throws Exception {
515 String testString = new String("word1 word2 word3");
516 Pattern p = Pattern.compile("\\b");
517 Matcher m = p.matcher(testString);
518 int position = 0;
519 int start = 0;
520 while (m.find(position)) {
521 start = m.start();
522 if (start == testString.length())
523 break;
524 if (m.find(start+1)) {
525 position = m.start();
526 } else {
527 position = testString.length();
528 }
529 if (testString.substring(start, position).equals(" "))
530 continue;
531 if (!testString.substring(start, position-1).startsWith("word"))
532 failCount++;
533 }
534 report("Customer word search");
535 }
536
537 // This is for bug 4994840
538 private static void caretAtEndTest() throws Exception {
539 // Problem only occurs with multiline patterns
540 // containing a beginning-of-line caret "^" followed
541 // by an expression that also matches the empty string.
542 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
543 Matcher matcher = pattern.matcher("\r");
544 matcher.find();
545 matcher.find();
546 report("Caret at end");
547 }
548
549 // This test is for 4979006
550 // Check to see if word boundary construct properly handles unicode
551 // non spacing marks
552 private static void unicodeWordBoundsTest() throws Exception {
553 String spaces = " ";
554 String wordChar = "a";
555 String nsm = "\u030a";
556
557 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
558
559 Pattern pattern = Pattern.compile("\\b");
560 Matcher matcher = pattern.matcher("");
561 // S=other B=word character N=non spacing mark .=word boundary
562 // SS.BB.SS
563 String input = spaces + wordChar + wordChar + spaces;
564 twoFindIndexes(input, matcher, 2, 4);
565 // SS.BBN.SS
566 input = spaces + wordChar +wordChar + nsm + spaces;
567 twoFindIndexes(input, matcher, 2, 5);
568 // SS.BN.SS
569 input = spaces + wordChar + nsm + spaces;
570 twoFindIndexes(input, matcher, 2, 4);
571 // SS.BNN.SS
572 input = spaces + wordChar + nsm + nsm + spaces;
573 twoFindIndexes(input, matcher, 2, 5);
574 // SSN.BB.SS
575 input = spaces + nsm + wordChar + wordChar + spaces;
576 twoFindIndexes(input, matcher, 3, 5);
577 // SS.BNB.SS
578 input = spaces + wordChar + nsm + wordChar + spaces;
579 twoFindIndexes(input, matcher, 2, 5);
580 // SSNNSS
581 input = spaces + nsm + nsm + spaces;
582 matcher.reset(input);
583 if (matcher.find())
584 failCount++;
585 // SSN.BBN.SS
586 input = spaces + nsm + wordChar + wordChar + nsm + spaces;
587 twoFindIndexes(input, matcher, 3, 6);
588
589 report("Unicode word boundary");
590 }
591
592 private static void twoFindIndexes(String input, Matcher matcher, int a,
593 int b) throws Exception
594 {
595 matcher.reset(input);
596 matcher.find();
597 if (matcher.start() != a)
598 failCount++;
599 matcher.find();
600 if (matcher.start() != b)
601 failCount++;
602 }
603
604 // This test is for 6284152
605 static void check(String regex, String input, String[] expected) {
606 List<String> result = new ArrayList<String>();
607 Pattern p = Pattern.compile(regex);
608 Matcher m = p.matcher(input);
609 while (m.find()) {
610 result.add(m.group());
611 }
612 if (!Arrays.asList(expected).equals(result))
613 failCount++;
614 }
615
616 private static void lookbehindTest() throws Exception {
617 //Positive
618 check("(?<=%.{0,5})foo\\d",
619 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
620 new String[]{"foo1", "foo2", "foo3"});
621
622 //boundary at end of the lookbehind sub-regex should work consistently
623 //with the boundary just after the lookbehind sub-regex
624 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
625 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
626 check("(?<!abc )\\bfoo", "abc foo", new String[0]);
627 check("(?<!abc \\b)foo", "abc foo", new String[0]);
628
629 //Negative
630 check("(?<!%.{0,5})foo\\d",
631 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
632 new String[] {"foo4", "foo5"});
633
634 //Positive greedy
635 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
636
637 //Positive reluctant
638 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
639
640 //supplementary
641 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
642 new String[] {"fo\ud800\udc00o"});
643 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
644 new String[] {"fo\ud800\udc00o"});
645 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
646 new String[] {"fo\ud800\udc00o"});
647 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
648 new String[] {"fo\ud800\udc00o"});
649 report("Lookbehind");
650 }
651
652 // This test is for 4938995
653 // Check to see if weak region boundaries are transparent to
654 // lookahead and lookbehind constructs
655 private static void boundsTest() throws Exception {
656 String fullMessage = "catdogcat";
657 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
658 Matcher matcher = pattern.matcher("catdogca");
659 matcher.useTransparentBounds(true);
660 if (matcher.find())
661 failCount++;
662 matcher.reset("atdogcat");
663 if (matcher.find())
664 failCount++;
665 matcher.reset(fullMessage);
666 if (!matcher.find())
667 failCount++;
668 matcher.reset(fullMessage);
669 matcher.region(0,9);
670 if (!matcher.find())
671 failCount++;
672 matcher.reset(fullMessage);
673 matcher.region(0,6);
674 if (!matcher.find())
675 failCount++;
676 matcher.reset(fullMessage);
677 matcher.region(3,6);
678 if (!matcher.find())
679 failCount++;
680 matcher.useTransparentBounds(false);
681 if (matcher.find())
682 failCount++;
683
684 // Negative lookahead/lookbehind
685 pattern = Pattern.compile("(?<!cat)dog(?!cat)");
686 matcher = pattern.matcher("dogcat");
687 matcher.useTransparentBounds(true);
688 matcher.region(0,3);
689 if (matcher.find())
690 failCount++;
691 matcher.reset("catdog");
692 matcher.region(3,6);
693 if (matcher.find())
694 failCount++;
695 matcher.useTransparentBounds(false);
696 matcher.reset("dogcat");
697 matcher.region(0,3);
698 if (!matcher.find())
699 failCount++;
700 matcher.reset("catdog");
701 matcher.region(3,6);
702 if (!matcher.find())
703 failCount++;
704
705 report("Region bounds transparency");
706 }
707
708 // This test is for 4945394
709 private static void findFromTest() throws Exception {
710 String message = "This is 40 $0 message.";
711 Pattern pat = Pattern.compile("\\$0");
712 Matcher match = pat.matcher(message);
713 if (!match.find())
714 failCount++;
715 if (match.find())
716 failCount++;
717 if (match.find())
718 failCount++;
719 report("Check for alternating find");
720 }
721
722 // This test is for 4872664 and 4892980
723 private static void negatedCharClassTest() throws Exception {
724 Pattern pattern = Pattern.compile("[^>]");
725 Matcher matcher = pattern.matcher("\u203A");
726 if (!matcher.matches())
727 failCount++;
728 pattern = Pattern.compile("[^fr]");
729 matcher = pattern.matcher("a");
730 if (!matcher.find())
731 failCount++;
732 matcher.reset("\u203A");
733 if (!matcher.find())
734 failCount++;
735 String s = "for";
736 String result[] = s.split("[^fr]");
737 if (!result[0].equals("f"))
738 failCount++;
739 if (!result[1].equals("r"))
740 failCount++;
741 s = "f\u203Ar";
742 result = s.split("[^fr]");
743 if (!result[0].equals("f"))
744 failCount++;
745 if (!result[1].equals("r"))
746 failCount++;
747
748 // Test adding to bits, subtracting a node, then adding to bits again
749 pattern = Pattern.compile("[^f\u203Ar]");
750 matcher = pattern.matcher("a");
751 if (!matcher.find())
752 failCount++;
753 matcher.reset("f");
754 if (matcher.find())
755 failCount++;
756 matcher.reset("\u203A");
757 if (matcher.find())
758 failCount++;
759 matcher.reset("r");
760 if (matcher.find())
761 failCount++;
762 matcher.reset("\u203B");
763 if (!matcher.find())
764 failCount++;
765
766 // Test subtracting a node, adding to bits, subtracting again
767 pattern = Pattern.compile("[^\u203Ar\u203B]");
768 matcher = pattern.matcher("a");
769 if (!matcher.find())
770 failCount++;
771 matcher.reset("\u203A");
772 if (matcher.find())
773 failCount++;
774 matcher.reset("r");
775 if (matcher.find())
776 failCount++;
777 matcher.reset("\u203B");
778 if (matcher.find())
779 failCount++;
780 matcher.reset("\u203C");
781 if (!matcher.find())
782 failCount++;
783
784 report("Negated Character Class");
785 }
786
787 // This test is for 4628291
788 private static void toStringTest() throws Exception {
789 Pattern pattern = Pattern.compile("b+");
790 if (pattern.toString() != "b+")
791 failCount++;
792 Matcher matcher = pattern.matcher("aaabbbccc");
793 String matcherString = matcher.toString(); // unspecified
794 matcher.find();
795 matcherString = matcher.toString(); // unspecified
796 matcher.region(0,3);
797 matcherString = matcher.toString(); // unspecified
798 matcher.reset();
799 matcherString = matcher.toString(); // unspecified
800 report("toString");
801 }
802
803 // This test is for 4808962
804 private static void literalPatternTest() throws Exception {
805 int flags = Pattern.LITERAL;
806
807 Pattern pattern = Pattern.compile("abc\\t$^", flags);
808 check(pattern, "abc\\t$^", true);
809
810 pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
811 check(pattern, "abc\\t$^", true);
812
813 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
814 check(pattern, "\\Qa^$bcabc\\E", true);
815 check(pattern, "a^$bcabc", false);
816
817 pattern = Pattern.compile("\\\\Q\\\\E");
818 check(pattern, "\\Q\\E", true);
819
820 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
821 check(pattern, "abcefg\\Q\\Ehij", true);
822
823 pattern = Pattern.compile("\\\\\\Q\\\\E");
824 check(pattern, "\\\\\\\\", true);
825
826 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
827 check(pattern, "\\Qa^$bcabc\\E", true);
828 check(pattern, "a^$bcabc", false);
829
830 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
831 check(pattern, "\\Qabc\\Edef", true);
832 check(pattern, "abcdef", false);
833
834 pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
835 check(pattern, "abc\\Edef", true);
836 check(pattern, "abcdef", false);
837
838 pattern = Pattern.compile(Pattern.quote("\\E"));
839 check(pattern, "\\E", true);
840
841 pattern = Pattern.compile("((((abc.+?:)", flags);
842 check(pattern, "((((abc.+?:)", true);
843
844 flags |= Pattern.MULTILINE;
845
846 pattern = Pattern.compile("^cat$", flags);
847 check(pattern, "abc^cat$def", true);
848 check(pattern, "cat", false);
849
850 flags |= Pattern.CASE_INSENSITIVE;
851
852 pattern = Pattern.compile("abcdef", flags);
853 check(pattern, "ABCDEF", true);
854 check(pattern, "AbCdEf", true);
855
856 flags |= Pattern.DOTALL;
857
858 pattern = Pattern.compile("a...b", flags);
859 check(pattern, "A...b", true);
860 check(pattern, "Axxxb", false);
861
862 flags |= Pattern.CANON_EQ;
863
864 Pattern p = Pattern.compile("testa\u030a", flags);
865 check(pattern, "testa\u030a", false);
866 check(pattern, "test\u00e5", false);
867
868 // Supplementary character test
869 flags = Pattern.LITERAL;
870
871 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
872 check(pattern, toSupplementaries("abc\\t$^"), true);
873
874 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
875 check(pattern, toSupplementaries("abc\\t$^"), true);
876
877 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
878 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
879 check(pattern, toSupplementaries("a^$bcabc"), false);
880
881 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
882 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
883 check(pattern, toSupplementaries("a^$bcabc"), false);
884
885 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
886 check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
887 check(pattern, toSupplementaries("abcdef"), false);
888
889 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
890 check(pattern, toSupplementaries("abc\\Edef"), true);
891 check(pattern, toSupplementaries("abcdef"), false);
892
893 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
894 check(pattern, toSupplementaries("((((abc.+?:)"), true);
895
896 flags |= Pattern.MULTILINE;
897
898 pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
899 check(pattern, toSupplementaries("abc^cat$def"), true);
900 check(pattern, toSupplementaries("cat"), false);
901
902 flags |= Pattern.DOTALL;
903
904 // note: this is case-sensitive.
905 pattern = Pattern.compile(toSupplementaries("a...b"), flags);
906 check(pattern, toSupplementaries("a...b"), true);
907 check(pattern, toSupplementaries("axxxb"), false);
908
909 flags |= Pattern.CANON_EQ;
910
911 String t = toSupplementaries("test");
912 p = Pattern.compile(t + "a\u030a", flags);
913 check(pattern, t + "a\u030a", false);
914 check(pattern, t + "\u00e5", false);
915
916 report("Literal pattern");
917 }
918
919 // This test is for 4803179
920 // This test is also for 4808962, replacement parts
921 private static void literalReplacementTest() throws Exception {
922 int flags = Pattern.LITERAL;
923
924 Pattern pattern = Pattern.compile("abc", flags);
925 Matcher matcher = pattern.matcher("zzzabczzz");
926 String replaceTest = "$0";
927 String result = matcher.replaceAll(replaceTest);
928 if (!result.equals("zzzabczzz"))
929 failCount++;
930
931 matcher.reset();
932 String literalReplacement = matcher.quoteReplacement(replaceTest);
933 result = matcher.replaceAll(literalReplacement);
934 if (!result.equals("zzz$0zzz"))
935 failCount++;
936
937 matcher.reset();
938 replaceTest = "\\t$\\$";
939 literalReplacement = matcher.quoteReplacement(replaceTest);
940 result = matcher.replaceAll(literalReplacement);
941 if (!result.equals("zzz\\t$\\$zzz"))
942 failCount++;
943
944 // Supplementary character test
945 pattern = Pattern.compile(toSupplementaries("abc"), flags);
946 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
947 replaceTest = "$0";
948 result = matcher.replaceAll(replaceTest);
949 if (!result.equals(toSupplementaries("zzzabczzz")))
950 failCount++;
951
952 matcher.reset();
953 literalReplacement = matcher.quoteReplacement(replaceTest);
954 result = matcher.replaceAll(literalReplacement);
955 if (!result.equals(toSupplementaries("zzz$0zzz")))
956 failCount++;
957
958 matcher.reset();
959 replaceTest = "\\t$\\$";
960 literalReplacement = matcher.quoteReplacement(replaceTest);
961 result = matcher.replaceAll(literalReplacement);
962 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
963 failCount++;
964
965 // IAE should be thrown if backslash or '$' is the last character
966 // in replacement string
967 try {
968 "\uac00".replaceAll("\uac00", "$");
969 failCount++;
970 } catch (IllegalArgumentException iie) {
971 } catch (Exception e) {
972 failCount++;
973 }
974 try {
975 "\uac00".replaceAll("\uac00", "\\");
976 failCount++;
977 } catch (IllegalArgumentException iie) {
978 } catch (Exception e) {
979 failCount++;
980 }
981 report("Literal replacement");
982 }
983
984 // This test is for 4757029
985 private static void regionTest() throws Exception {
986 Pattern pattern = Pattern.compile("abc");
987 Matcher matcher = pattern.matcher("abcdefabc");
988
989 matcher.region(0,9);
990 if (!matcher.find())
991 failCount++;
992 if (!matcher.find())
993 failCount++;
994 matcher.region(0,3);
995 if (!matcher.find())
996 failCount++;
997 matcher.region(3,6);
998 if (matcher.find())
999 failCount++;
1000 matcher.region(0,2);
1001 if (matcher.find())
1002 failCount++;
1003
1004 expectRegionFail(matcher, 1, -1);
1005 expectRegionFail(matcher, -1, -1);
1006 expectRegionFail(matcher, -1, 1);
1007 expectRegionFail(matcher, 5, 3);
1008 expectRegionFail(matcher, 5, 12);
1009 expectRegionFail(matcher, 12, 12);
1010
1011 pattern = Pattern.compile("^abc$");
1012 matcher = pattern.matcher("zzzabczzz");
1013 matcher.region(0,9);
1014 if (matcher.find())
1015 failCount++;
1016 matcher.region(3,6);
1017 if (!matcher.find())
1018 failCount++;
1019 matcher.region(3,6);
1020 matcher.useAnchoringBounds(false);
1021 if (matcher.find())
1022 failCount++;
1023
1024 // Supplementary character test
1025 pattern = Pattern.compile(toSupplementaries("abc"));
1026 matcher = pattern.matcher(toSupplementaries("abcdefabc"));
1027 matcher.region(0,9*2);
1028 if (!matcher.find())
1029 failCount++;
1030 if (!matcher.find())
1031 failCount++;
1032 matcher.region(0,3*2);
1033 if (!matcher.find())
1034 failCount++;
1035 matcher.region(1,3*2);
1036 if (matcher.find())
1037 failCount++;
1038 matcher.region(3*2,6*2);
1039 if (matcher.find())
1040 failCount++;
1041 matcher.region(0,2*2);
1042 if (matcher.find())
1043 failCount++;
1044 matcher.region(0,2*2+1);
1045 if (matcher.find())
1046 failCount++;
1047
1048 expectRegionFail(matcher, 1*2, -1);
1049 expectRegionFail(matcher, -1, -1);
1050 expectRegionFail(matcher, -1, 1*2);
1051 expectRegionFail(matcher, 5*2, 3*2);
1052 expectRegionFail(matcher, 5*2, 12*2);
1053 expectRegionFail(matcher, 12*2, 12*2);
1054
1055 pattern = Pattern.compile(toSupplementaries("^abc$"));
1056 matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1057 matcher.region(0,9*2);
1058 if (matcher.find())
1059 failCount++;
1060 matcher.region(3*2,6*2);
1061 if (!matcher.find())
1062 failCount++;
1063 matcher.region(3*2+1,6*2);
1064 if (matcher.find())
1065 failCount++;
1066 matcher.region(3*2,6*2-1);
1067 if (matcher.find())
1068 failCount++;
1069 matcher.region(3*2,6*2);
1070 matcher.useAnchoringBounds(false);
1071 if (matcher.find())
1072 failCount++;
1073
1074 // JDK-8230829
1075 pattern = Pattern.compile("\\ud800\\udc61");
1076 matcher = pattern.matcher("\ud800\udc61");
1077 matcher.region(0, 1);
1078 if (matcher.find()) {
1079 failCount++;
1080 System.out.println("Matched a surrogate pair" +
1081 " that crosses border of region");
1082 }
1083 if (!matcher.hitEnd()) {
1084 failCount++;
1085 System.out.println("Expected to hit the end when" +
1086 " matching a surrogate pair crossing region");
1087 }
1088
1089 report("Regions");
1090 }
1091
1092 private static void expectRegionFail(Matcher matcher, int index1,
1093 int index2)
1094 {
1095 try {
1096 matcher.region(index1, index2);
1097 failCount++;
1098 } catch (IndexOutOfBoundsException ioobe) {
1099 // Correct result
1100 } catch (IllegalStateException ise) {
1101 // Correct result
1102 }
1103 }
1104
1105 // This test is for 4803197
1106 private static void escapedSegmentTest() throws Exception {
1107
1108 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1109 check(pattern, "dir1\\dir2", true);
1110
1111 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1112 check(pattern, "dir1\\dir2\\", true);
1113
1114 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1115 check(pattern, "dir1\\dir2\\", true);
1116
1117 // Supplementary character test
1118 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1119 check(pattern, toSupplementaries("dir1\\dir2"), true);
1120
1121 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1122 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1123
1124 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1125 check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1126
1127 report("Escaped segment");
1128 }
1129
1130 // This test is for 4792284
1131 private static void nonCaptureRepetitionTest() throws Exception {
1132 String input = "abcdefgh;";
1133
1134 String[] patterns = new String[] {
1135 "(?:\\w{4})+;",
1136 "(?:\\w{8})*;",
1137 "(?:\\w{2}){2,4};",
1138 "(?:\\w{4}){2,};", // only matches the
1139 ".*?(?:\\w{5})+;", // specified minimum
1140 ".*?(?:\\w{9})*;", // number of reps - OK
1141 "(?:\\w{4})+?;", // lazy repetition - OK
1142 "(?:\\w{4})++;", // possessive repetition - OK
1143 "(?:\\w{2,}?)+;", // non-deterministic - OK
1144 "(\\w{4})+;", // capturing group - OK
1145 };
1146
1147 for (int i = 0; i < patterns.length; i++) {
1148 // Check find()
1149 check(patterns[i], 0, input, input, true);
1150 // Check matches()
1151 Pattern p = Pattern.compile(patterns[i]);
1152 Matcher m = p.matcher(input);
1153
1154 if (m.matches()) {
1155 if (!m.group(0).equals(input))
1156 failCount++;
1157 } else {
1158 failCount++;
1159 }
1160 }
1161
1162 report("Non capturing repetition");
1163 }
1164
1165 // This test is for 6358731
1166 private static void notCapturedGroupCurlyMatchTest() throws Exception {
1167 Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1168 Matcher matcher = pattern.matcher("abcd");
1169 if (!matcher.matches() ||
1170 matcher.group(1) != null ||
1171 !matcher.group(2).equals("abcd")) {
1172 failCount++;
1173 }
1174 report("Not captured GroupCurly");
1175 }
1176
1177 // This test is for 4706545
1178 private static void javaCharClassTest() throws Exception {
1179 for (int i=0; i<1000; i++) {
1180 char c = (char)generator.nextInt();
1181 check("{javaLowerCase}", c, Character.isLowerCase(c));
1182 check("{javaUpperCase}", c, Character.isUpperCase(c));
1183 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1184 check("{javaTitleCase}", c, Character.isTitleCase(c));
1185 check("{javaDigit}", c, Character.isDigit(c));
1186 check("{javaDefined}", c, Character.isDefined(c));
1187 check("{javaLetter}", c, Character.isLetter(c));
1188 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1189 check("{javaJavaIdentifierStart}", c,
1190 Character.isJavaIdentifierStart(c));
1191 check("{javaJavaIdentifierPart}", c,
1192 Character.isJavaIdentifierPart(c));
1193 check("{javaUnicodeIdentifierStart}", c,
1194 Character.isUnicodeIdentifierStart(c));
1195 check("{javaUnicodeIdentifierPart}", c,
1196 Character.isUnicodeIdentifierPart(c));
1197 check("{javaIdentifierIgnorable}", c,
1198 Character.isIdentifierIgnorable(c));
1199 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1200 check("{javaWhitespace}", c, Character.isWhitespace(c));
1201 check("{javaISOControl}", c, Character.isISOControl(c));
1202 check("{javaMirrored}", c, Character.isMirrored(c));
1203
1204 }
1205
1206 // Supplementary character test
1207 for (int i=0; i<1000; i++) {
1208 int c = generator.nextInt(Character.MAX_CODE_POINT
1209 - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1210 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1211 check("{javaLowerCase}", c, Character.isLowerCase(c));
1212 check("{javaUpperCase}", c, Character.isUpperCase(c));
1213 check("{javaUpperCase}+", c, Character.isUpperCase(c));
1214 check("{javaTitleCase}", c, Character.isTitleCase(c));
1215 check("{javaDigit}", c, Character.isDigit(c));
1216 check("{javaDefined}", c, Character.isDefined(c));
1217 check("{javaLetter}", c, Character.isLetter(c));
1218 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1219 check("{javaJavaIdentifierStart}", c,
1220 Character.isJavaIdentifierStart(c));
1221 check("{javaJavaIdentifierPart}", c,
1222 Character.isJavaIdentifierPart(c));
1223 check("{javaUnicodeIdentifierStart}", c,
1224 Character.isUnicodeIdentifierStart(c));
1225 check("{javaUnicodeIdentifierPart}", c,
1226 Character.isUnicodeIdentifierPart(c));
1227 check("{javaIdentifierIgnorable}", c,
1228 Character.isIdentifierIgnorable(c));
1229 check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1230 check("{javaWhitespace}", c, Character.isWhitespace(c));
1231 check("{javaISOControl}", c, Character.isISOControl(c));
1232 check("{javaMirrored}", c, Character.isMirrored(c));
1233 }
1234
1235 report("Java character classes");
1236 }
1237
1238 // This test is for 4523620
1239 /*
1240 private static void numOccurrencesTest() throws Exception {
1241 Pattern pattern = Pattern.compile("aaa");
1242
1243 if (pattern.numOccurrences("aaaaaa", false) != 2)
1244 failCount++;
1245 if (pattern.numOccurrences("aaaaaa", true) != 4)
1246 failCount++;
1247
1248 pattern = Pattern.compile("^");
1249 if (pattern.numOccurrences("aaaaaa", false) != 1)
1250 failCount++;
1251 if (pattern.numOccurrences("aaaaaa", true) != 1)
1252 failCount++;
1253
1254 report("Number of Occurrences");
1255 }
1256 */
1257
1258 // This test is for 4776374
1259 private static void caretBetweenTerminatorsTest() throws Exception {
1260 int flags1 = Pattern.DOTALL;
1261 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1262 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1263 int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1264
1265 check("^....", flags1, "test\ntest", "test", true);
1266 check(".....^", flags1, "test\ntest", "test", false);
1267 check(".....^", flags1, "test\n", "test", false);
1268 check("....^", flags1, "test\r\n", "test", false);
1269
1270 check("^....", flags2, "test\ntest", "test", true);
1271 check("....^", flags2, "test\ntest", "test", false);
1272 check(".....^", flags2, "test\n", "test", false);
1273 check("....^", flags2, "test\r\n", "test", false);
1274
1275 check("^....", flags3, "test\ntest", "test", true);
1276 check(".....^", flags3, "test\ntest", "test\n", true);
1277 check(".....^", flags3, "test\u0085test", "test\u0085", false);
1278 check(".....^", flags3, "test\n", "test", false);
1279 check(".....^", flags3, "test\r\n", "test", false);
1280 check("......^", flags3, "test\r\ntest", "test\r\n", true);
1281
1282 check("^....", flags4, "test\ntest", "test", true);
1283 check(".....^", flags3, "test\ntest", "test\n", true);
1284 check(".....^", flags4, "test\u0085test", "test\u0085", true);
1285 check(".....^", flags4, "test\n", "test\n", false);
1286 check(".....^", flags4, "test\r\n", "test\r", false);
1287
1288 // Supplementary character test
1289 String t = toSupplementaries("test");
1290 check("^....", flags1, t+"\n"+t, t, true);
1291 check(".....^", flags1, t+"\n"+t, t, false);
1292 check(".....^", flags1, t+"\n", t, false);
1293 check("....^", flags1, t+"\r\n", t, false);
1294
1295 check("^....", flags2, t+"\n"+t, t, true);
1296 check("....^", flags2, t+"\n"+t, t, false);
1297 check(".....^", flags2, t+"\n", t, false);
1298 check("....^", flags2, t+"\r\n", t, false);
1299
1300 check("^....", flags3, t+"\n"+t, t, true);
1301 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1302 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1303 check(".....^", flags3, t+"\n", t, false);
1304 check(".....^", flags3, t+"\r\n", t, false);
1305 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1306
1307 check("^....", flags4, t+"\n"+t, t, true);
1308 check(".....^", flags3, t+"\n"+t, t+"\n", true);
1309 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1310 check(".....^", flags4, t+"\n", t+"\n", false);
1311 check(".....^", flags4, t+"\r\n", t+"\r", false);
1312
1313 report("Caret between terminators");
1314 }
1315
1316 // This test is for 4727935
1317 private static void dollarAtEndTest() throws Exception {
1318 int flags1 = Pattern.DOTALL;
1319 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1320 int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1321
1322 check("....$", flags1, "test\n", "test", true);
1323 check("....$", flags1, "test\r\n", "test", true);
1324 check(".....$", flags1, "test\n", "test\n", true);
1325 check(".....$", flags1, "test\u0085", "test\u0085", true);
1326 check("....$", flags1, "test\u0085", "test", true);
1327
1328 check("....$", flags2, "test\n", "test", true);
1329 check(".....$", flags2, "test\n", "test\n", true);
1330 check(".....$", flags2, "test\u0085", "test\u0085", true);
1331 check("....$", flags2, "test\u0085", "est\u0085", true);
1332
1333 check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1334 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1335 check("....$blah", flags3, "test\nblah", "!!!!", false);
1336 check(".....$blah", flags3, "test\nblah", "!!!!", false);
1337
1338 // Supplementary character test
1339 String t = toSupplementaries("test");
1340 String b = toSupplementaries("blah");
1341 check("....$", flags1, t+"\n", t, true);
1342 check("....$", flags1, t+"\r\n", t, true);
1343 check(".....$", flags1, t+"\n", t+"\n", true);
1344 check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1345 check("....$", flags1, t+"\u0085", t, true);
1346
1347 check("....$", flags2, t+"\n", t, true);
1348 check(".....$", flags2, t+"\n", t+"\n", true);
1349 check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1350 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1351
1352 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1353 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1354 check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1355 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1356
1357 report("Dollar at End");
1358 }
1359
1360 // This test is for 4711773
1361 private static void multilineDollarTest() throws Exception {
1362 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1363 Matcher matcher = findCR.matcher("first bit\nsecond bit");
1364 matcher.find();
1365 if (matcher.start(0) != 9)
1366 failCount++;
1367 matcher.find();
1368 if (matcher.start(0) != 20)
1369 failCount++;
1370
1371 // Supplementary character test
1372 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
1373 matcher.find();
1374 if (matcher.start(0) != 9*2)
1375 failCount++;
1376 matcher.find();
1377 if (matcher.start(0) != 20*2)
1378 failCount++;
1379
1380 report("Multiline Dollar");
1381 }
1382
1383 private static void reluctantRepetitionTest() throws Exception {
1384 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1385 check(p, "1 word word word 2", true);
1386 check(p, "1 wor wo w 2", true);
1387 check(p, "1 word word 2", true);
1388 check(p, "1 word 2", true);
1389 check(p, "1 wo w w 2", true);
1390 check(p, "1 wo w 2", true);
1391 check(p, "1 wor w 2", true);
1392
1393 p = Pattern.compile("([a-z])+?c");
1394 Matcher m = p.matcher("ababcdefdec");
1395 check(m, "ababc");
1396
1397 // Supplementary character test
1398 p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1399 m = p.matcher(toSupplementaries("ababcdefdec"));
1400 check(m, toSupplementaries("ababc"));
1401
1402 report("Reluctant Repetition");
1403 }
1404
1405 private static Pattern serializedPattern(Pattern p) throws Exception {
1406 ByteArrayOutputStream baos = new ByteArrayOutputStream();
1407 ObjectOutputStream oos = new ObjectOutputStream(baos);
1408 oos.writeObject(p);
1409 oos.close();
1410 try (ObjectInputStream ois = new ObjectInputStream(
1411 new ByteArrayInputStream(baos.toByteArray()))) {
1412 return (Pattern)ois.readObject();
1413 }
1414 }
1415
1416 private static void serializeTest() throws Exception {
1417 String patternStr = "(b)";
1418 String matchStr = "b";
1419 Pattern pattern = Pattern.compile(patternStr);
1420 Pattern serializedPattern = serializedPattern(pattern);
1421 Matcher matcher = serializedPattern.matcher(matchStr);
1422 if (!matcher.matches())
1423 failCount++;
1424 if (matcher.groupCount() != 1)
1425 failCount++;
1426
1427 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1428 serializedPattern = serializedPattern(pattern);
1429 if (!serializedPattern.matcher("Ab").matches())
1430 failCount++;
1431 if (serializedPattern.matcher("AB").matches())
1432 failCount++;
1433
1434 report("Serialization");
1435 }
1436
1437 private static void gTest() {
1438 Pattern pattern = Pattern.compile("\\G\\w");
1439 Matcher matcher = pattern.matcher("abc#x#x");
1440 matcher.find();
1441 matcher.find();
1442 matcher.find();
1443 if (matcher.find())
1444 failCount++;
1445
1446 pattern = Pattern.compile("\\GA*");
1447 matcher = pattern.matcher("1A2AA3");
1448 matcher.find();
1449 if (matcher.find())
1450 failCount++;
1451
1452 pattern = Pattern.compile("\\GA*");
1453 matcher = pattern.matcher("1A2AA3");
1454 if (!matcher.find(1))
1455 failCount++;
1456 matcher.find();
1457 if (matcher.find())
1458 failCount++;
1459
1460 report("\\G");
1461 }
1462
1463 private static void zTest() {
1464 Pattern pattern = Pattern.compile("foo\\Z");
1465 // Positives
1466 check(pattern, "foo\u0085", true);
1467 check(pattern, "foo\u2028", true);
1468 check(pattern, "foo\u2029", true);
1469 check(pattern, "foo\n", true);
1470 check(pattern, "foo\r", true);
1471 check(pattern, "foo\r\n", true);
1472 // Negatives
1473 check(pattern, "fooo", false);
1474 check(pattern, "foo\n\r", false);
1475
1476 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1477 // Positives
1478 check(pattern, "foo", true);
1479 check(pattern, "foo\n", true);
1480 // Negatives
1481 check(pattern, "foo\r", false);
1482 check(pattern, "foo\u0085", false);
1483 check(pattern, "foo\u2028", false);
1484 check(pattern, "foo\u2029", false);
1485
1486 report("\\Z");
1487 }
1488
1489 private static void replaceFirstTest() {
1490 Pattern pattern = Pattern.compile("(ab)(c*)");
1491 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1492 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1493 failCount++;
1494
1495 matcher.reset("zzzabccczzzabcczzzabccczzz");
1496 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1497 failCount++;
1498
1499 matcher.reset("zzzabccczzzabcczzzabccczzz");
1500 String result = matcher.replaceFirst("$1");
1501 if (!result.equals("zzzabzzzabcczzzabccczzz"))
1502 failCount++;
1503
1504 matcher.reset("zzzabccczzzabcczzzabccczzz");
1505 result = matcher.replaceFirst("$2");
1506 if (!result.equals("zzzccczzzabcczzzabccczzz"))
1507 failCount++;
1508
1509 pattern = Pattern.compile("a*");
1510 matcher = pattern.matcher("aaaaaaaaaa");
1511 if (!matcher.replaceFirst("test").equals("test"))
1512 failCount++;
1513
1514 pattern = Pattern.compile("a+");
1515 matcher = pattern.matcher("zzzaaaaaaaaaa");
1516 if (!matcher.replaceFirst("test").equals("zzztest"))
1517 failCount++;
1518
1519 // Supplementary character test
1520 pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1521 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1522 if (!matcher.replaceFirst(toSupplementaries("test"))
1523 .equals(toSupplementaries("testzzzabcczzzabccc")))
1524 failCount++;
1525
1526 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1527 if (!matcher.replaceFirst(toSupplementaries("test")).
1528 equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1529 failCount++;
1530
1531 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1532 result = matcher.replaceFirst("$1");
1533 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1534 failCount++;
1535
1536 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1537 result = matcher.replaceFirst("$2");
1538 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1539 failCount++;
1540
1541 pattern = Pattern.compile(toSupplementaries("a*"));
1542 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1543 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1544 failCount++;
1545
1546 pattern = Pattern.compile(toSupplementaries("a+"));
1547 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1548 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1549 failCount++;
1550
1551 report("Replace First");
1552 }
1553
1554 private static void unixLinesTest() {
1555 Pattern pattern = Pattern.compile(".*");
1556 Matcher matcher = pattern.matcher("aa\u2028blah");
1557 matcher.find();
1558 if (!matcher.group(0).equals("aa"))
1559 failCount++;
1560
1561 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1562 matcher = pattern.matcher("aa\u2028blah");
1563 matcher.find();
1564 if (!matcher.group(0).equals("aa\u2028blah"))
1565 failCount++;
1566
1567 pattern = Pattern.compile("[az]$",
1568 Pattern.MULTILINE | Pattern.UNIX_LINES);
1569 matcher = pattern.matcher("aa\u2028zz");
1570 check(matcher, "a\u2028", false);
1571
1572 // Supplementary character test
1573 pattern = Pattern.compile(".*");
1574 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1575 matcher.find();
1576 if (!matcher.group(0).equals(toSupplementaries("aa")))
1577 failCount++;
1578
1579 pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1580 matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1581 matcher.find();
1582 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1583 failCount++;
1584
1585 pattern = Pattern.compile(toSupplementaries("[az]$"),
1586 Pattern.MULTILINE | Pattern.UNIX_LINES);
1587 matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1588 check(matcher, toSupplementaries("a\u2028"), false);
1589
1590 report("Unix Lines");
1591 }
1592
1593 private static void commentsTest() {
1594 int flags = Pattern.COMMENTS;
1595
1596 Pattern pattern = Pattern.compile("aa \\# aa", flags);
1597 Matcher matcher = pattern.matcher("aa#aa");
1598 if (!matcher.matches())
1599 failCount++;
1600
1601 pattern = Pattern.compile("aa # blah", flags);
1602 matcher = pattern.matcher("aa");
1603 if (!matcher.matches())
1604 failCount++;
1605
1606 pattern = Pattern.compile("aa blah", flags);
1607 matcher = pattern.matcher("aablah");
1608 if (!matcher.matches())
1609 failCount++;
1610
1611 pattern = Pattern.compile("aa # blah blech ", flags);
1612 matcher = pattern.matcher("aa");
1613 if (!matcher.matches())
1614 failCount++;
1615
1616 pattern = Pattern.compile("aa # blah\n ", flags);
1617 matcher = pattern.matcher("aa");
1618 if (!matcher.matches())
1619 failCount++;
1620
1621 pattern = Pattern.compile("aa # blah\nbc # blech", flags);
1622 matcher = pattern.matcher("aabc");
1623 if (!matcher.matches())
1624 failCount++;
1625
1626 pattern = Pattern.compile("aa # blah\nbc# blech", flags);
1627 matcher = pattern.matcher("aabc");
1628 if (!matcher.matches())
1629 failCount++;
1630
1631 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
1632 matcher = pattern.matcher("aabc#blech");
1633 if (!matcher.matches())
1634 failCount++;
1635
1636 // Supplementary character test
1637 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1638 matcher = pattern.matcher(toSupplementaries("aa#aa"));
1639 if (!matcher.matches())
1640 failCount++;
1641
1642 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
1643 matcher = pattern.matcher(toSupplementaries("aa"));
1644 if (!matcher.matches())
1645 failCount++;
1646
1647 pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1648 matcher = pattern.matcher(toSupplementaries("aablah"));
1649 if (!matcher.matches())
1650 failCount++;
1651
1652 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
1653 matcher = pattern.matcher(toSupplementaries("aa"));
1654 if (!matcher.matches())
1655 failCount++;
1656
1657 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
1658 matcher = pattern.matcher(toSupplementaries("aa"));
1659 if (!matcher.matches())
1660 failCount++;
1661
1662 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
1663 matcher = pattern.matcher(toSupplementaries("aabc"));
1664 if (!matcher.matches())
1665 failCount++;
1666
1667 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
1668 matcher = pattern.matcher(toSupplementaries("aabc"));
1669 if (!matcher.matches())
1670 failCount++;
1671
1672 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
1673 matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1674 if (!matcher.matches())
1675 failCount++;
1676
1677 report("Comments");
1678 }
1679
1680 private static void caseFoldingTest() { // bug 4504687
1681 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1682 Pattern pattern = Pattern.compile("aa", flags);
1683 Matcher matcher = pattern.matcher("ab");
1684 if (matcher.matches())
1685 failCount++;
1686
1687 pattern = Pattern.compile("aA", flags);
1688 matcher = pattern.matcher("ab");
1689 if (matcher.matches())
1690 failCount++;
1691
1692 pattern = Pattern.compile("aa", flags);
1693 matcher = pattern.matcher("aB");
1694 if (matcher.matches())
1695 failCount++;
1696 matcher = pattern.matcher("Ab");
1697 if (matcher.matches())
1698 failCount++;
1699
1700 // ASCII "a"
1701 // Latin-1 Supplement "a" + grave
1702 // Cyrillic "a"
1703 String[] patterns = new String[] {
1704 //single
1705 "a", "\u00e0", "\u0430",
1706 //slice
1707 "ab", "\u00e0\u00e1", "\u0430\u0431",
1708 //class single
1709 "[a]", "[\u00e0]", "[\u0430]",
1710 //class range
1711 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1712 //back reference
1713 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1714 };
1715
1716 String[] texts = new String[] {
1717 "A", "\u00c0", "\u0410",
1718 "AB", "\u00c0\u00c1", "\u0410\u0411",
1719 "A", "\u00c0", "\u0410",
1720 "B", "\u00c2", "\u0411",
1721 "aA", "\u00e0\u00c0", "\u0430\u0410"
1722 };
1723
1724 boolean[] expected = new boolean[] {
1725 true, false, false,
1726 true, false, false,
1727 true, false, false,
1728 true, false, false,
1729 true, false, false
1730 };
1731
1732 flags = Pattern.CASE_INSENSITIVE;
1733 for (int i = 0; i < patterns.length; i++) {
1734 pattern = Pattern.compile(patterns[i], flags);
1735 matcher = pattern.matcher(texts[i]);
1736 if (matcher.matches() != expected[i]) {
1737 System.out.println("<1> Failed at " + i);
1738 failCount++;
1739 }
1740 }
1741
1742 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1743 for (int i = 0; i < patterns.length; i++) {
1744 pattern = Pattern.compile(patterns[i], flags);
1745 matcher = pattern.matcher(texts[i]);
1746 if (!matcher.matches()) {
1747 System.out.println("<2> Failed at " + i);
1748 failCount++;
1749 }
1750 }
1751 // flag unicode_case alone should do nothing
1752 flags = Pattern.UNICODE_CASE;
1753 for (int i = 0; i < patterns.length; i++) {
1754 pattern = Pattern.compile(patterns[i], flags);
1755 matcher = pattern.matcher(texts[i]);
1756 if (matcher.matches()) {
1757 System.out.println("<3> Failed at " + i);
1758 failCount++;
1759 }
1760 }
1761
1762 // Special cases: i, I, u+0131 and u+0130
1763 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1764 pattern = Pattern.compile("[h-j]+", flags);
1765 if (!pattern.matcher("\u0131\u0130").matches())
1766 failCount++;
1767 report("Case Folding");
1768 }
1769
1770 private static void appendTest() {
1771 Pattern pattern = Pattern.compile("(ab)(cd)");
1772 Matcher matcher = pattern.matcher("abcd");
1773 String result = matcher.replaceAll("$2$1");
1774 if (!result.equals("cdab"))
1775 failCount++;
1776
1777 String s1 = "Swap all: first = 123, second = 456";
1778 String s2 = "Swap one: first = 123, second = 456";
1779 String r = "$3$2$1";
1780 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1781 matcher = pattern.matcher(s1);
1782
1783 result = matcher.replaceAll(r);
1784 if (!result.equals("Swap all: 123 = first, 456 = second"))
1785 failCount++;
1786
1787 matcher = pattern.matcher(s2);
1788
1789 if (matcher.find()) {
1790 StringBuffer sb = new StringBuffer();
1791 matcher.appendReplacement(sb, r);
1792 matcher.appendTail(sb);
1793 result = sb.toString();
1794 if (!result.equals("Swap one: 123 = first, second = 456"))
1795 failCount++;
1796 }
1797
1798 // Supplementary character test
1799 pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1800 matcher = pattern.matcher(toSupplementaries("abcd"));
1801 result = matcher.replaceAll("$2$1");
1802 if (!result.equals(toSupplementaries("cdab")))
1803 failCount++;
1804
1805 s1 = toSupplementaries("Swap all: first = 123, second = 456");
1806 s2 = toSupplementaries("Swap one: first = 123, second = 456");
1807 r = toSupplementaries("$3$2$1");
1808 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1809 matcher = pattern.matcher(s1);
1810
1811 result = matcher.replaceAll(r);
1812 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1813 failCount++;
1814
1815 matcher = pattern.matcher(s2);
1816
1817 if (matcher.find()) {
1818 StringBuffer sb = new StringBuffer();
1819 matcher.appendReplacement(sb, r);
1820 matcher.appendTail(sb);
1821 result = sb.toString();
1822 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1823 failCount++;
1824 }
1825 report("Append");
1826 }
1827
1828 private static void splitTest() {
1829 Pattern pattern = Pattern.compile(":");
1830 String[] result = pattern.split("foo:and:boo", 2);
1831 if (!result[0].equals("foo"))
1832 failCount++;
1833 if (!result[1].equals("and:boo"))
1834 failCount++;
1835 // Supplementary character test
1836 Pattern patternX = Pattern.compile(toSupplementaries("X"));
1837 result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1838 if (!result[0].equals(toSupplementaries("foo")))
1839 failCount++;
1840 if (!result[1].equals(toSupplementaries("andXboo")))
1841 failCount++;
1842
1843 CharBuffer cb = CharBuffer.allocate(100);
1844 cb.put("foo:and:boo");
1845 cb.flip();
1846 result = pattern.split(cb);
1847 if (!result[0].equals("foo"))
1848 failCount++;
1849 if (!result[1].equals("and"))
1850 failCount++;
1851 if (!result[2].equals("boo"))
1852 failCount++;
1853
1854 // Supplementary character test
1855 CharBuffer cbs = CharBuffer.allocate(100);
1856 cbs.put(toSupplementaries("fooXandXboo"));
1857 cbs.flip();
1858 result = patternX.split(cbs);
1859 if (!result[0].equals(toSupplementaries("foo")))
1860 failCount++;
1861 if (!result[1].equals(toSupplementaries("and")))
1862 failCount++;
1863 if (!result[2].equals(toSupplementaries("boo")))
1864 failCount++;
1865
1866 String source = "0123456789";
1867 for (int limit=-2; limit<3; limit++) {
1868 for (int x=0; x<10; x++) {
1869 result = source.split(Integer.toString(x), limit);
1870 int expectedLength = limit < 1 ? 2 : limit;
1871
1872 if ((limit == 0) && (x == 9)) {
1873 // expected dropping of ""
1874 if (result.length != 1)
1875 failCount++;
1876 if (!result[0].equals("012345678")) {
1877 failCount++;
1878 }
1879 } else {
1880 if (result.length != expectedLength) {
1881 failCount++;
1882 }
1883 if (!result[0].equals(source.substring(0,x))) {
1884 if (limit != 1) {
1885 failCount++;
1886 } else {
1887 if (!result[0].equals(source.substring(0,10))) {
1888 failCount++;
1889 }
1890 }
1891 }
1892 if (expectedLength > 1) { // Check segment 2
1893 if (!result[1].equals(source.substring(x+1,10)))
1894 failCount++;
1895 }
1896 }
1897 }
1898 }
1899 // Check the case for no match found
1900 for (int limit=-2; limit<3; limit++) {
1901 result = source.split("e", limit);
1902 if (result.length != 1)
1903 failCount++;
1904 if (!result[0].equals(source))
1905 failCount++;
1906 }
1907 // Check the case for limit == 0, source = "";
1908 // split() now returns 0-length for empty source "" see #6559590
1909 source = "";
1910 result = source.split("e", 0);
1911 if (result.length != 1)
1912 failCount++;
1913 if (!result[0].equals(source))
1914 failCount++;
1915
1916 // Check both split() and splitAsStraem(), especially for zero-lenth
1917 // input and zero-lenth match cases
1918 String[][] input = new String[][] {
1919 { " ", "Abc Efg Hij" }, // normal non-zero-match
1920 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
1921 { " ", "Abc Efg Hij" }, // non-zero-match in the middle
1922 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
1923 { "(?=\\p{Lu})", "AbcEfg" },
1924 { "(?=\\p{Lu})", "Abc" },
1925 { " ", "" }, // zero-length input
1926 { ".*", "" },
1927
1928 // some tests from PatternStreamTest.java
1929 { "4", "awgqwefg1fefw4vssv1vvv1" },
1930 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1931 { "1", "awgqwefg1fefw4vssv1vvv1" },
1932 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1933 { "\u56da", "1\u56da23\u56da456\u56da7890" },
1934 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1935 { "\u56da", "" },
1936 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1937 { "o", "boo:and:foo" },
1938 { "o", "booooo:and:fooooo" },
1939 { "o", "fooooo:" },
1940 };
1941
1942 String[][] expected = new String[][] {
1943 { "Abc", "Efg", "Hij" },
1944 { "", "Abc", "Efg", "Hij" },
1945 { "Abc", "", "Efg", "Hij" },
1946 { "Abc", "Efg", "Hij" },
1947 { "Abc", "Efg" },
1948 { "Abc" },
1949 { "" },
1950 { "" },
1951
1952 { "awgqwefg1fefw", "vssv1vvv1" },
1953 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1954 { "awgqwefg", "fefw4vssv", "vvv" },
1955 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1956 { "1", "23", "456", "7890" },
1957 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1958 { "" },
1959 { "This", "is", "testing", "", "with", "different", "separators" },
1960 { "b", "", ":and:f" },
1961 { "b", "", "", "", "", ":and:f" },
1962 { "f", "", "", "", "", ":" },
1963 };
1964 for (int i = 0; i < input.length; i++) {
1965 pattern = Pattern.compile(input[i][0]);
1966 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1967 failCount++;
1968 }
1969 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting
1970 // array for zero-length input for now
1971 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1972 expected[i])) {
1973 failCount++;
1974 }
1975 }
1976 report("Split");
1977 }
1978
1979 private static void negationTest() {
1980 Pattern pattern = Pattern.compile("[\\[@^]+");
1981 Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1982 if (!matcher.find())
1983 failCount++;
1984 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1985 failCount++;
1986 pattern = Pattern.compile("[@\\[^]+");
1987 matcher = pattern.matcher("@@@@[[[[^^^^");
1988 if (!matcher.find())
1989 failCount++;
1990 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1991 failCount++;
1992 pattern = Pattern.compile("[@\\[^@]+");
1993 matcher = pattern.matcher("@@@@[[[[^^^^");
1994 if (!matcher.find())
1995 failCount++;
1996 if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1997 failCount++;
1998
1999 pattern = Pattern.compile("\\)");
2000 matcher = pattern.matcher("xxx)xxx");
2001 if (!matcher.find())
2002 failCount++;
2003
2004 report("Negation");
2005 }
2006
2007 private static void ampersandTest() {
2008 Pattern pattern = Pattern.compile("[&@]+");
2009 check(pattern, "@@@@&&&&", true);
2010
2011 pattern = Pattern.compile("[@&]+");
2012 check(pattern, "@@@@&&&&", true);
2013
2014 pattern = Pattern.compile("[@\\&]+");
2015 check(pattern, "@@@@&&&&", true);
2016
2017 report("Ampersand");
2018 }
2019
2020 private static void octalTest() throws Exception {
2021 Pattern pattern = Pattern.compile("\\u0007");
2022 Matcher matcher = pattern.matcher("\u0007");
2023 if (!matcher.matches())
2024 failCount++;
2025 pattern = Pattern.compile("\\07");
2026 matcher = pattern.matcher("\u0007");
2027 if (!matcher.matches())
2028 failCount++;
2029 pattern = Pattern.compile("\\007");
2030 matcher = pattern.matcher("\u0007");
2031 if (!matcher.matches())
2032 failCount++;
2033 pattern = Pattern.compile("\\0007");
2034 matcher = pattern.matcher("\u0007");
2035 if (!matcher.matches())
2036 failCount++;
2037 pattern = Pattern.compile("\\040");
2038 matcher = pattern.matcher("\u0020");
2039 if (!matcher.matches())
2040 failCount++;
2041 pattern = Pattern.compile("\\0403");
2042 matcher = pattern.matcher("\u00203");
2043 if (!matcher.matches())
2044 failCount++;
2045 pattern = Pattern.compile("\\0103");
2046 matcher = pattern.matcher("\u0043");
2047 if (!matcher.matches())
2048 failCount++;
2049
2050 report("Octal");
2051 }
2052
2053 private static void longPatternTest() throws Exception {
2054 try {
2055 Pattern pattern = Pattern.compile(
2056 "a 32-character-long pattern xxxx");
2057 pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2058 pattern = Pattern.compile("a thirty four character long regex");
2059 StringBuffer patternToBe = new StringBuffer(101);
2060 for (int i=0; i<100; i++)
2061 patternToBe.append((char)(97 + i%26));
2062 pattern = Pattern.compile(patternToBe.toString());
2063 } catch (PatternSyntaxException e) {
2064 failCount++;
2065 }
2066
2067 // Supplementary character test
2068 try {
2069 Pattern pattern = Pattern.compile(
2070 toSupplementaries("a 32-character-long pattern xxxx"));
2071 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2072 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2073 StringBuffer patternToBe = new StringBuffer(101*2);
2074 for (int i=0; i<100; i++)
2075 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2076 + 97 + i%26));
2077 pattern = Pattern.compile(patternToBe.toString());
2078 } catch (PatternSyntaxException e) {
2079 failCount++;
2080 }
2081 report("LongPattern");
2082 }
2083
2084 private static void group0Test() throws Exception {
2085 Pattern pattern = Pattern.compile("(tes)ting");
2086 Matcher matcher = pattern.matcher("testing");
2087 check(matcher, "testing");
2088
2089 matcher.reset("testing");
2090 if (matcher.lookingAt()) {
2091 if (!matcher.group(0).equals("testing"))
2092 failCount++;
2093 } else {
2094 failCount++;
2095 }
2096
2097 matcher.reset("testing");
2098 if (matcher.matches()) {
2099 if (!matcher.group(0).equals("testing"))
2100 failCount++;
2101 } else {
2102 failCount++;
2103 }
2104
2105 pattern = Pattern.compile("(tes)ting");
2106 matcher = pattern.matcher("testing");
2107 if (matcher.lookingAt()) {
2108 if (!matcher.group(0).equals("testing"))
2109 failCount++;
2110 } else {
2111 failCount++;
2112 }
2113
2114 pattern = Pattern.compile("^(tes)ting");
2115 matcher = pattern.matcher("testing");
2116 if (matcher.matches()) {
2117 if (!matcher.group(0).equals("testing"))
2118 failCount++;
2119 } else {
2120 failCount++;
2121 }
2122
2123 // Supplementary character test
2124 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2125 matcher = pattern.matcher(toSupplementaries("testing"));
2126 check(matcher, toSupplementaries("testing"));
2127
2128 matcher.reset(toSupplementaries("testing"));
2129 if (matcher.lookingAt()) {
2130 if (!matcher.group(0).equals(toSupplementaries("testing")))
2131 failCount++;
2132 } else {
2133 failCount++;
2134 }
2135
2136 matcher.reset(toSupplementaries("testing"));
2137 if (matcher.matches()) {
2138 if (!matcher.group(0).equals(toSupplementaries("testing")))
2139 failCount++;
2140 } else {
2141 failCount++;
2142 }
2143
2144 pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2145 matcher = pattern.matcher(toSupplementaries("testing"));
2146 if (matcher.lookingAt()) {
2147 if (!matcher.group(0).equals(toSupplementaries("testing")))
2148 failCount++;
2149 } else {
2150 failCount++;
2151 }
2152
2153 pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2154 matcher = pattern.matcher(toSupplementaries("testing"));
2155 if (matcher.matches()) {
2156 if (!matcher.group(0).equals(toSupplementaries("testing")))
2157 failCount++;
2158 } else {
2159 failCount++;
2160 }
2161
2162 report("Group0");
2163 }
2164
2165 private static void findIntTest() throws Exception {
2166 Pattern p = Pattern.compile("blah");
2167 Matcher m = p.matcher("zzzzblahzzzzzblah");
2168 boolean result = m.find(2);
2169 if (!result)
2170 failCount++;
2171
2172 p = Pattern.compile("$");
2173 m = p.matcher("1234567890");
2174 result = m.find(10);
2175 if (!result)
2176 failCount++;
2177 try {
2178 result = m.find(11);
2179 failCount++;
2180 } catch (IndexOutOfBoundsException e) {
2181 // correct result
2182 }
2183
2184 // Supplementary character test
2185 p = Pattern.compile(toSupplementaries("blah"));
2186 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2187 result = m.find(2);
2188 if (!result)
2189 failCount++;
2190
2191 report("FindInt");
2192 }
2193
2194 private static void emptyPatternTest() throws Exception {
2195 Pattern p = Pattern.compile("");
2196 Matcher m = p.matcher("foo");
2197
2198 // Should find empty pattern at beginning of input
2199 boolean result = m.find();
2200 if (result != true)
2201 failCount++;
2202 if (m.start() != 0)
2203 failCount++;
2204
2205 // Should not match entire input if input is not empty
2206 m.reset();
2207 result = m.matches();
2208 if (result == true)
2209 failCount++;
2210
2211 try {
2212 m.start(0);
2213 failCount++;
2214 } catch (IllegalStateException e) {
2215 // Correct result
2216 }
2217
2218 // Should match entire input if input is empty
2219 m.reset("");
2220 result = m.matches();
2221 if (result != true)
2222 failCount++;
2223
2224 result = Pattern.matches("", "");
2225 if (result != true)
2226 failCount++;
2227
2228 result = Pattern.matches("", "foo");
2229 if (result == true)
2230 failCount++;
2231 report("EmptyPattern");
2232 }
2233
2234 private static void charClassTest() throws Exception {
2235 Pattern pattern = Pattern.compile("blah[ab]]blech");
2236 check(pattern, "blahb]blech", true);
2237
2238 pattern = Pattern.compile("[abc[def]]");
2239 check(pattern, "b", true);
2240
2241 // Supplementary character tests
2242 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2243 check(pattern, toSupplementaries("blahb]blech"), true);
2244
2245 pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2246 check(pattern, toSupplementaries("b"), true);
2247
2248 try {
2249 // u00ff when UNICODE_CASE
2250 pattern = Pattern.compile("[ab\u00ffcd]",
2251 Pattern.CASE_INSENSITIVE|
2252 Pattern.UNICODE_CASE);
2253 check(pattern, "ab\u00ffcd", true);
2254 check(pattern, "Ab\u0178Cd", true);
2255
2256 // u00b5 when UNICODE_CASE
2257 pattern = Pattern.compile("[ab\u00b5cd]",
2258 Pattern.CASE_INSENSITIVE|
2259 Pattern.UNICODE_CASE);
2260 check(pattern, "ab\u00b5cd", true);
2261 check(pattern, "Ab\u039cCd", true);
2262 } catch (Exception e) { failCount++; }
2263
2264 /* Special cases
2265 (1)LatinSmallLetterLongS u+017f
2266 (2)LatinSmallLetterDotlessI u+0131
2267 (3)LatineCapitalLetterIWithDotAbove u+0130
2268 (4)KelvinSign u+212a
2269 (5)AngstromSign u+212b
2270 */
2271 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2272 pattern = Pattern.compile("[sik\u00c5]+", flags);
2273 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2274 failCount++;
2275
2276 report("CharClass");
2277 }
2278
2279 private static void caretTest() throws Exception {
2280 Pattern pattern = Pattern.compile("\\w*");
2281 Matcher matcher = pattern.matcher("a#bc#def##g");
2282 check(matcher, "a");
2283 check(matcher, "");
2284 check(matcher, "bc");
2285 check(matcher, "");
2286 check(matcher, "def");
2287 check(matcher, "");
2288 check(matcher, "");
2289 check(matcher, "g");
2290 check(matcher, "");
2291 if (matcher.find())
2292 failCount++;
2293
2294 pattern = Pattern.compile("^\\w*");
2295 matcher = pattern.matcher("a#bc#def##g");
2296 check(matcher, "a");
2297 if (matcher.find())
2298 failCount++;
2299
2300 pattern = Pattern.compile("\\w");
2301 matcher = pattern.matcher("abc##x");
2302 check(matcher, "a");
2303 check(matcher, "b");
2304 check(matcher, "c");
2305 check(matcher, "x");
2306 if (matcher.find())
2307 failCount++;
2308
2309 pattern = Pattern.compile("^\\w");
2310 matcher = pattern.matcher("abc##x");
2311 check(matcher, "a");
2312 if (matcher.find())
2313 failCount++;
2314
2315 pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2316 matcher = pattern.matcher("abcdef-ghi\njklmno");
2317 check(matcher, "abc");
2318 if (matcher.find())
2319 failCount++;
2320
2321 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2322 matcher = pattern.matcher("abcdef-ghi\njklmno");
2323 check(matcher, "abc");
2324 check(matcher, "jkl");
2325 if (matcher.find())
2326 failCount++;
2327
2328 pattern = Pattern.compile("^", Pattern.MULTILINE);
2329 matcher = pattern.matcher("this is some text");
2330 String result = matcher.replaceAll("X");
2331 if (!result.equals("Xthis is some text"))
2332 failCount++;
2333
2334 pattern = Pattern.compile("^");
2335 matcher = pattern.matcher("this is some text");
2336 result = matcher.replaceAll("X");
2337 if (!result.equals("Xthis is some text"))
2338 failCount++;
2339
2340 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2341 matcher = pattern.matcher("this is some text\n");
2342 result = matcher.replaceAll("X");
2343 if (!result.equals("Xthis is some text\n"))
2344 failCount++;
2345
2346 report("Caret");
2347 }
2348
2349 private static void groupCaptureTest() throws Exception {
2350 // Independent group
2351 Pattern pattern = Pattern.compile("x+(?>y+)z+");
2352 Matcher matcher = pattern.matcher("xxxyyyzzz");
2353 matcher.find();
2354 try {
2355 String blah = matcher.group(1);
2356 failCount++;
2357 } catch (IndexOutOfBoundsException ioobe) {
2358 // Good result
2359 }
2360 // Pure group
2361 pattern = Pattern.compile("x+(?:y+)z+");
2362 matcher = pattern.matcher("xxxyyyzzz");
2363 matcher.find();
2364 try {
2365 String blah = matcher.group(1);
2366 failCount++;
2367 } catch (IndexOutOfBoundsException ioobe) {
2368 // Good result
2369 }
2370
2371 // Supplementary character tests
2372 // Independent group
2373 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2374 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2375 matcher.find();
2376 try {
2377 String blah = matcher.group(1);
2378 failCount++;
2379 } catch (IndexOutOfBoundsException ioobe) {
2380 // Good result
2381 }
2382 // Pure group
2383 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2384 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2385 matcher.find();
2386 try {
2387 String blah = matcher.group(1);
2388 failCount++;
2389 } catch (IndexOutOfBoundsException ioobe) {
2390 // Good result
2391 }
2392
2393 report("GroupCapture");
2394 }
2395
2396 private static void backRefTest() throws Exception {
2397 Pattern pattern = Pattern.compile("(a*)bc\\1");
2398 check(pattern, "zzzaabcazzz", true);
2399
2400 pattern = Pattern.compile("(a*)bc\\1");
2401 check(pattern, "zzzaabcaazzz", true);
2402
2403 pattern = Pattern.compile("(abc)(def)\\1");
2404 check(pattern, "abcdefabc", true);
2405
2406 pattern = Pattern.compile("(abc)(def)\\3");
2407 check(pattern, "abcdefabc", false);
2408
2409 try {
2410 for (int i = 1; i < 10; i++) {
2411 // Make sure backref 1-9 are always accepted
2412 pattern = Pattern.compile("abcdef\\" + i);
2413 // and fail to match if the target group does not exit
2414 check(pattern, "abcdef", false);
2415 }
2416 } catch(PatternSyntaxException e) {
2417 failCount++;
2418 }
2419
2420 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2421 check(pattern, "abcdefghija", false);
2422 check(pattern, "abcdefghija1", true);
2423
2424 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2425 check(pattern, "abcdefghijkk", true);
2426
2427 pattern = Pattern.compile("(a)bcdefghij\\11");
2428 check(pattern, "abcdefghija1", true);
2429
2430 // Supplementary character tests
2431 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2432 check(pattern, toSupplementaries("zzzaabcazzz"), true);
2433
2434 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2435 check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2436
2437 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2438 check(pattern, toSupplementaries("abcdefabc"), true);
2439
2440 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2441 check(pattern, toSupplementaries("abcdefabc"), false);
2442
2443 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2444 check(pattern, toSupplementaries("abcdefghija"), false);
2445 check(pattern, toSupplementaries("abcdefghija1"), true);
2446
2447 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2448 check(pattern, toSupplementaries("abcdefghijkk"), true);
2449
2450 report("BackRef");
2451 }
2452
2453 /**
2454 * Unicode Technical Report #18, section 2.6 End of Line
2455 * There is no empty line to be matched in the sequence \u000D\u000A
2456 * but there is an empty line in the sequence \u000A\u000D.
2457 */
2458 private static void anchorTest() throws Exception {
2459 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2460 Matcher m = p.matcher("blah1\r\nblah2");
2461 m.find();
2462 m.find();
2463 if (!m.group().equals("blah2"))
2464 failCount++;
2465
2466 m.reset("blah1\n\rblah2");
2467 m.find();
2468 m.find();
2469 m.find();
2470 if (!m.group().equals("blah2"))
2471 failCount++;
2472
2473 // Test behavior of $ with \r\n at end of input
2474 p = Pattern.compile(".+$");
2475 m = p.matcher("blah1\r\n");
2476 if (!m.find())
2477 failCount++;
2478 if (!m.group().equals("blah1"))
2479 failCount++;
2480 if (m.find())
2481 failCount++;
2482
2483 // Test behavior of $ with \r\n at end of input in multiline
2484 p = Pattern.compile(".+$", Pattern.MULTILINE);
2485 m = p.matcher("blah1\r\n");
2486 if (!m.find())
2487 failCount++;
2488 if (m.find())
2489 failCount++;
2490
2491 // Test for $ recognition of \u0085 for bug 4527731
2492 p = Pattern.compile(".+$", Pattern.MULTILINE);
2493 m = p.matcher("blah1\u0085");
2494 if (!m.find())
2495 failCount++;
2496
2497 // Supplementary character test
2498 p = Pattern.compile("^.*$", Pattern.MULTILINE);
2499 m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2500 m.find();
2501 m.find();
2502 if (!m.group().equals(toSupplementaries("blah2")))
2503 failCount++;
2504
2505 m.reset(toSupplementaries("blah1\n\rblah2"));
2506 m.find();
2507 m.find();
2508 m.find();
2509 if (!m.group().equals(toSupplementaries("blah2")))
2510 failCount++;
2511
2512 // Test behavior of $ with \r\n at end of input
2513 p = Pattern.compile(".+$");
2514 m = p.matcher(toSupplementaries("blah1\r\n"));
2515 if (!m.find())
2516 failCount++;
2517 if (!m.group().equals(toSupplementaries("blah1")))
2518 failCount++;
2519 if (m.find())
2520 failCount++;
2521
2522 // Test behavior of $ with \r\n at end of input in multiline
2523 p = Pattern.compile(".+$", Pattern.MULTILINE);
2524 m = p.matcher(toSupplementaries("blah1\r\n"));
2525 if (!m.find())
2526 failCount++;
2527 if (m.find())
2528 failCount++;
2529
2530 // Test for $ recognition of \u0085 for bug 4527731
2531 p = Pattern.compile(".+$", Pattern.MULTILINE);
2532 m = p.matcher(toSupplementaries("blah1\u0085"));
2533 if (!m.find())
2534 failCount++;
2535
2536 report("Anchors");
2537 }
2538
2539 /**
2540 * A basic sanity test of Matcher.lookingAt().
2541 */
2542 private static void lookingAtTest() throws Exception {
2543 Pattern p = Pattern.compile("(ab)(c*)");
2544 Matcher m = p.matcher("abccczzzabcczzzabccc");
2545
2546 if (!m.lookingAt())
2547 failCount++;
2548
2549 if (!m.group().equals(m.group(0)))
2550 failCount++;
2551
2552 m = p.matcher("zzzabccczzzabcczzzabccczzz");
2553 if (m.lookingAt())
2554 failCount++;
2555
2556 // Supplementary character test
2557 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2558 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2559
2560 if (!m.lookingAt())
2561 failCount++;
2562
2563 if (!m.group().equals(m.group(0)))
2564 failCount++;
2565
2566 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2567 if (m.lookingAt())
2568 failCount++;
2569
2570 report("Looking At");
2571 }
2572
2573 /**
2574 * A basic sanity test of Matcher.matches().
2575 */
2576 private static void matchesTest() throws Exception {
2577 // matches()
2578 Pattern p = Pattern.compile("ulb(c*)");
2579 Matcher m = p.matcher("ulbcccccc");
2580 if (!m.matches())
2581 failCount++;
2582
2583 // find() but not matches()
2584 m.reset("zzzulbcccccc");
2585 if (m.matches())
2586 failCount++;
2587
2588 // lookingAt() but not matches()
2589 m.reset("ulbccccccdef");
2590 if (m.matches())
2591 failCount++;
2592
2593 // matches()
2594 p = Pattern.compile("a|ad");
2595 m = p.matcher("ad");
2596 if (!m.matches())
2597 failCount++;
2598
2599 // Supplementary character test
2600 // matches()
2601 p = Pattern.compile(toSupplementaries("ulb(c*)"));
2602 m = p.matcher(toSupplementaries("ulbcccccc"));
2603 if (!m.matches())
2604 failCount++;
2605
2606 // find() but not matches()
2607 m.reset(toSupplementaries("zzzulbcccccc"));
2608 if (m.matches())
2609 failCount++;
2610
2611 // lookingAt() but not matches()
2612 m.reset(toSupplementaries("ulbccccccdef"));
2613 if (m.matches())
2614 failCount++;
2615
2616 // matches()
2617 p = Pattern.compile(toSupplementaries("a|ad"));
2618 m = p.matcher(toSupplementaries("ad"));
2619 if (!m.matches())
2620 failCount++;
2621
2622 report("Matches");
2623 }
2624
2625 /**
2626 * A basic sanity test of Pattern.matches().
2627 */
2628 private static void patternMatchesTest() throws Exception {
2629 // matches()
2630 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2631 toSupplementaries("ulbcccccc")))
2632 failCount++;
2633
2634 // find() but not matches()
2635 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2636 toSupplementaries("zzzulbcccccc")))
2637 failCount++;
2638
2639 // lookingAt() but not matches()
2640 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2641 toSupplementaries("ulbccccccdef")))
2642 failCount++;
2643
2644 // Supplementary character test
2645 // matches()
2646 if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2647 toSupplementaries("ulbcccccc")))
2648 failCount++;
2649
2650 // find() but not matches()
2651 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2652 toSupplementaries("zzzulbcccccc")))
2653 failCount++;
2654
2655 // lookingAt() but not matches()
2656 if (Pattern.matches(toSupplementaries("ulb(c*)"),
2657 toSupplementaries("ulbccccccdef")))
2658 failCount++;
2659
2660 report("Pattern Matches");
2661 }
2662
2663 /**
2664 * Canonical equivalence testing. Tests the ability of the engine
2665 * to match sequences that are not explicitly specified in the
2666 * pattern when they are considered equivalent by the Unicode Standard.
2667 */
2668 private static void ceTest() throws Exception {
2669 // Decomposed char outside char classes
2670 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2671 Matcher m = p.matcher("test\u00e5");
2672 if (!m.matches())
2673 failCount++;
2674
2675 m.reset("testa\u030a");
2676 if (!m.matches())
2677 failCount++;
2678
2679 // Composed char outside char classes
2680 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2681 m = p.matcher("test\u00e5");
2682 if (!m.matches())
2683 failCount++;
2684
2685 m.reset("testa\u030a");
2686 if (!m.find())
2687 failCount++;
2688
2689 // Decomposed char inside a char class
2690 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2691 m = p.matcher("test\u00e5");
2692 if (!m.find())
2693 failCount++;
2694
2695 m.reset("testa\u030a");
2696 if (!m.find())
2697 failCount++;
2698
2699 // Composed char inside a char class
2700 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2701 m = p.matcher("test\u00e5");
2702 if (!m.find())
2703 failCount++;
2704
2705 m.reset("testa\u0300");
2706 if (!m.find())
2707 failCount++;
2708
2709 m.reset("testa\u030a");
2710 if (!m.find())
2711 failCount++;
2712
2713 // Marks that cannot legally change order and be equivalent
2714 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2715 check(p, "testa\u0308\u0300", true);
2716 check(p, "testa\u0300\u0308", false);
2717
2718 // Marks that can legally change order and be equivalent
2719 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2720 check(p, "testa\u0308\u0323", true);
2721 check(p, "testa\u0323\u0308", true);
2722
2723 // Test all equivalences of the sequence a\u0308\u0323\u0300
2724 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2725 check(p, "testa\u0308\u0323\u0300", true);
2726 check(p, "testa\u0323\u0308\u0300", true);
2727 check(p, "testa\u0308\u0300\u0323", true);
2728 check(p, "test\u00e4\u0323\u0300", true);
2729 check(p, "test\u00e4\u0300\u0323", true);
2730
2731 Object[][] data = new Object[][] {
2732
2733 // JDK-4867170
2734 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true },
2735 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true },
2736 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true },
2737 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2738 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2739 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true },
2740 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true },
2741
2742 { "\\p{IsGreek}", "ab\u1f80cd", "f", true },
2743 { "\\p{IsGreek}", "ab\u1f81cd", "f", true },
2744 { "\\p{IsGreek}", "ab\u1f82cd", "f", true },
2745 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true },
2746 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true },
2747
2748 // backtracking, force to match "\u1f80", instead of \u1f82"
2749 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2750
2751 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true },
2752 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true },
2753
2754 { "[^\u1f80-\u1f82]","\u1f81", "m", false },
2755 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false },
2756 { "[^\u1f01\u0345]", "\u1f81", "f", false },
2757
2758 { "[^\u1f81]+", "\u1f80\u1f82", "f", true },
2759 { "[\u1f80]", "ab\u1f80cd", "f", true },
2760 { "\u1f80", "ab\u1f80cd", "f", true },
2761 { "\u1f00\u0345\u0300", "\u1f82", "m", true },
2762 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true },
2763 { "\u1f82", "\u1f00\u0345\u0300", "m", true },
2764 { "\u1f82", "\u1f80\u0300", "m", true },
2765
2766 // JDK-7080302 # compile failed
2767 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2768
2769 // JDK-6728861, same cause as above one
2770 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2771
2772 // JDK-6995635
2773 { "(\u00e9)", "e\u0301", "m", true },
2774
2775 // JDK-6736245
2776 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2777 { "\u2ADC", "\u2ADC", "m", true}, // NFC
2778 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD
2779
2780 // 4916384.
2781 // Decomposed hangul (jamos) works inside clazz
2782 { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2783 { "[\u1100\u1161]", "\uac00", "m", true},
2784
2785 { "[\uac00]", "\u1100\u1161", "m", true},
2786 { "[\uac00]", "\uac00", "m", true},
2787
2788 // Decomposed hangul (jamos)
2789 { "\u1100\u1161", "\u1100\u1161", "m", true},
2790 { "\u1100\u1161", "\uac00", "m", true},
2791
2792 // Composed hangul
2793 { "\uac00", "\u1100\u1161", "m", true },
2794 { "\uac00", "\uac00", "m", true },
2795
2796 /* Need a NFDSlice to nfd the source to solve this issue
2797 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2798 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165>
2799 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2800
2801 // Decomposed supplementary outside char classes
2802 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2803 // Composed supplementary outside char classes
2804 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2805 */
2806 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2807 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2808
2809 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true },
2810 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2811 };
2812
2813 int failCount = 0;
2814 for (Object[] d : data) {
2815 String pn = (String)d[0];
2816 String tt = (String)d[1];
2817 boolean isFind = "f".equals(((String)d[2]));
2818 boolean expected = (boolean)d[3];
2819 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2820 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2821 if (ret != expected) {
2822 failCount++;
2823 continue;
2824 }
2825 }
2826 report("Canonical Equivalence");
2827 }
2828
2829 /**
2830 * A basic sanity test of Matcher.replaceAll().
2831 */
2832 private static void globalSubstitute() throws Exception {
2833 // Global substitution with a literal
2834 Pattern p = Pattern.compile("(ab)(c*)");
2835 Matcher m = p.matcher("abccczzzabcczzzabccc");
2836 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2837 failCount++;
2838
2839 m.reset("zzzabccczzzabcczzzabccczzz");
2840 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2841 failCount++;
2842
2843 // Global substitution with groups
2844 m.reset("zzzabccczzzabcczzzabccczzz");
2845 String result = m.replaceAll("$1");
2846 if (!result.equals("zzzabzzzabzzzabzzz"))
2847 failCount++;
2848
2849 // Supplementary character test
2850 // Global substitution with a literal
2851 p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2852 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2853 if (!m.replaceAll(toSupplementaries("test")).
2854 equals(toSupplementaries("testzzztestzzztest")))
2855 failCount++;
2856
2857 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2858 if (!m.replaceAll(toSupplementaries("test")).
2859 equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2860 failCount++;
2861
2862 // Global substitution with groups
2863 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2864 result = m.replaceAll("$1");
2865 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2866 failCount++;
2867
2868 report("Global Substitution");
2869 }
2870
2871 /**
2872 * Tests the usage of Matcher.appendReplacement() with literal
2873 * and group substitutions.
2874 */
2875 private static void stringbufferSubstitute() throws Exception {
2876 // SB substitution with literal
2877 String blah = "zzzblahzzz";
2878 Pattern p = Pattern.compile("blah");
2879 Matcher m = p.matcher(blah);
2880 StringBuffer result = new StringBuffer();
2881 try {
2882 m.appendReplacement(result, "blech");
2883 failCount++;
2884 } catch (IllegalStateException e) {
2885 }
2886 m.find();
2887 m.appendReplacement(result, "blech");
2888 if (!result.toString().equals("zzzblech"))
2889 failCount++;
2890
2891 m.appendTail(result);
2892 if (!result.toString().equals("zzzblechzzz"))
2893 failCount++;
2894
2895 // SB substitution with groups
2896 blah = "zzzabcdzzz";
2897 p = Pattern.compile("(ab)(cd)*");
2898 m = p.matcher(blah);
2899 result = new StringBuffer();
2900 try {
2901 m.appendReplacement(result, "$1");
2902 failCount++;
2903 } catch (IllegalStateException e) {
2904 }
2905 m.find();
2906 m.appendReplacement(result, "$1");
2907 if (!result.toString().equals("zzzab"))
2908 failCount++;
2909
2910 m.appendTail(result);
2911 if (!result.toString().equals("zzzabzzz"))
2912 failCount++;
2913
2914 // SB substitution with 3 groups
2915 blah = "zzzabcdcdefzzz";
2916 p = Pattern.compile("(ab)(cd)*(ef)");
2917 m = p.matcher(blah);
2918 result = new StringBuffer();
2919 try {
2920 m.appendReplacement(result, "$1w$2w$3");
2921 failCount++;
2922 } catch (IllegalStateException e) {
2923 }
2924 m.find();
2925 m.appendReplacement(result, "$1w$2w$3");
2926 if (!result.toString().equals("zzzabwcdwef"))
2927 failCount++;
2928
2929 m.appendTail(result);
2930 if (!result.toString().equals("zzzabwcdwefzzz"))
2931 failCount++;
2932
2933 // SB substitution with groups and three matches
2934 // skipping middle match
2935 blah = "zzzabcdzzzabcddzzzabcdzzz";
2936 p = Pattern.compile("(ab)(cd*)");
2937 m = p.matcher(blah);
2938 result = new StringBuffer();
2939 try {
2940 m.appendReplacement(result, "$1");
2941 failCount++;
2942 } catch (IllegalStateException e) {
2943 }
2944 m.find();
2945 m.appendReplacement(result, "$1");
2946 if (!result.toString().equals("zzzab"))
2947 failCount++;
2948
2949 m.find();
2950 m.find();
2951 m.appendReplacement(result, "$2");
2952 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2953 failCount++;
2954
2955 m.appendTail(result);
2956 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2957 failCount++;
2958
2959 // Check to make sure escaped $ is ignored
2960 blah = "zzzabcdcdefzzz";
2961 p = Pattern.compile("(ab)(cd)*(ef)");
2962 m = p.matcher(blah);
2963 result = new StringBuffer();
2964 m.find();
2965 m.appendReplacement(result, "$1w\\$2w$3");
2966 if (!result.toString().equals("zzzabw$2wef"))
2967 failCount++;
2968
2969 m.appendTail(result);
2970 if (!result.toString().equals("zzzabw$2wefzzz"))
2971 failCount++;
2972
2973 // Check to make sure a reference to nonexistent group causes error
2974 blah = "zzzabcdcdefzzz";
2975 p = Pattern.compile("(ab)(cd)*(ef)");
2976 m = p.matcher(blah);
2977 result = new StringBuffer();
2978 m.find();
2979 try {
2980 m.appendReplacement(result, "$1w$5w$3");
2981 failCount++;
2982 } catch (IndexOutOfBoundsException ioobe) {
2983 // Correct result
2984 }
2985
2986 // Check double digit group references
2987 blah = "zzz123456789101112zzz";
2988 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2989 m = p.matcher(blah);
2990 result = new StringBuffer();
2991 m.find();
2992 m.appendReplacement(result, "$1w$11w$3");
2993 if (!result.toString().equals("zzz1w11w3"))
2994 failCount++;
2995
2996 // Check to make sure it backs off $15 to $1 if only three groups
2997 blah = "zzzabcdcdefzzz";
2998 p = Pattern.compile("(ab)(cd)*(ef)");
2999 m = p.matcher(blah);
3000 result = new StringBuffer();
3001 m.find();
3002 m.appendReplacement(result, "$1w$15w$3");
3003 if (!result.toString().equals("zzzabwab5wef"))
3004 failCount++;
3005
3006
3007 // Supplementary character test
3008 // SB substitution with literal
3009 blah = toSupplementaries("zzzblahzzz");
3010 p = Pattern.compile(toSupplementaries("blah"));
3011 m = p.matcher(blah);
3012 result = new StringBuffer();
3013 try {
3014 m.appendReplacement(result, toSupplementaries("blech"));
3015 failCount++;
3016 } catch (IllegalStateException e) {
3017 }
3018 m.find();
3019 m.appendReplacement(result, toSupplementaries("blech"));
3020 if (!result.toString().equals(toSupplementaries("zzzblech")))
3021 failCount++;
3022
3023 m.appendTail(result);
3024 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3025 failCount++;
3026
3027 // SB substitution with groups
3028 blah = toSupplementaries("zzzabcdzzz");
3029 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3030 m = p.matcher(blah);
3031 result = new StringBuffer();
3032 try {
3033 m.appendReplacement(result, "$1");
3034 failCount++;
3035 } catch (IllegalStateException e) {
3036 }
3037 m.find();
3038 m.appendReplacement(result, "$1");
3039 if (!result.toString().equals(toSupplementaries("zzzab")))
3040 failCount++;
3041
3042 m.appendTail(result);
3043 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3044 failCount++;
3045
3046 // SB substitution with 3 groups
3047 blah = toSupplementaries("zzzabcdcdefzzz");
3048 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3049 m = p.matcher(blah);
3050 result = new StringBuffer();
3051 try {
3052 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3053 failCount++;
3054 } catch (IllegalStateException e) {
3055 }
3056 m.find();
3057 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3058 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3059 failCount++;
3060
3061 m.appendTail(result);
3062 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3063 failCount++;
3064
3065 // SB substitution with groups and three matches
3066 // skipping middle match
3067 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3068 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3069 m = p.matcher(blah);
3070 result = new StringBuffer();
3071 try {
3072 m.appendReplacement(result, "$1");
3073 failCount++;
3074 } catch (IllegalStateException e) {
3075 }
3076 m.find();
3077 m.appendReplacement(result, "$1");
3078 if (!result.toString().equals(toSupplementaries("zzzab")))
3079 failCount++;
3080
3081 m.find();
3082 m.find();
3083 m.appendReplacement(result, "$2");
3084 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3085 failCount++;
3086
3087 m.appendTail(result);
3088 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3089 failCount++;
3090
3091 // Check to make sure escaped $ is ignored
3092 blah = toSupplementaries("zzzabcdcdefzzz");
3093 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3094 m = p.matcher(blah);
3095 result = new StringBuffer();
3096 m.find();
3097 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3098 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3099 failCount++;
3100
3101 m.appendTail(result);
3102 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3103 failCount++;
3104
3105 // Check to make sure a reference to nonexistent group causes error
3106 blah = toSupplementaries("zzzabcdcdefzzz");
3107 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3108 m = p.matcher(blah);
3109 result = new StringBuffer();
3110 m.find();
3111 try {
3112 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3113 failCount++;
3114 } catch (IndexOutOfBoundsException ioobe) {
3115 // Correct result
3116 }
3117
3118 // Check double digit group references
3119 blah = toSupplementaries("zzz123456789101112zzz");
3120 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3121 m = p.matcher(blah);
3122 result = new StringBuffer();
3123 m.find();
3124 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3125 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3126 failCount++;
3127
3128 // Check to make sure it backs off $15 to $1 if only three groups
3129 blah = toSupplementaries("zzzabcdcdefzzz");
3130 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3131 m = p.matcher(blah);
3132 result = new StringBuffer();
3133 m.find();
3134 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3135 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3136 failCount++;
3137
3138 // Check nothing has been appended into the output buffer if
3139 // the replacement string triggers IllegalArgumentException.
3140 p = Pattern.compile("(abc)");
3141 m = p.matcher("abcd");
3142 result = new StringBuffer();
3143 m.find();
3144 try {
3145 m.appendReplacement(result, ("xyz$g"));
3146 failCount++;
3147 } catch (IllegalArgumentException iae) {
3148 if (result.length() != 0)
3149 failCount++;
3150 }
3151
3152 report("SB Substitution");
3153 }
3154
3155 /**
3156 * Tests the usage of Matcher.appendReplacement() with literal
3157 * and group substitutions.
3158 */
3159 private static void stringbuilderSubstitute() throws Exception {
3160 // SB substitution with literal
3161 String blah = "zzzblahzzz";
3162 Pattern p = Pattern.compile("blah");
3163 Matcher m = p.matcher(blah);
3164 StringBuilder result = new StringBuilder();
3165 try {
3166 m.appendReplacement(result, "blech");
3167 failCount++;
3168 } catch (IllegalStateException e) {
3169 }
3170 m.find();
3171 m.appendReplacement(result, "blech");
3172 if (!result.toString().equals("zzzblech"))
3173 failCount++;
3174
3175 m.appendTail(result);
3176 if (!result.toString().equals("zzzblechzzz"))
3177 failCount++;
3178
3179 // SB substitution with groups
3180 blah = "zzzabcdzzz";
3181 p = Pattern.compile("(ab)(cd)*");
3182 m = p.matcher(blah);
3183 result = new StringBuilder();
3184 try {
3185 m.appendReplacement(result, "$1");
3186 failCount++;
3187 } catch (IllegalStateException e) {
3188 }
3189 m.find();
3190 m.appendReplacement(result, "$1");
3191 if (!result.toString().equals("zzzab"))
3192 failCount++;
3193
3194 m.appendTail(result);
3195 if (!result.toString().equals("zzzabzzz"))
3196 failCount++;
3197
3198 // SB substitution with 3 groups
3199 blah = "zzzabcdcdefzzz";
3200 p = Pattern.compile("(ab)(cd)*(ef)");
3201 m = p.matcher(blah);
3202 result = new StringBuilder();
3203 try {
3204 m.appendReplacement(result, "$1w$2w$3");
3205 failCount++;
3206 } catch (IllegalStateException e) {
3207 }
3208 m.find();
3209 m.appendReplacement(result, "$1w$2w$3");
3210 if (!result.toString().equals("zzzabwcdwef"))
3211 failCount++;
3212
3213 m.appendTail(result);
3214 if (!result.toString().equals("zzzabwcdwefzzz"))
3215 failCount++;
3216
3217 // SB substitution with groups and three matches
3218 // skipping middle match
3219 blah = "zzzabcdzzzabcddzzzabcdzzz";
3220 p = Pattern.compile("(ab)(cd*)");
3221 m = p.matcher(blah);
3222 result = new StringBuilder();
3223 try {
3224 m.appendReplacement(result, "$1");
3225 failCount++;
3226 } catch (IllegalStateException e) {
3227 }
3228 m.find();
3229 m.appendReplacement(result, "$1");
3230 if (!result.toString().equals("zzzab"))
3231 failCount++;
3232
3233 m.find();
3234 m.find();
3235 m.appendReplacement(result, "$2");
3236 if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3237 failCount++;
3238
3239 m.appendTail(result);
3240 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3241 failCount++;
3242
3243 // Check to make sure escaped $ is ignored
3244 blah = "zzzabcdcdefzzz";
3245 p = Pattern.compile("(ab)(cd)*(ef)");
3246 m = p.matcher(blah);
3247 result = new StringBuilder();
3248 m.find();
3249 m.appendReplacement(result, "$1w\\$2w$3");
3250 if (!result.toString().equals("zzzabw$2wef"))
3251 failCount++;
3252
3253 m.appendTail(result);
3254 if (!result.toString().equals("zzzabw$2wefzzz"))
3255 failCount++;
3256
3257 // Check to make sure a reference to nonexistent group causes error
3258 blah = "zzzabcdcdefzzz";
3259 p = Pattern.compile("(ab)(cd)*(ef)");
3260 m = p.matcher(blah);
3261 result = new StringBuilder();
3262 m.find();
3263 try {
3264 m.appendReplacement(result, "$1w$5w$3");
3265 failCount++;
3266 } catch (IndexOutOfBoundsException ioobe) {
3267 // Correct result
3268 }
3269
3270 // Check double digit group references
3271 blah = "zzz123456789101112zzz";
3272 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3273 m = p.matcher(blah);
3274 result = new StringBuilder();
3275 m.find();
3276 m.appendReplacement(result, "$1w$11w$3");
3277 if (!result.toString().equals("zzz1w11w3"))
3278 failCount++;
3279
3280 // Check to make sure it backs off $15 to $1 if only three groups
3281 blah = "zzzabcdcdefzzz";
3282 p = Pattern.compile("(ab)(cd)*(ef)");
3283 m = p.matcher(blah);
3284 result = new StringBuilder();
3285 m.find();
3286 m.appendReplacement(result, "$1w$15w$3");
3287 if (!result.toString().equals("zzzabwab5wef"))
3288 failCount++;
3289
3290
3291 // Supplementary character test
3292 // SB substitution with literal
3293 blah = toSupplementaries("zzzblahzzz");
3294 p = Pattern.compile(toSupplementaries("blah"));
3295 m = p.matcher(blah);
3296 result = new StringBuilder();
3297 try {
3298 m.appendReplacement(result, toSupplementaries("blech"));
3299 failCount++;
3300 } catch (IllegalStateException e) {
3301 }
3302 m.find();
3303 m.appendReplacement(result, toSupplementaries("blech"));
3304 if (!result.toString().equals(toSupplementaries("zzzblech")))
3305 failCount++;
3306 m.appendTail(result);
3307 if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3308 failCount++;
3309
3310 // SB substitution with groups
3311 blah = toSupplementaries("zzzabcdzzz");
3312 p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3313 m = p.matcher(blah);
3314 result = new StringBuilder();
3315 try {
3316 m.appendReplacement(result, "$1");
3317 failCount++;
3318 } catch (IllegalStateException e) {
3319 }
3320 m.find();
3321 m.appendReplacement(result, "$1");
3322 if (!result.toString().equals(toSupplementaries("zzzab")))
3323 failCount++;
3324
3325 m.appendTail(result);
3326 if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3327 failCount++;
3328
3329 // SB substitution with 3 groups
3330 blah = toSupplementaries("zzzabcdcdefzzz");
3331 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3332 m = p.matcher(blah);
3333 result = new StringBuilder();
3334 try {
3335 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3336 failCount++;
3337 } catch (IllegalStateException e) {
3338 }
3339 m.find();
3340 m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3341 if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3342 failCount++;
3343
3344 m.appendTail(result);
3345 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3346 failCount++;
3347
3348 // SB substitution with groups and three matches
3349 // skipping middle match
3350 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3351 p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3352 m = p.matcher(blah);
3353 result = new StringBuilder();
3354 try {
3355 m.appendReplacement(result, "$1");
3356 failCount++;
3357 } catch (IllegalStateException e) {
3358 }
3359 m.find();
3360 m.appendReplacement(result, "$1");
3361 if (!result.toString().equals(toSupplementaries("zzzab")))
3362 failCount++;
3363
3364 m.find();
3365 m.find();
3366 m.appendReplacement(result, "$2");
3367 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3368 failCount++;
3369
3370 m.appendTail(result);
3371 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3372 failCount++;
3373
3374 // Check to make sure escaped $ is ignored
3375 blah = toSupplementaries("zzzabcdcdefzzz");
3376 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3377 m = p.matcher(blah);
3378 result = new StringBuilder();
3379 m.find();
3380 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3381 if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3382 failCount++;
3383
3384 m.appendTail(result);
3385 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3386 failCount++;
3387
3388 // Check to make sure a reference to nonexistent group causes error
3389 blah = toSupplementaries("zzzabcdcdefzzz");
3390 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3391 m = p.matcher(blah);
3392 result = new StringBuilder();
3393 m.find();
3394 try {
3395 m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3396 failCount++;
3397 } catch (IndexOutOfBoundsException ioobe) {
3398 // Correct result
3399 }
3400 // Check double digit group references
3401 blah = toSupplementaries("zzz123456789101112zzz");
3402 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3403 m = p.matcher(blah);
3404 result = new StringBuilder();
3405 m.find();
3406 m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3407 if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3408 failCount++;
3409
3410 // Check to make sure it backs off $15 to $1 if only three groups
3411 blah = toSupplementaries("zzzabcdcdefzzz");
3412 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3413 m = p.matcher(blah);
3414 result = new StringBuilder();
3415 m.find();
3416 m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3417 if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3418 failCount++;
3419 // Check nothing has been appended into the output buffer if
3420 // the replacement string triggers IllegalArgumentException.
3421 p = Pattern.compile("(abc)");
3422 m = p.matcher("abcd");
3423 result = new StringBuilder();
3424 m.find();
3425 try {
3426 m.appendReplacement(result, ("xyz$g"));
3427 failCount++;
3428 } catch (IllegalArgumentException iae) {
3429 if (result.length() != 0)
3430 failCount++;
3431 }
3432 report("SB Substitution 2");
3433 }
3434
3435 /*
3436 * 5 groups of characters are created to make a substitution string.
3437 * A base string will be created including random lead chars, the
3438 * substitution string, and random trailing chars.
3439 * A pattern containing the 5 groups is searched for and replaced with:
3440 * random group + random string + random group.
3441 * The results are checked for correctness.
3442 */
3443 private static void substitutionBasher() {
3444 for (int runs = 0; runs<1000; runs++) {
3445 // Create a base string to work in
3446 int leadingChars = generator.nextInt(10);
3447 StringBuffer baseBuffer = new StringBuffer(100);
3448 String leadingString = getRandomAlphaString(leadingChars);
3449 baseBuffer.append(leadingString);
3450
3451 // Create 5 groups of random number of random chars
3452 // Create the string to substitute
3453 // Create the pattern string to search for
3454 StringBuffer bufferToSub = new StringBuffer(25);
3455 StringBuffer bufferToPat = new StringBuffer(50);
3456 String[] groups = new String[5];
3457 for(int i=0; i<5; i++) {
3458 int aGroupSize = generator.nextInt(5)+1;
3459 groups[i] = getRandomAlphaString(aGroupSize);
3460 bufferToSub.append(groups[i]);
3461 bufferToPat.append('(');
3462 bufferToPat.append(groups[i]);
3463 bufferToPat.append(')');
3464 }
3465 String stringToSub = bufferToSub.toString();
3466 String pattern = bufferToPat.toString();
3467
3468 // Place sub string into working string at random index
3469 baseBuffer.append(stringToSub);
3470
3471 // Append random chars to end
3472 int trailingChars = generator.nextInt(10);
3473 String trailingString = getRandomAlphaString(trailingChars);
3474 baseBuffer.append(trailingString);
3475 String baseString = baseBuffer.toString();
3476
3477 // Create test pattern and matcher
3478 Pattern p = Pattern.compile(pattern);
3479 Matcher m = p.matcher(baseString);
3480
3481 // Reject candidate if pattern happens to start early
3482 m.find();
3483 if (m.start() < leadingChars)
3484 continue;
3485
3486 // Reject candidate if more than one match
3487 if (m.find())
3488 continue;
3489
3490 // Construct a replacement string with :
3491 // random group + random string + random group
3492 StringBuffer bufferToRep = new StringBuffer();
3493 int groupIndex1 = generator.nextInt(5);
3494 bufferToRep.append("$" + (groupIndex1 + 1));
3495 String randomMidString = getRandomAlphaString(5);
3496 bufferToRep.append(randomMidString);
3497 int groupIndex2 = generator.nextInt(5);
3498 bufferToRep.append("$" + (groupIndex2 + 1));
3499 String replacement = bufferToRep.toString();
3500
3501 // Do the replacement
3502 String result = m.replaceAll(replacement);
3503
3504 // Construct expected result
3505 StringBuffer bufferToRes = new StringBuffer();
3506 bufferToRes.append(leadingString);
3507 bufferToRes.append(groups[groupIndex1]);
3508 bufferToRes.append(randomMidString);
3509 bufferToRes.append(groups[groupIndex2]);
3510 bufferToRes.append(trailingString);
3511 String expectedResult = bufferToRes.toString();
3512
3513 // Check results
3514 if (!result.equals(expectedResult))
3515 failCount++;
3516 }
3517
3518 report("Substitution Basher");
3519 }
3520
3521 /*
3522 * 5 groups of characters are created to make a substitution string.
3523 * A base string will be created including random lead chars, the
3524 * substitution string, and random trailing chars.
3525 * A pattern containing the 5 groups is searched for and replaced with:
3526 * random group + random string + random group.
3527 * The results are checked for correctness.
3528 */
3529 private static void substitutionBasher2() {
3530 for (int runs = 0; runs<1000; runs++) {
3531 // Create a base string to work in
3532 int leadingChars = generator.nextInt(10);
3533 StringBuilder baseBuffer = new StringBuilder(100);
3534 String leadingString = getRandomAlphaString(leadingChars);
3535 baseBuffer.append(leadingString);
3536
3537 // Create 5 groups of random number of random chars
3538 // Create the string to substitute
3539 // Create the pattern string to search for
3540 StringBuilder bufferToSub = new StringBuilder(25);
3541 StringBuilder bufferToPat = new StringBuilder(50);
3542 String[] groups = new String[5];
3543 for(int i=0; i<5; i++) {
3544 int aGroupSize = generator.nextInt(5)+1;
3545 groups[i] = getRandomAlphaString(aGroupSize);
3546 bufferToSub.append(groups[i]);
3547 bufferToPat.append('(');
3548 bufferToPat.append(groups[i]);
3549 bufferToPat.append(')');
3550 }
3551 String stringToSub = bufferToSub.toString();
3552 String pattern = bufferToPat.toString();
3553
3554 // Place sub string into working string at random index
3555 baseBuffer.append(stringToSub);
3556
3557 // Append random chars to end
3558 int trailingChars = generator.nextInt(10);
3559 String trailingString = getRandomAlphaString(trailingChars);
3560 baseBuffer.append(trailingString);
3561 String baseString = baseBuffer.toString();
3562
3563 // Create test pattern and matcher
3564 Pattern p = Pattern.compile(pattern);
3565 Matcher m = p.matcher(baseString);
3566
3567 // Reject candidate if pattern happens to start early
3568 m.find();
3569 if (m.start() < leadingChars)
3570 continue;
3571
3572 // Reject candidate if more than one match
3573 if (m.find())
3574 continue;
3575
3576 // Construct a replacement string with :
3577 // random group + random string + random group
3578 StringBuilder bufferToRep = new StringBuilder();
3579 int groupIndex1 = generator.nextInt(5);
3580 bufferToRep.append("$" + (groupIndex1 + 1));
3581 String randomMidString = getRandomAlphaString(5);
3582 bufferToRep.append(randomMidString);
3583 int groupIndex2 = generator.nextInt(5);
3584 bufferToRep.append("$" + (groupIndex2 + 1));
3585 String replacement = bufferToRep.toString();
3586
3587 // Do the replacement
3588 String result = m.replaceAll(replacement);
3589
3590 // Construct expected result
3591 StringBuilder bufferToRes = new StringBuilder();
3592 bufferToRes.append(leadingString);
3593 bufferToRes.append(groups[groupIndex1]);
3594 bufferToRes.append(randomMidString);
3595 bufferToRes.append(groups[groupIndex2]);
3596 bufferToRes.append(trailingString);
3597 String expectedResult = bufferToRes.toString();
3598
3599 // Check results
3600 if (!result.equals(expectedResult)) {
3601 failCount++;
3602 }
3603 }
3604
3605 report("Substitution Basher 2");
3606 }
3607
3608 /**
3609 * Checks the handling of some escape sequences that the Pattern
3610 * class should process instead of the java compiler. These are
3611 * not in the file because the escapes should be be processed
3612 * by the Pattern class when the regex is compiled.
3613 */
3614 private static void escapes() throws Exception {
3615 Pattern p = Pattern.compile("\\043");
3616 Matcher m = p.matcher("#");
3617 if (!m.find())
3618 failCount++;
3619
3620 p = Pattern.compile("\\x23");
3621 m = p.matcher("#");
3622 if (!m.find())
3623 failCount++;
3624
3625 p = Pattern.compile("\\u0023");
3626 m = p.matcher("#");
3627 if (!m.find())
3628 failCount++;
3629
3630 report("Escape sequences");
3631 }
3632
3633 /**
3634 * Checks the handling of blank input situations. These
3635 * tests are incompatible with my test file format.
3636 */
3637 private static void blankInput() throws Exception {
3638 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3639 Matcher m = p.matcher("");
3640 if (m.find())
3641 failCount++;
3642
3643 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3644 m = p.matcher("");
3645 if (!m.find())
3646 failCount++;
3647
3648 p = Pattern.compile("abc");
3649 m = p.matcher("");
3650 if (m.find())
3651 failCount++;
3652
3653 p = Pattern.compile("a*");
3654 m = p.matcher("");
3655 if (!m.find())
3656 failCount++;
3657
3658 report("Blank input");
3659 }
3660
3661 /**
3662 * Tests the Boyer-Moore pattern matching of a character sequence
3663 * on randomly generated patterns.
3664 */
3665 private static void bm() throws Exception {
3666 doBnM('a');
3667 report("Boyer Moore (ASCII)");
3668
3669 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3670 report("Boyer Moore (Supplementary)");
3671 }
3672
3673 private static void doBnM(int baseCharacter) throws Exception {
3674 int achar=0;
3675
3676 for (int i=0; i<100; i++) {
3677 // Create a short pattern to search for
3678 int patternLength = generator.nextInt(7) + 4;
3679 StringBuffer patternBuffer = new StringBuffer(patternLength);
3680 String pattern;
3681 retry: for (;;) {
3682 for (int x=0; x<patternLength; x++) {
3683 int ch = baseCharacter + generator.nextInt(26);
3684 if (Character.isSupplementaryCodePoint(ch)) {
3685 patternBuffer.append(Character.toChars(ch));
3686 } else {
3687 patternBuffer.append((char)ch);
3688 }
3689 }
3690 pattern = patternBuffer.toString();
3691
3692 // Avoid patterns that start and end with the same substring
3693 // See JDK-6854417
3694 for (int x=1; x < pattern.length(); x++) {
3695 if (pattern.startsWith(pattern.substring(x)))
3696 continue retry;
3697 }
3698 break;
3699 }
3700 Pattern p = Pattern.compile(pattern);
3701
3702 // Create a buffer with random ASCII chars that does
3703 // not match the sample
3704 String toSearch = null;
3705 StringBuffer s = null;
3706 Matcher m = p.matcher("");
3707 do {
3708 s = new StringBuffer(100);
3709 for (int x=0; x<100; x++) {
3710 int ch = baseCharacter + generator.nextInt(26);
3711 if (Character.isSupplementaryCodePoint(ch)) {
3712 s.append(Character.toChars(ch));
3713 } else {
3714 s.append((char)ch);
3715 }
3716 }
3717 toSearch = s.toString();
3718 m.reset(toSearch);
3719 } while (m.find());
3720
3721 // Insert the pattern at a random spot
3722 int insertIndex = generator.nextInt(99);
3723 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3724 insertIndex++;
3725 s = s.insert(insertIndex, pattern);
3726 toSearch = s.toString();
3727
3728 // Make sure that the pattern is found
3729 m.reset(toSearch);
3730 if (!m.find())
3731 failCount++;
3732
3733 // Make sure that the match text is the pattern
3734 if (!m.group().equals(pattern))
3735 failCount++;
3736
3737 // Make sure match occured at insertion point
3738 if (m.start() != insertIndex)
3739 failCount++;
3740 }
3741 }
3742
3743 /**
3744 * Tests the matching of slices on randomly generated patterns.
3745 * The Boyer-Moore optimization is not done on these patterns
3746 * because it uses unicode case folding.
3747 */
3748 private static void slice() throws Exception {
3749 doSlice(Character.MAX_VALUE);
3750 report("Slice");
3751
3752 doSlice(Character.MAX_CODE_POINT);
3753 report("Slice (Supplementary)");
3754 }
3755
3756 private static void doSlice(int maxCharacter) throws Exception {
3757 Random generator = new Random();
3758 int achar=0;
3759
3760 for (int i=0; i<100; i++) {
3761 // Create a short pattern to search for
3762 int patternLength = generator.nextInt(7) + 4;
3763 StringBuffer patternBuffer = new StringBuffer(patternLength);
3764 for (int x=0; x<patternLength; x++) {
3765 int randomChar = 0;
3766 while (!Character.isLetterOrDigit(randomChar))
3767 randomChar = generator.nextInt(maxCharacter);
3768 if (Character.isSupplementaryCodePoint(randomChar)) {
3769 patternBuffer.append(Character.toChars(randomChar));
3770 } else {
3771 patternBuffer.append((char) randomChar);
3772 }
3773 }
3774 String pattern = patternBuffer.toString();
3775 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3776
3777 // Create a buffer with random chars that does not match the sample
3778 String toSearch = null;
3779 StringBuffer s = null;
3780 Matcher m = p.matcher("");
3781 do {
3782 s = new StringBuffer(100);
3783 for (int x=0; x<100; x++) {
3784 int randomChar = 0;
3785 while (!Character.isLetterOrDigit(randomChar))
3786 randomChar = generator.nextInt(maxCharacter);
3787 if (Character.isSupplementaryCodePoint(randomChar)) {
3788 s.append(Character.toChars(randomChar));
3789 } else {
3790 s.append((char) randomChar);
3791 }
3792 }
3793 toSearch = s.toString();
3794 m.reset(toSearch);
3795 } while (m.find());
3796
3797 // Insert the pattern at a random spot
3798 int insertIndex = generator.nextInt(99);
3799 if (Character.isLowSurrogate(s.charAt(insertIndex)))
3800 insertIndex++;
3801 s = s.insert(insertIndex, pattern);
3802 toSearch = s.toString();
3803
3804 // Make sure that the pattern is found
3805 m.reset(toSearch);
3806 if (!m.find())
3807 failCount++;
3808
3809 // Make sure that the match text is the pattern
3810 if (!m.group().equals(pattern))
3811 failCount++;
3812
3813 // Make sure match occured at insertion point
3814 if (m.start() != insertIndex)
3815 failCount++;
3816 }
3817 }
3818
3819 private static void explainFailure(String pattern, String data,
3820 String expected, String actual) {
3821 System.err.println("----------------------------------------");
3822 System.err.println("Pattern = "+pattern);
3823 System.err.println("Data = "+data);
3824 System.err.println("Expected = " + expected);
3825 System.err.println("Actual = " + actual);
3826 }
3827
3828 private static void explainFailure(String pattern, String data,
3829 Throwable t) {
3830 System.err.println("----------------------------------------");
3831 System.err.println("Pattern = "+pattern);
3832 System.err.println("Data = "+data);
3833 t.printStackTrace(System.err);
3834 }
3835
3836 // Testing examples from a file
3837
3838 /**
3839 * Goes through the file "TestCases.txt" and creates many patterns
3840 * described in the file, matching the patterns against input lines in
3841 * the file, and comparing the results against the correct results
3842 * also found in the file. The file format is described in comments
3843 * at the head of the file.
3844 */
3845 private static void processFile(String fileName) throws Exception {
3846 File testCases = new File(System.getProperty("test.src", "."),
3847 fileName);
3848 FileInputStream in = new FileInputStream(testCases);
3849 BufferedReader r = new BufferedReader(new InputStreamReader(in));
3850
3851 // Process next test case.
3852 String aLine;
3853 while((aLine = r.readLine()) != null) {
3854 // Read a line for pattern
3855 String patternString = grabLine(r);
3856 Pattern p = null;
3857 try {
3858 p = compileTestPattern(patternString);
3859 } catch (PatternSyntaxException e) {
3860 String dataString = grabLine(r);
3861 String expectedResult = grabLine(r);
3862 if (expectedResult.startsWith("error"))
3863 continue;
3864 explainFailure(patternString, dataString, e);
3865 failCount++;
3866 continue;
3867 }
3868
3869 // Read a line for input string
3870 String dataString = grabLine(r);
3871 Matcher m = p.matcher(dataString);
3872 StringBuffer result = new StringBuffer();
3873
3874 // Check for IllegalStateExceptions before a match
3875 failCount += preMatchInvariants(m);
3876
3877 boolean found = m.find();
3878
3879 if (found)
3880 failCount += postTrueMatchInvariants(m);
3881 else
3882 failCount += postFalseMatchInvariants(m);
3883
3884 if (found) {
3885 result.append("true ");
3886 result.append(m.group(0) + " ");
3887 } else {
3888 result.append("false ");
3889 }
3890
3891 result.append(m.groupCount());
3892
3893 if (found) {
3894 for (int i=1; i<m.groupCount()+1; i++)
3895 if (m.group(i) != null)
3896 result.append(" " +m.group(i));
3897 }
3898
3899 // Read a line for the expected result
3900 String expectedResult = grabLine(r);
3901
3902 if (!result.toString().equals(expectedResult)) {
3903 explainFailure(patternString, dataString, expectedResult, result.toString());
3904 failCount++;
3905 }
3906 }
3907
3908 report(fileName);
3909 }
3910
3911 private static int preMatchInvariants(Matcher m) {
3912 int failCount = 0;
3913 try {
3914 m.start();
3915 failCount++;
3916 } catch (IllegalStateException ise) {}
3917 try {
3918 m.end();
3919 failCount++;
3920 } catch (IllegalStateException ise) {}
3921 try {
3922 m.group();
3923 failCount++;
3924 } catch (IllegalStateException ise) {}
3925 return failCount;
3926 }
3927
3928 private static int postFalseMatchInvariants(Matcher m) {
3929 int failCount = 0;
3930 try {
3931 m.group();
3932 failCount++;
3933 } catch (IllegalStateException ise) {}
3934 try {
3935 m.start();
3936 failCount++;
3937 } catch (IllegalStateException ise) {}
3938 try {
3939 m.end();
3940 failCount++;
3941 } catch (IllegalStateException ise) {}
3942 return failCount;
3943 }
3944
3945 private static int postTrueMatchInvariants(Matcher m) {
3946 int failCount = 0;
3947 //assert(m.start() = m.start(0);
3948 if (m.start() != m.start(0))
3949 failCount++;
3950 //assert(m.end() = m.end(0);
3951 if (m.start() != m.start(0))
3952 failCount++;
3953 //assert(m.group() = m.group(0);
3954 if (!m.group().equals(m.group(0)))
3955 failCount++;
3956 try {
3957 m.group(50);
3958 failCount++;
3959 } catch (IndexOutOfBoundsException ise) {}
3960
3961 return failCount;
3962 }
3963
3964 private static Pattern compileTestPattern(String patternString) {
3965 if (!patternString.startsWith("'")) {
3966 return Pattern.compile(patternString);
3967 }
3968 int break1 = patternString.lastIndexOf("'");
3969 String flagString = patternString.substring(
3970 break1+1, patternString.length());
3971 patternString = patternString.substring(1, break1);
3972
3973 if (flagString.equals("i"))
3974 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3975
3976 if (flagString.equals("m"))
3977 return Pattern.compile(patternString, Pattern.MULTILINE);
3978
3979 return Pattern.compile(patternString);
3980 }
3981
3982 /**
3983 * Reads a line from the input file. Keeps reading lines until a non
3984 * empty non comment line is read. If the line contains a \n then
3985 * these two characters are replaced by a newline char. If a \\uxxxx
3986 * sequence is read then the sequence is replaced by the unicode char.
3987 */
3988 private static String grabLine(BufferedReader r) throws Exception {
3989 int index = 0;
3990 String line = r.readLine();
3991 while (line.startsWith("//") || line.length() < 1)
3992 line = r.readLine();
3993 while ((index = line.indexOf("\\n")) != -1) {
3994 StringBuffer temp = new StringBuffer(line);
3995 temp.replace(index, index+2, "\n");
3996 line = temp.toString();
3997 }
3998 while ((index = line.indexOf("\\u")) != -1) {
3999 StringBuffer temp = new StringBuffer(line);
4000 String value = temp.substring(index+2, index+6);
4001 char aChar = (char)Integer.parseInt(value, 16);
4002 String unicodeChar = "" + aChar;
4003 temp.replace(index, index+6, unicodeChar);
4004 line = temp.toString();
4005 }
4006
4007 return line;
4008 }
4009
4010 private static void check(Pattern p, String s, String g, String expected) {
4011 Matcher m = p.matcher(s);
4012 m.find();
4013 if (!m.group(g).equals(expected) ||
4014 s.charAt(m.start(g)) != expected.charAt(0) ||
4015 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
4016 failCount++;
4017 }
4018
4019 private static void checkReplaceFirst(String p, String s, String r, String expected)
4020 {
4021 if (!expected.equals(Pattern.compile(p)
4022 .matcher(s)
4023 .replaceFirst(r)))
4024 failCount++;
4025 }
4026
4027 private static void checkReplaceAll(String p, String s, String r, String expected)
4028 {
4029 if (!expected.equals(Pattern.compile(p)
4030 .matcher(s)
4031 .replaceAll(r)))
4032 failCount++;
4033 }
4034
4035 private static void checkExpectedFail(String p) {
4036 try {
4037 Pattern.compile(p);
4038 } catch (PatternSyntaxException pse) {
4039 //pse.printStackTrace();
4040 return;
4041 }
4042 failCount++;
4043 }
4044
4045 private static void checkExpectedIAE(Matcher m, String g) {
4046 m.find();
4047 try {
4048 m.group(g);
4049 } catch (IllegalArgumentException x) {
4050 //iae.printStackTrace();
4051 try {
4052 m.start(g);
4053 } catch (IllegalArgumentException xx) {
4054 try {
4055 m.start(g);
4056 } catch (IllegalArgumentException xxx) {
4057 return;
4058 }
4059 }
4060 }
4061 failCount++;
4062 }
4063
4064 private static void checkExpectedNPE(Matcher m) {
4065 m.find();
4066 try {
4067 m.group(null);
4068 } catch (NullPointerException x) {
4069 try {
4070 m.start(null);
4071 } catch (NullPointerException xx) {
4072 try {
4073 m.end(null);
4074 } catch (NullPointerException xxx) {
4075 return;
4076 }
4077 }
4078 }
4079 failCount++;
4080 }
4081
4082 private static void namedGroupCaptureTest() throws Exception {
4083 check(Pattern.compile("x+(?<gname>y+)z+"),
4084 "xxxyyyzzz",
4085 "gname",
4086 "yyy");
4087
4088 check(Pattern.compile("x+(?<gname8>y+)z+"),
4089 "xxxyyyzzz",
4090 "gname8",
4091 "yyy");
4092
4093 //backref
4094 Pattern pattern = Pattern.compile("(a*)bc\\1");
4095 check(pattern, "zzzaabcazzz", true); // found "abca"
4096
4097 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4098 "zzzaabcaazzz", true);
4099
4100 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4101 "abcdefabc", true);
4102
4103 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4104 "abcdefghijkk", true);
4105
4106 // Supplementary character tests
4107 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4108 toSupplementaries("zzzaabcazzz"), true);
4109
4110 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4111 toSupplementaries("zzzaabcaazzz"), true);
4112
4113 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4114 toSupplementaries("abcdefabc"), true);
4115
4116 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4117 "(?<gname>" +
4118 toSupplementaries("k)") + "\\k<gname>"),
4119 toSupplementaries("abcdefghijkk"), true);
4120
4121 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4122 "xxxyyyzzzyyy",
4123 "gname",
4124 "yyy");
4125
4126 //replaceFirst/All
4127 checkReplaceFirst("(?<gn>ab)(c*)",
4128 "abccczzzabcczzzabccc",
4129 "${gn}",
4130 "abzzzabcczzzabccc");
4131
4132 checkReplaceAll("(?<gn>ab)(c*)",
4133 "abccczzzabcczzzabccc",
4134 "${gn}",
4135 "abzzzabzzzab");
4136
4137
4138 checkReplaceFirst("(?<gn>ab)(c*)",
4139 "zzzabccczzzabcczzzabccczzz",
4140 "${gn}",
4141 "zzzabzzzabcczzzabccczzz");
4142
4143 checkReplaceAll("(?<gn>ab)(c*)",
4144 "zzzabccczzzabcczzzabccczzz",
4145 "${gn}",
4146 "zzzabzzzabzzzabzzz");
4147
4148 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4149 "zzzabccczzzabcczzzabccczzz",
4150 "${gn2}",
4151 "zzzccczzzabcczzzabccczzz");
4152
4153 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4154 "zzzabccczzzabcczzzabccczzz",
4155 "${gn2}",
4156 "zzzccczzzcczzzccczzz");
4157
4158 //toSupplementaries("(ab)(c*)"));
4159 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4160 ")(?<gn2>" + toSupplementaries("c") + "*)",
4161 toSupplementaries("abccczzzabcczzzabccc"),
4162 "${gn1}",
4163 toSupplementaries("abzzzabcczzzabccc"));
4164
4165
4166 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4167 ")(?<gn2>" + toSupplementaries("c") + "*)",
4168 toSupplementaries("abccczzzabcczzzabccc"),
4169 "${gn1}",
4170 toSupplementaries("abzzzabzzzab"));
4171
4172 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4173 ")(?<gn2>" + toSupplementaries("c") + "*)",
4174 toSupplementaries("abccczzzabcczzzabccc"),
4175 "${gn2}",
4176 toSupplementaries("ccczzzabcczzzabccc"));
4177
4178
4179 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4180 ")(?<gn2>" + toSupplementaries("c") + "*)",
4181 toSupplementaries("abccczzzabcczzzabccc"),
4182 "${gn2}",
4183 toSupplementaries("ccczzzcczzzccc"));
4184
4185 checkReplaceFirst("(?<dog>Dog)AndCat",
4186 "zzzDogAndCatzzzDogAndCatzzz",
4187 "${dog}",
4188 "zzzDogzzzDogAndCatzzz");
4189
4190
4191 checkReplaceAll("(?<dog>Dog)AndCat",
4192 "zzzDogAndCatzzzDogAndCatzzz",
4193 "${dog}",
4194 "zzzDogzzzDogzzz");
4195
4196 // backref in Matcher & String
4197 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4198 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4199 failCount++;
4200
4201 // negative
4202 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4203 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4204 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4205 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4206 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4207 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4208 "gnameX");
4209 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4210 report("NamedGroupCapture");
4211 }
4212
4213 // This is for bug 6919132
4214 private static void nonBmpClassComplementTest() throws Exception {
4215 Pattern p = Pattern.compile("\\P{Lu}");
4216 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4217
4218 if (m.find() && m.start() == 1)
4219 failCount++;
4220
4221 // from a unicode category
4222 p = Pattern.compile("\\P{Lu}");
4223 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4224 if (m.find())
4225 failCount++;
4226 if (!m.hitEnd())
4227 failCount++;
4228
4229 // block
4230 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4231 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4232 if (m.find() && m.start() == 1)
4233 failCount++;
4234
4235 p = Pattern.compile("\\P{sc=GRANTHA}");
4236 m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4237 if (m.find() && m.start() == 1)
4238 failCount++;
4239
4240 report("NonBmpClassComplement");
4241 }
4242
4243 private static void unicodePropertiesTest() throws Exception {
4244 // different forms
4245 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4246 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4247 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4248 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4249 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4250 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4251 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4252 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4253 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4254 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4255 failCount++;
4256
4257 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
4258 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4259 Matcher lastSM = common;
4260 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4261
4262 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
4263 Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
4264 Matcher lastBM = latin;
4265 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4266
4267 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4268 if (cp >= 0x30000 && (cp & 0x70) == 0){
4269 continue; // only pick couple code points, they are the same
4270 }
4271
4272 // Unicode Script
4273 Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4274 Matcher m;
4275 String str = new String(Character.toChars(cp));
4276 if (script == lastScript) {
4277 m = lastSM;
4278 m.reset(str);
4279 } else {
4280 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4281 }
4282 if (!m.matches()) {
4283 failCount++;
4284 }
4285 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4286 other.reset(str);
4287 if (other.matches()) {
4288 failCount++;
4289 }
4290 lastSM = m;
4291 lastScript = script;
4292
4293 // Unicode Block
4294 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4295 if (block == null) {
4296 //System.out.printf("Not a Block: cp=%x%n", cp);
4297 continue;
4298 }
4299 if (block == lastBlock) {
4300 m = lastBM;
4301 m.reset(str);
4302 } else {
4303 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4304 }
4305 if (!m.matches()) {
4306 failCount++;
4307 }
4308 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4309 other.reset(str);
4310 if (other.matches()) {
4311 failCount++;
4312 }
4313 lastBM = m;
4314 lastBlock = block;
4315 }
4316 report("unicodeProperties");
4317 }
4318
4319 private static void unicodeHexNotationTest() throws Exception {
4320
4321 // negative
4322 checkExpectedFail("\\x{-23}");
4323 checkExpectedFail("\\x{110000}");
4324 checkExpectedFail("\\x{}");
4325 checkExpectedFail("\\x{AB[ef]");
4326
4327 // codepoint
4328 check("^\\x{1033c}$", "\uD800\uDF3C", true);
4329 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
4330 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false);
4331 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false);
4332
4333 // in class
4334 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false);
4335 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4336 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false);
4337 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false);
4338 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true);
4339 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
4340
4341 for (int cp = 0; cp <= 0x10FFFF; cp++) {
4342 String s = "A" + new String(Character.toChars(cp)) + "B";
4343 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4344 : String.format("\\u%04x\\u%04x",
4345 (int) Character.toChars(cp)[0],
4346 (int) Character.toChars(cp)[1]);
4347 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4348 if (!Pattern.matches("A" + hexUTF16 + "B", s))
4349 failCount++;
4350 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4351 failCount++;
4352 if (!Pattern.matches("A" + hexCodePoint + "B", s))
4353 failCount++;
4354 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4355 failCount++;
4356 }
4357 report("unicodeHexNotation");
4358 }
4359
4360 private static void unicodeClassesTest() throws Exception {
4361
4362 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
4363 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
4364 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
4365 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
4366 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
4367 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
4368 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
4369 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
4370 Matcher print = Pattern.compile("\\p{Print}").matcher("");
4371 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
4372 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
4373 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
4374 Matcher space = Pattern.compile("\\p{Space}").matcher("");
4375 Matcher bound = Pattern.compile("\\b").matcher("");
4376 Matcher word = Pattern.compile("\\w++").matcher("");
4377 // UNICODE_CHARACTER_CLASS
4378 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4379 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4380 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4381 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4382 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4383 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4384 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4385 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4386 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4387 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4388 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4389 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4390 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4391 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4392 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4393 // embedded flag (?U)
4394 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4395 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4396 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4397
4398 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
4399 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4400 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4401 // properties
4402 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
4403 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
4404 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
4405 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
4406 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
4407 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
4408 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
4409 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
4410 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
4411 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
4412 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
4413 // javaMethod
4414 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
4415 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
4416 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
4417 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
4418 // GC/C
4419 Matcher gcC = Pattern.compile("\\p{C}").matcher("");
4420
4421 for (int cp = 1; cp < 0x30000; cp++) {
4422 String str = new String(Character.toChars(cp));
4423 int type = Character.getType(cp);
4424 if (// lower
4425 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
4426 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
4427 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
4428 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
4429 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
4430 // upper
4431 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
4432 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
4433 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
4434 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
4435 // alpha
4436 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
4437 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
4438 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
4439 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
4440 // digit
4441 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
4442 Character.isDigit(cp) != digitU.reset(str).matches() ||
4443 // alnum
4444 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
4445 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
4446 // punct
4447 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
4448 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
4449 // graph
4450 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
4451 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
4452 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
4453 // blank
4454 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
4455 != blank.reset(str).matches() ||
4456 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
4457 // print
4458 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
4459 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
4460 // cntrl
4461 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
4462 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
4463 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
4464 // hexdigit
4465 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
4466 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
4467 // space
4468 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
4469 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
4470 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
4471 // word
4472 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
4473 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
4474 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
4475 // bwordb
4476 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
4477 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
4478 // properties
4479 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
4480 Character.isLetter(cp) != letterP.reset(str).matches()||
4481 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
4482 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
4483 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
4484 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
4485 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
4486 // gc_C
4487 (Character.CONTROL == type || Character.FORMAT == type ||
4488 Character.PRIVATE_USE == type || Character.SURROGATE == type ||
4489 Character.UNASSIGNED == type)
4490 != gcC.reset(str).matches()) {
4491 failCount++;
4492 }
4493 }
4494
4495 // bounds/word align
4496 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
4497 if (!bwbU.reset("\u0180sherman\u0400").matches())
4498 failCount++;
4499 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
4500 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
4501 failCount++;
4502 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
4503 if (!bwbU.reset("\u0724\u0739\u0724").matches())
4504 failCount++;
4505 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
4506 failCount++;
4507 report("unicodePredefinedClasses");
4508 }
4509
4510 private static void unicodeCharacterNameTest() throws Exception {
4511
4512 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4513 if (!Character.isValidCodePoint(cp) ||
4514 Character.getType(cp) == Character.UNASSIGNED)
4515 continue;
4516 String str = new String(Character.toChars(cp));
4517 // single
4518 String p = "\\N{" + Character.getName(cp) + "}";
4519 if (!Pattern.compile(p).matcher(str).matches()) {
4520 failCount++;
4521 }
4522 // class[c]
4523 p = "[\\N{" + Character.getName(cp) + "}]";
4524 if (!Pattern.compile(p).matcher(str).matches()) {
4525 failCount++;
4526 }
4527 }
4528
4529 // range
4530 for (int i = 0; i < 10; i++) {
4531 int start = generator.nextInt(20);
4532 int end = start + generator.nextInt(200);
4533 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4534 String str;
4535 for (int cp = start; cp < end; cp++) {
4536 str = new String(Character.toChars(cp));
4537 if (!Pattern.compile(p).matcher(str).matches()) {
4538 failCount++;
4539 }
4540 }
4541 str = new String(Character.toChars(end + 10));
4542 if (Pattern.compile(p).matcher(str).matches()) {
4543 failCount++;
4544 }
4545 }
4546
4547 // slice
4548 for (int i = 0; i < 10; i++) {
4549 int n = generator.nextInt(256);
4550 int[] buf = new int[n];
4551 StringBuffer sb = new StringBuffer(1024);
4552 for (int j = 0; j < n; j++) {
4553 int cp = generator.nextInt(1000);
4554 if (!Character.isValidCodePoint(cp) ||
4555 Character.getType(cp) == Character.UNASSIGNED)
4556 cp = 0x4e00; // just use 4e00
4557 sb.append("\\N{" + Character.getName(cp) + "}");
4558 buf[j] = cp;
4559 }
4560 String p = sb.toString();
4561 String str = new String(buf, 0, buf.length);
4562 if (!Pattern.compile(p).matcher(str).matches()) {
4563 failCount++;
4564 }
4565 }
4566 report("unicodeCharacterName");
4567 }
4568
4569 private static void horizontalAndVerticalWSTest() throws Exception {
4570 String hws = new String (new char[] {
4571 0x09, 0x20, 0xa0, 0x1680, 0x180e,
4572 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4573 0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4574 0x202f, 0x205f, 0x3000 });
4575 String vws = new String (new char[] {
4576 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4577 if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4578 !Pattern.compile("[\\h]+").matcher(hws).matches())
4579 failCount++;
4580 if (Pattern.compile("\\H").matcher(hws).find() ||
4581 Pattern.compile("[\\H]").matcher(hws).find())
4582 failCount++;
4583 if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4584 !Pattern.compile("[\\v]+").matcher(vws).matches())
4585 failCount++;
4586 if (Pattern.compile("\\V").matcher(vws).find() ||
4587 Pattern.compile("[\\V]").matcher(vws).find())
4588 failCount++;
4589 String prefix = "abcd";
4590 String suffix = "efgh";
4591 String ng = "A";
4592 for (int i = 0; i < hws.length(); i++) {
4593 String c = String.valueOf(hws.charAt(i));
4594 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4595 if (!m.find() || !c.equals(m.group()))
4596 failCount++;
4597 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4598 if (!m.find() || !c.equals(m.group()))
4599 failCount++;
4600
4601 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4602 if (!m.find() || !ng.equals(m.group()))
4603 failCount++;
4604 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4605 if (!m.find() || !ng.equals(m.group()))
4606 failCount++;
4607 }
4608 for (int i = 0; i < vws.length(); i++) {
4609 String c = String.valueOf(vws.charAt(i));
4610 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4611 if (!m.find() || !c.equals(m.group()))
4612 failCount++;
4613 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4614 if (!m.find() || !c.equals(m.group()))
4615 failCount++;
4616
4617 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4618 if (!m.find() || !ng.equals(m.group()))
4619 failCount++;
4620 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4621 if (!m.find() || !ng.equals(m.group()))
4622 failCount++;
4623 }
4624 // \v in range is interpreted as 0x0B. This is the undocumented behavior
4625 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4626 failCount++;
4627 report("horizontalAndVerticalWSTest");
4628 }
4629
4630 private static void linebreakTest() throws Exception {
4631 String linebreaks = new String (new char[] {
4632 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4633 String crnl = "\r\n";
4634 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4635 Pattern.compile("\\R").matcher(crnl).matches() &&
4636 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4637 Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4638 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking
4639 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4640 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4641 failCount++;
4642 }
4643 report("linebreakTest");
4644 }
4645
4646 // #7189363
4647 private static void branchTest() throws Exception {
4648 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy
4649 !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4650 !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4651 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant
4652 !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4653 !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4654 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive
4655 !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4656 !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4657 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy
4658 !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4659 !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4660 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4661 !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4662 !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4663 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4664 !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4665 !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4666 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others
4667 !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4668 !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4669 !Pattern.compile("(a)??bc|de").matcher("de").matches())
4670 failCount++;
4671 report("branchTest");
4672 }
4673
4674 // This test is for 8007395
4675 private static void groupCurlyNotFoundSuppTest() throws Exception {
4676 String input = "test this as \ud83d\ude0d";
4677 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4678 "test(.)*(@[a-zA-Z.]+)",
4679 "test([^B])+(@[a-zA-Z.]+)",
4680 "test([^B])*(@[a-zA-Z.]+)",
4681 "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4682 "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4683 }) {
4684 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4685 .matcher(input);
4686 try {
4687 if (m.find()) {
4688 failCount++;
4689 }
4690 } catch (Exception x) {
4691 failCount++;
4692 }
4693 }
4694 report("GroupCurly NotFoundSupp");
4695 }
4696
4697 // This test is for 8023647
4698 private static void groupCurlyBackoffTest() throws Exception {
4699 if (!"abc1c".matches("(\\w)+1\\1") ||
4700 "abc11".matches("(\\w)+1\\1")) {
4701 failCount++;
4702 }
4703 report("GroupCurly backoff");
4704 }
4705
4706 // This test is for 8012646
4707 private static void patternAsPredicate() throws Exception {
4708 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4709
4710 if (p.test("")) {
4711 failCount++;
4712 }
4713 if (!p.test("word")) {
4714 failCount++;
4715 }
4716 if (p.test("1234")) {
4717 failCount++;
4718 }
4719 if (!p.test("word1234")) {
4720 failCount++;
4721 }
4722 report("Pattern.asPredicate");
4723 }
4724
4725 // This test is for 8184692
4726 private static void patternAsMatchPredicate() throws Exception {
4727 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4728
4729 if (p.test("")) {
4730 failCount++;
4731 }
4732 if (!p.test("word")) {
4733 failCount++;
4734 }
4735 if (p.test("1234word")) {
4736 failCount++;
4737 }
4738 if (p.test("1234")) {
4739 failCount++;
4740 }
4741 report("Pattern.asMatchPredicate");
4742 }
4743
4744
4745 // This test is for 8035975
4746 private static void invalidFlags() throws Exception {
4747 for (int flag = 1; flag != 0; flag <<= 1) {
4748 switch (flag) {
4749 case Pattern.CASE_INSENSITIVE:
4750 case Pattern.MULTILINE:
4751 case Pattern.DOTALL:
4752 case Pattern.UNICODE_CASE:
4753 case Pattern.CANON_EQ:
4754 case Pattern.UNIX_LINES:
4755 case Pattern.LITERAL:
4756 case Pattern.UNICODE_CHARACTER_CLASS:
4757 case Pattern.COMMENTS:
4758 // valid flag, continue
4759 break;
4760 default:
4761 try {
4762 Pattern.compile(".", flag);
4763 failCount++;
4764 } catch (IllegalArgumentException expected) {
4765 }
4766 }
4767 }
4768 report("Invalid compile flags");
4769 }
4770
4771 // This test is for 8158482
4772 private static void embeddedFlags() throws Exception {
4773 try {
4774 Pattern.compile("(?i).(?-i).");
4775 Pattern.compile("(?m).(?-m).");
4776 Pattern.compile("(?s).(?-s).");
4777 Pattern.compile("(?d).(?-d).");
4778 Pattern.compile("(?u).(?-u).");
4779 Pattern.compile("(?c).(?-c).");
4780 Pattern.compile("(?x).(?-x).");
4781 Pattern.compile("(?U).(?-U).");
4782 Pattern.compile("(?imsducxU).(?-imsducxU).");
4783 } catch (PatternSyntaxException x) {
4784 failCount++;
4785 }
4786 report("Embedded flags");
4787 }
4788
4789 private static void grapheme() throws Exception {
4790 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
4791 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4792 .forEach( ln -> {
4793 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4794 // System.out.println(str);
4795 String[] strs = ln.split("\u00f7|\u00d7");
4796 StringBuilder src = new StringBuilder();
4797 ArrayList<String> graphemes = new ArrayList<>();
4798 StringBuilder buf = new StringBuilder();
4799 int offBk = 0;
4800 for (String str : strs) {
4801 if (str.length() == 0) // first empty str
4802 continue;
4803 int cp = Integer.parseInt(str, 16);
4804 src.appendCodePoint(cp);
4805 buf.appendCodePoint(cp);
4806 offBk += (str.length() + 1);
4807 if (ln.charAt(offBk) == '\u00f7') { // DIV
4808 graphemes.add(buf.toString());
4809 buf = new StringBuilder();
4810 }
4811 }
4812 Pattern p = Pattern.compile("\\X");
4813 Matcher m = p.matcher(src.toString());
4814 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4815 for (String g : graphemes) {
4816 // System.out.printf(" grapheme:=[%s]%n", g);
4817 // (1) test \\X directly
4818 if (!m.find() || !m.group().equals(g)) {
4819 System.out.println("Failed \\X [" + ln + "] : " + g);
4820 failCount++;
4821 }
4822 // (2) test \\b{g} + \\X via Scanner
4823 boolean hasNext = s.hasNext(p);
4824 // if (!s.hasNext() || !s.next().equals(next)) {
4825 if (!s.hasNext(p) || !s.next(p).equals(g)) {
4826 System.out.println("Failed b{g} [" + ln + "] : " + g);
4827 failCount++;
4828 }
4829 }
4830 });
4831 // some sanity checks
4832 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4833 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4834 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4835 failCount++;
4836 // make sure "\b{n}" still works
4837 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4838 failCount++;
4839 report("Unicode extended grapheme cluster");
4840 }
4841
4842 // hangup/timeout if go into exponential backtracking
4843 private static void expoBacktracking() throws Exception {
4844
4845 Object[][] patternMatchers = {
4846 // 6328855
4847 { "(.*\n*)*",
4848 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4849 false },
4850 // 6192895
4851 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4852 "Hello World this is a test this is a test this is a test A",
4853 true },
4854 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4855 "Hello World this is a test this is a test this is a test \u4e00 ",
4856 false },
4857 { " *([a-z0-9]+ *)+",
4858 "hello world this is a test this is a test this is a test A",
4859 false },
4860 // 4771934 [FIXED] #5013651?
4861 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4862 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4863 true },
4864 // 4866249 [FIXED]
4865 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4866 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4867 true },
4868 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4869 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4870 false },
4871 // 6345469
4872 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
4873 " < br/> < / p> <p> <html> <adfasfdasdf> </p>",
4874 true }, // --> matched
4875 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
4876 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>",
4877 false },
4878 // 5026912
4879 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4880 "156580451111112225588087755221111111566969655555555",
4881 false},
4882 // 6988218
4883 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4884 "'%)) order by ANGEBOT.ID",
4885 false}, // find
4886 // 6693451
4887 { "^(\\s*foo\\s*)*$",
4888 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4889 true },
4890 { "^(\\s*foo\\s*)*$",
4891 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4892 false
4893 },
4894 // 7006761
4895 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4896 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4897 // 8140212
4898 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4899 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4900 false
4901 },
4902 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4903 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4904
4905 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4906 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4907
4908 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4909 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4910
4911 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4912
4913 /* not fixed
4914 //8132141 ---> second level exponential backtracking
4915 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4916 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4917 */
4918 };
4919
4920 for (Object[] pm : patternMatchers) {
4921 String p = (String)pm[0];
4922 String s = (String)pm[1];
4923 boolean r = (Boolean)pm[2];
4924 if (r != Pattern.compile(p).matcher(s).matches()) {
4925 failCount++;
4926 }
4927 }
4928 }
4929
4930 private static void invalidGroupName() {
4931 // Invalid start of a group name
4932 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4933 "\u0060", "\u007b", "\u0416")) {
4934 for (String pat : List.of("(?<" + groupName + ">)",
4935 "\\k<" + groupName + ">")) {
4936 try {
4937 Pattern.compile(pat);
4938 failCount++;
4939 } catch (PatternSyntaxException e) {
4940 if (!e.getMessage().startsWith(
4941 "capturing group name does not start with a"
4942 + " Latin letter")) {
4943 failCount++;
4944 }
4945 }
4946 }
4947 }
4948 // Invalid char in a group name
4949 for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4950 "d\u0060", "e\u007b", "f\u0416")) {
4951 for (String pat : List.of("(?<" + groupName + ">)",
4952 "\\k<" + groupName + ">")) {
4953 try {
4954 Pattern.compile(pat);
4955 failCount++;
4956 } catch (PatternSyntaxException e) {
4957 if (!e.getMessage().startsWith(
4958 "named capturing group is missing trailing '>'")) {
4959 failCount++;
4960 }
4961 }
4962 }
4963 }
4964 report("Invalid capturing group names");
4965 }
4966
4967 private static void illegalRepetitionRange() {
4968 // huge integers > (2^31 - 1)
4969 String n = BigInteger.valueOf(1L << 32)
4970 .toString();
4971 String m = BigInteger.valueOf(1L << 31)
4972 .add(new BigInteger(80, generator))
4973 .toString();
4974 for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
4975 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
4976 String pat = ".{" + rep + "}";
4977 try {
4978 Pattern.compile(pat);
4979 failCount++;
4980 System.out.println("Expected to fail. Pattern: " + pat);
4981 } catch (PatternSyntaxException e) {
4982 if (!e.getMessage().startsWith("Illegal repetition")) {
4983 failCount++;
4984 System.out.println("Unexpected error message: " + e.getMessage());
4985 }
4986 } catch (Throwable t) {
4987 failCount++;
4988 System.out.println("Unexpected exception: " + t);
4989 }
4990 }
4991 report("illegalRepetitionRange");
4992 }
4993
4994 private static void surrogatePairWithCanonEq() {
4995 try {
4996 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
4997 } catch (Throwable t) {
4998 failCount++;
4999 System.out.println("Unexpected exception: " + t);
5000 }
5001 report("surrogatePairWithCanonEq");
5002 }
5003 }
--- EOF ---