rev 57965 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes
1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8214245 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @run main RegExTest 44 * @key randomness 45 */ 46 47 import java.io.BufferedReader; 48 import java.io.ByteArrayInputStream; 49 import java.io.ByteArrayOutputStream; 50 import java.io.File; 51 import java.io.FileInputStream; 52 import java.io.InputStreamReader; 53 import java.io.ObjectInputStream; 54 import java.io.ObjectOutputStream; 55 import java.math.BigInteger; 56 import java.nio.CharBuffer; 57 import java.nio.file.Files; 58 import java.util.ArrayList; 59 import java.util.Arrays; 60 import java.util.List; 61 import java.util.Random; 62 import java.util.Scanner; 63 import java.util.function.Function; 64 import java.util.function.Predicate; 65 import java.util.regex.Matcher; 66 import java.util.regex.MatchResult; 67 import java.util.regex.Pattern; 68 import java.util.regex.PatternSyntaxException; 69 import jdk.test.lib.RandomFactory; 70 71 /** 72 * This is a test class created to check the operation of 73 * the Pattern and Matcher classes. 74 */ 75 public class RegExTest { 76 77 private static Random generator = RandomFactory.getRandom(); 78 private static boolean failure = false; 79 private static int failCount = 0; 80 private static String firstFailure = null; 81 82 /** 83 * Main to interpret arguments and run several tests. 84 * 85 */ 86 public static void main(String[] args) throws Exception { 87 // Most of the tests are in a file 88 processFile("TestCases.txt"); 89 //processFile("PerlCases.txt"); 90 processFile("BMPTestCases.txt"); 91 processFile("SupplementaryTestCases.txt"); 92 93 // These test many randomly generated char patterns 94 bm(); 95 slice(); 96 97 // These are hard to put into the file 98 escapes(); 99 blankInput(); 100 101 // Substitition tests on randomly generated sequences 102 globalSubstitute(); 103 stringbufferSubstitute(); 104 stringbuilderSubstitute(); 105 106 substitutionBasher(); 107 substitutionBasher2(); 108 109 // Canonical Equivalence 110 ceTest(); 111 112 // Anchors 113 anchorTest(); 114 115 // boolean match calls 116 matchesTest(); 117 lookingAtTest(); 118 119 // Pattern API 120 patternMatchesTest(); 121 122 // Misc 123 lookbehindTest(); 124 nullArgumentTest(); 125 backRefTest(); 126 groupCaptureTest(); 127 caretTest(); 128 charClassTest(); 129 emptyPatternTest(); 130 findIntTest(); 131 group0Test(); 132 longPatternTest(); 133 octalTest(); 134 ampersandTest(); 135 negationTest(); 136 splitTest(); 137 appendTest(); 138 caseFoldingTest(); 139 commentsTest(); 140 unixLinesTest(); 141 replaceFirstTest(); 142 gTest(); 143 zTest(); 144 serializeTest(); 145 reluctantRepetitionTest(); 146 multilineDollarTest(); 147 dollarAtEndTest(); 148 caretBetweenTerminatorsTest(); 149 // This RFE rejected in Tiger numOccurrencesTest(); 150 javaCharClassTest(); 151 nonCaptureRepetitionTest(); 152 notCapturedGroupCurlyMatchTest(); 153 escapedSegmentTest(); 154 literalPatternTest(); 155 literalReplacementTest(); 156 regionTest(); 157 toStringTest(); 158 negatedCharClassTest(); 159 findFromTest(); 160 boundsTest(); 161 unicodeWordBoundsTest(); 162 caretAtEndTest(); 163 wordSearchTest(); 164 hitEndTest(); 165 toMatchResultTest(); 166 toMatchResultTest2(); 167 surrogatesInClassTest(); 168 removeQEQuotingTest(); 169 namedGroupCaptureTest(); 170 nonBmpClassComplementTest(); 171 unicodePropertiesTest(); 172 unicodeHexNotationTest(); 173 unicodeClassesTest(); 174 unicodeCharacterNameTest(); 175 horizontalAndVerticalWSTest(); 176 linebreakTest(); 177 branchTest(); 178 groupCurlyNotFoundSuppTest(); 179 groupCurlyBackoffTest(); 180 patternAsPredicate(); 181 patternAsMatchPredicate(); 182 invalidFlags(); 183 embeddedFlags(); 184 grapheme(); 185 expoBacktracking(); 186 invalidGroupName(); 187 illegalRepetitionRange(); 188 surrogatePairWithCanonEq(); 189 caseInsensitivePMatch(); 190 191 if (failure) { 192 throw new 193 RuntimeException("RegExTest failed, 1st failure: " + 194 firstFailure); 195 } else { 196 System.err.println("OKAY: All tests passed."); 197 } 198 } 199 200 // Utility functions 201 202 private static String getRandomAlphaString(int length) { 203 StringBuffer buf = new StringBuffer(length); 204 for (int i=0; i<length; i++) { 205 char randChar = (char)(97 + generator.nextInt(26)); 206 buf.append(randChar); 207 } 208 return buf.toString(); 209 } 210 211 private static void check(Matcher m, String expected) { 212 m.find(); 213 if (!m.group().equals(expected)) 214 failCount++; 215 } 216 217 private static void check(Matcher m, String result, boolean expected) { 218 m.find(); 219 if (m.group().equals(result) != expected) 220 failCount++; 221 } 222 223 private static void check(Pattern p, String s, boolean expected) { 224 if (p.matcher(s).find() != expected) 225 failCount++; 226 } 227 228 private static void check(String p, String s, boolean expected) { 229 Matcher matcher = Pattern.compile(p).matcher(s); 230 if (matcher.find() != expected) 231 failCount++; 232 } 233 234 private static void check(String p, char c, boolean expected) { 235 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 236 Pattern pattern = Pattern.compile(propertyPattern); 237 char[] ca = new char[1]; ca[0] = c; 238 Matcher matcher = pattern.matcher(new String(ca)); 239 if (!matcher.find()) 240 failCount++; 241 } 242 243 private static void check(String p, int codePoint, boolean expected) { 244 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 245 Pattern pattern = Pattern.compile(propertyPattern); 246 char[] ca = Character.toChars(codePoint); 247 Matcher matcher = pattern.matcher(new String(ca)); 248 if (!matcher.find()) 249 failCount++; 250 } 251 252 private static void check(String p, int flag, String input, String s, 253 boolean expected) 254 { 255 Pattern pattern = Pattern.compile(p, flag); 256 Matcher matcher = pattern.matcher(input); 257 if (expected) 258 check(matcher, s, expected); 259 else 260 check(pattern, input, false); 261 } 262 263 private static void report(String testName) { 264 int spacesToAdd = 30 - testName.length(); 265 StringBuffer paddedNameBuffer = new StringBuffer(testName); 266 for (int i=0; i<spacesToAdd; i++) 267 paddedNameBuffer.append(" "); 268 String paddedName = paddedNameBuffer.toString(); 269 System.err.println(paddedName + ": " + 270 (failCount==0 ? "Passed":"Failed("+failCount+")")); 271 if (failCount > 0) { 272 failure = true; 273 274 if (firstFailure == null) { 275 firstFailure = testName; 276 } 277 } 278 279 failCount = 0; 280 } 281 282 /** 283 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 284 * supplementary characters. This method does NOT fully take care 285 * of the regex syntax. 286 */ 287 private static String toSupplementaries(String s) { 288 int length = s.length(); 289 StringBuffer sb = new StringBuffer(length * 2); 290 291 for (int i = 0; i < length; ) { 292 char c = s.charAt(i++); 293 if (c == '\\') { 294 sb.append(c); 295 if (i < length) { 296 c = s.charAt(i++); 297 sb.append(c); 298 if (c == 'u') { 299 // assume no syntax error 300 sb.append(s.charAt(i++)); 301 sb.append(s.charAt(i++)); 302 sb.append(s.charAt(i++)); 303 sb.append(s.charAt(i++)); 304 } 305 } 306 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 307 sb.append('\ud800').append((char)('\udc00'+c)); 308 } else { 309 sb.append(c); 310 } 311 } 312 return sb.toString(); 313 } 314 315 // Regular expression tests 316 317 // This is for bug 6178785 318 // Test if an expected NPE gets thrown when passing in a null argument 319 private static boolean check(Runnable test) { 320 try { 321 test.run(); 322 failCount++; 323 return false; 324 } catch (NullPointerException npe) { 325 return true; 326 } 327 } 328 329 private static void nullArgumentTest() { 330 check(() -> Pattern.compile(null)); 331 check(() -> Pattern.matches(null, null)); 332 check(() -> Pattern.matches("xyz", null)); 333 check(() -> Pattern.quote(null)); 334 check(() -> Pattern.compile("xyz").split(null)); 335 check(() -> Pattern.compile("xyz").matcher(null)); 336 337 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 338 m.matches(); 339 check(() -> m.appendTail((StringBuffer) null)); 340 check(() -> m.appendTail((StringBuilder)null)); 341 check(() -> m.replaceAll((String) null)); 342 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 343 check(() -> m.replaceFirst((String)null)); 344 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 345 check(() -> m.appendReplacement((StringBuffer)null, null)); 346 check(() -> m.appendReplacement((StringBuilder)null, null)); 347 check(() -> m.reset(null)); 348 check(() -> Matcher.quoteReplacement(null)); 349 //check(() -> m.usePattern(null)); 350 351 report("Null Argument"); 352 } 353 354 // This is for bug6635133 355 // Test if surrogate pair in Unicode escapes can be handled correctly. 356 private static void surrogatesInClassTest() throws Exception { 357 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 358 Matcher matcher = pattern.matcher("\ud834\udd22"); 359 if (!matcher.find()) 360 failCount++; 361 362 report("Surrogate pair in Unicode escape"); 363 } 364 365 // This is for bug6990617 366 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 367 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 368 // char is an octal digit. 369 private static void removeQEQuotingTest() throws Exception { 370 Pattern pattern = 371 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 372 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 373 if (!matcher.find()) 374 failCount++; 375 376 report("Remove Q/E Quoting"); 377 } 378 379 // This is for bug 4988891 380 // Test toMatchResult to see that it is a copy of the Matcher 381 // that is not affected by subsequent operations on the original 382 private static void toMatchResultTest() throws Exception { 383 Pattern pattern = Pattern.compile("squid"); 384 Matcher matcher = pattern.matcher( 385 "agiantsquidofdestinyasmallsquidoffate"); 386 matcher.find(); 387 int matcherStart1 = matcher.start(); 388 MatchResult mr = matcher.toMatchResult(); 389 if (mr == matcher) 390 failCount++; 391 int resultStart1 = mr.start(); 392 if (matcherStart1 != resultStart1) 393 failCount++; 394 matcher.find(); 395 int matcherStart2 = matcher.start(); 396 int resultStart2 = mr.start(); 397 if (matcherStart2 == resultStart2) 398 failCount++; 399 if (resultStart1 != resultStart2) 400 failCount++; 401 MatchResult mr2 = matcher.toMatchResult(); 402 if (mr == mr2) 403 failCount++; 404 if (mr2.start() != matcherStart2) 405 failCount++; 406 report("toMatchResult is a copy"); 407 } 408 409 private static void checkExpectedISE(Runnable test) { 410 try { 411 test.run(); 412 failCount++; 413 } catch (IllegalStateException x) { 414 } catch (IndexOutOfBoundsException xx) { 415 failCount++; 416 } 417 } 418 419 private static void checkExpectedIOOE(Runnable test) { 420 try { 421 test.run(); 422 failCount++; 423 } catch (IndexOutOfBoundsException x) {} 424 } 425 426 // This is for bug 8074678 427 // Test the result of toMatchResult throws ISE if no match is availble 428 private static void toMatchResultTest2() throws Exception { 429 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 430 matcher.find(); 431 MatchResult mr = matcher.toMatchResult(); 432 433 checkExpectedISE(() -> mr.start()); 434 checkExpectedISE(() -> mr.start(2)); 435 checkExpectedISE(() -> mr.end()); 436 checkExpectedISE(() -> mr.end(2)); 437 checkExpectedISE(() -> mr.group()); 438 checkExpectedISE(() -> mr.group(2)); 439 440 matcher = Pattern.compile("(match)").matcher("there is a match"); 441 matcher.find(); 442 MatchResult mr2 = matcher.toMatchResult(); 443 checkExpectedIOOE(() -> mr2.start(2)); 444 checkExpectedIOOE(() -> mr2.end(2)); 445 checkExpectedIOOE(() -> mr2.group(2)); 446 447 report("toMatchResult2 appropriate exceptions"); 448 } 449 450 // This is for bug 5013885 451 // Must test a slice to see if it reports hitEnd correctly 452 private static void hitEndTest() throws Exception { 453 // Basic test of Slice node 454 Pattern p = Pattern.compile("^squidattack"); 455 Matcher m = p.matcher("squack"); 456 m.find(); 457 if (m.hitEnd()) 458 failCount++; 459 m.reset("squid"); 460 m.find(); 461 if (!m.hitEnd()) 462 failCount++; 463 464 // Test Slice, SliceA and SliceU nodes 465 for (int i=0; i<3; i++) { 466 int flags = 0; 467 if (i==1) flags = Pattern.CASE_INSENSITIVE; 468 if (i==2) flags = Pattern.UNICODE_CASE; 469 p = Pattern.compile("^abc", flags); 470 m = p.matcher("ad"); 471 m.find(); 472 if (m.hitEnd()) 473 failCount++; 474 m.reset("ab"); 475 m.find(); 476 if (!m.hitEnd()) 477 failCount++; 478 } 479 480 // Test Boyer-Moore node 481 p = Pattern.compile("catattack"); 482 m = p.matcher("attack"); 483 m.find(); 484 if (!m.hitEnd()) 485 failCount++; 486 487 p = Pattern.compile("catattack"); 488 m = p.matcher("attackattackattackcatatta"); 489 m.find(); 490 if (!m.hitEnd()) 491 failCount++; 492 493 // 8184706: Matching u+0d at EOL against \R should hit-end 494 p = Pattern.compile("...\\R"); 495 m = p.matcher("cat" + (char)0x0a); 496 m.find(); 497 if (m.hitEnd()) 498 failCount++; 499 500 m = p.matcher("cat" + (char)0x0d); 501 m.find(); 502 if (!m.hitEnd()) 503 failCount++; 504 505 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 506 m.find(); 507 if (m.hitEnd()) 508 failCount++; 509 510 report("hitEnd"); 511 } 512 513 // This is for bug 4997476 514 // It is weird code submitted by customer demonstrating a regression 515 private static void wordSearchTest() throws Exception { 516 String testString = new String("word1 word2 word3"); 517 Pattern p = Pattern.compile("\\b"); 518 Matcher m = p.matcher(testString); 519 int position = 0; 520 int start = 0; 521 while (m.find(position)) { 522 start = m.start(); 523 if (start == testString.length()) 524 break; 525 if (m.find(start+1)) { 526 position = m.start(); 527 } else { 528 position = testString.length(); 529 } 530 if (testString.substring(start, position).equals(" ")) 531 continue; 532 if (!testString.substring(start, position-1).startsWith("word")) 533 failCount++; 534 } 535 report("Customer word search"); 536 } 537 538 // This is for bug 4994840 539 private static void caretAtEndTest() throws Exception { 540 // Problem only occurs with multiline patterns 541 // containing a beginning-of-line caret "^" followed 542 // by an expression that also matches the empty string. 543 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 544 Matcher matcher = pattern.matcher("\r"); 545 matcher.find(); 546 matcher.find(); 547 report("Caret at end"); 548 } 549 550 // This test is for 4979006 551 // Check to see if word boundary construct properly handles unicode 552 // non spacing marks 553 private static void unicodeWordBoundsTest() throws Exception { 554 String spaces = " "; 555 String wordChar = "a"; 556 String nsm = "\u030a"; 557 558 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 559 560 Pattern pattern = Pattern.compile("\\b"); 561 Matcher matcher = pattern.matcher(""); 562 // S=other B=word character N=non spacing mark .=word boundary 563 // SS.BB.SS 564 String input = spaces + wordChar + wordChar + spaces; 565 twoFindIndexes(input, matcher, 2, 4); 566 // SS.BBN.SS 567 input = spaces + wordChar +wordChar + nsm + spaces; 568 twoFindIndexes(input, matcher, 2, 5); 569 // SS.BN.SS 570 input = spaces + wordChar + nsm + spaces; 571 twoFindIndexes(input, matcher, 2, 4); 572 // SS.BNN.SS 573 input = spaces + wordChar + nsm + nsm + spaces; 574 twoFindIndexes(input, matcher, 2, 5); 575 // SSN.BB.SS 576 input = spaces + nsm + wordChar + wordChar + spaces; 577 twoFindIndexes(input, matcher, 3, 5); 578 // SS.BNB.SS 579 input = spaces + wordChar + nsm + wordChar + spaces; 580 twoFindIndexes(input, matcher, 2, 5); 581 // SSNNSS 582 input = spaces + nsm + nsm + spaces; 583 matcher.reset(input); 584 if (matcher.find()) 585 failCount++; 586 // SSN.BBN.SS 587 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 588 twoFindIndexes(input, matcher, 3, 6); 589 590 report("Unicode word boundary"); 591 } 592 593 private static void twoFindIndexes(String input, Matcher matcher, int a, 594 int b) throws Exception 595 { 596 matcher.reset(input); 597 matcher.find(); 598 if (matcher.start() != a) 599 failCount++; 600 matcher.find(); 601 if (matcher.start() != b) 602 failCount++; 603 } 604 605 // This test is for 6284152 606 static void check(String regex, String input, String[] expected) { 607 List<String> result = new ArrayList<String>(); 608 Pattern p = Pattern.compile(regex); 609 Matcher m = p.matcher(input); 610 while (m.find()) { 611 result.add(m.group()); 612 } 613 if (!Arrays.asList(expected).equals(result)) 614 failCount++; 615 } 616 617 private static void lookbehindTest() throws Exception { 618 //Positive 619 check("(?<=%.{0,5})foo\\d", 620 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 621 new String[]{"foo1", "foo2", "foo3"}); 622 623 //boundary at end of the lookbehind sub-regex should work consistently 624 //with the boundary just after the lookbehind sub-regex 625 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 626 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 627 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 628 check("(?<!abc \\b)foo", "abc foo", new String[0]); 629 630 //Negative 631 check("(?<!%.{0,5})foo\\d", 632 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 633 new String[] {"foo4", "foo5"}); 634 635 //Positive greedy 636 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 637 638 //Positive reluctant 639 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 640 641 //supplementary 642 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 643 new String[] {"fo\ud800\udc00o"}); 644 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 645 new String[] {"fo\ud800\udc00o"}); 646 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 647 new String[] {"fo\ud800\udc00o"}); 648 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 649 new String[] {"fo\ud800\udc00o"}); 650 report("Lookbehind"); 651 } 652 653 // This test is for 4938995 654 // Check to see if weak region boundaries are transparent to 655 // lookahead and lookbehind constructs 656 private static void boundsTest() throws Exception { 657 String fullMessage = "catdogcat"; 658 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 659 Matcher matcher = pattern.matcher("catdogca"); 660 matcher.useTransparentBounds(true); 661 if (matcher.find()) 662 failCount++; 663 matcher.reset("atdogcat"); 664 if (matcher.find()) 665 failCount++; 666 matcher.reset(fullMessage); 667 if (!matcher.find()) 668 failCount++; 669 matcher.reset(fullMessage); 670 matcher.region(0,9); 671 if (!matcher.find()) 672 failCount++; 673 matcher.reset(fullMessage); 674 matcher.region(0,6); 675 if (!matcher.find()) 676 failCount++; 677 matcher.reset(fullMessage); 678 matcher.region(3,6); 679 if (!matcher.find()) 680 failCount++; 681 matcher.useTransparentBounds(false); 682 if (matcher.find()) 683 failCount++; 684 685 // Negative lookahead/lookbehind 686 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 687 matcher = pattern.matcher("dogcat"); 688 matcher.useTransparentBounds(true); 689 matcher.region(0,3); 690 if (matcher.find()) 691 failCount++; 692 matcher.reset("catdog"); 693 matcher.region(3,6); 694 if (matcher.find()) 695 failCount++; 696 matcher.useTransparentBounds(false); 697 matcher.reset("dogcat"); 698 matcher.region(0,3); 699 if (!matcher.find()) 700 failCount++; 701 matcher.reset("catdog"); 702 matcher.region(3,6); 703 if (!matcher.find()) 704 failCount++; 705 706 report("Region bounds transparency"); 707 } 708 709 // This test is for 4945394 710 private static void findFromTest() throws Exception { 711 String message = "This is 40 $0 message."; 712 Pattern pat = Pattern.compile("\\$0"); 713 Matcher match = pat.matcher(message); 714 if (!match.find()) 715 failCount++; 716 if (match.find()) 717 failCount++; 718 if (match.find()) 719 failCount++; 720 report("Check for alternating find"); 721 } 722 723 // This test is for 4872664 and 4892980 724 private static void negatedCharClassTest() throws Exception { 725 Pattern pattern = Pattern.compile("[^>]"); 726 Matcher matcher = pattern.matcher("\u203A"); 727 if (!matcher.matches()) 728 failCount++; 729 pattern = Pattern.compile("[^fr]"); 730 matcher = pattern.matcher("a"); 731 if (!matcher.find()) 732 failCount++; 733 matcher.reset("\u203A"); 734 if (!matcher.find()) 735 failCount++; 736 String s = "for"; 737 String result[] = s.split("[^fr]"); 738 if (!result[0].equals("f")) 739 failCount++; 740 if (!result[1].equals("r")) 741 failCount++; 742 s = "f\u203Ar"; 743 result = s.split("[^fr]"); 744 if (!result[0].equals("f")) 745 failCount++; 746 if (!result[1].equals("r")) 747 failCount++; 748 749 // Test adding to bits, subtracting a node, then adding to bits again 750 pattern = Pattern.compile("[^f\u203Ar]"); 751 matcher = pattern.matcher("a"); 752 if (!matcher.find()) 753 failCount++; 754 matcher.reset("f"); 755 if (matcher.find()) 756 failCount++; 757 matcher.reset("\u203A"); 758 if (matcher.find()) 759 failCount++; 760 matcher.reset("r"); 761 if (matcher.find()) 762 failCount++; 763 matcher.reset("\u203B"); 764 if (!matcher.find()) 765 failCount++; 766 767 // Test subtracting a node, adding to bits, subtracting again 768 pattern = Pattern.compile("[^\u203Ar\u203B]"); 769 matcher = pattern.matcher("a"); 770 if (!matcher.find()) 771 failCount++; 772 matcher.reset("\u203A"); 773 if (matcher.find()) 774 failCount++; 775 matcher.reset("r"); 776 if (matcher.find()) 777 failCount++; 778 matcher.reset("\u203B"); 779 if (matcher.find()) 780 failCount++; 781 matcher.reset("\u203C"); 782 if (!matcher.find()) 783 failCount++; 784 785 report("Negated Character Class"); 786 } 787 788 // This test is for 4628291 789 private static void toStringTest() throws Exception { 790 Pattern pattern = Pattern.compile("b+"); 791 if (pattern.toString() != "b+") 792 failCount++; 793 Matcher matcher = pattern.matcher("aaabbbccc"); 794 String matcherString = matcher.toString(); // unspecified 795 matcher.find(); 796 matcherString = matcher.toString(); // unspecified 797 matcher.region(0,3); 798 matcherString = matcher.toString(); // unspecified 799 matcher.reset(); 800 matcherString = matcher.toString(); // unspecified 801 report("toString"); 802 } 803 804 // This test is for 4808962 805 private static void literalPatternTest() throws Exception { 806 int flags = Pattern.LITERAL; 807 808 Pattern pattern = Pattern.compile("abc\\t$^", flags); 809 check(pattern, "abc\\t$^", true); 810 811 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 812 check(pattern, "abc\\t$^", true); 813 814 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 815 check(pattern, "\\Qa^$bcabc\\E", true); 816 check(pattern, "a^$bcabc", false); 817 818 pattern = Pattern.compile("\\\\Q\\\\E"); 819 check(pattern, "\\Q\\E", true); 820 821 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 822 check(pattern, "abcefg\\Q\\Ehij", true); 823 824 pattern = Pattern.compile("\\\\\\Q\\\\E"); 825 check(pattern, "\\\\\\\\", true); 826 827 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 828 check(pattern, "\\Qa^$bcabc\\E", true); 829 check(pattern, "a^$bcabc", false); 830 831 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 832 check(pattern, "\\Qabc\\Edef", true); 833 check(pattern, "abcdef", false); 834 835 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 836 check(pattern, "abc\\Edef", true); 837 check(pattern, "abcdef", false); 838 839 pattern = Pattern.compile(Pattern.quote("\\E")); 840 check(pattern, "\\E", true); 841 842 pattern = Pattern.compile("((((abc.+?:)", flags); 843 check(pattern, "((((abc.+?:)", true); 844 845 flags |= Pattern.MULTILINE; 846 847 pattern = Pattern.compile("^cat$", flags); 848 check(pattern, "abc^cat$def", true); 849 check(pattern, "cat", false); 850 851 flags |= Pattern.CASE_INSENSITIVE; 852 853 pattern = Pattern.compile("abcdef", flags); 854 check(pattern, "ABCDEF", true); 855 check(pattern, "AbCdEf", true); 856 857 flags |= Pattern.DOTALL; 858 859 pattern = Pattern.compile("a...b", flags); 860 check(pattern, "A...b", true); 861 check(pattern, "Axxxb", false); 862 863 flags |= Pattern.CANON_EQ; 864 865 Pattern p = Pattern.compile("testa\u030a", flags); 866 check(pattern, "testa\u030a", false); 867 check(pattern, "test\u00e5", false); 868 869 // Supplementary character test 870 flags = Pattern.LITERAL; 871 872 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 873 check(pattern, toSupplementaries("abc\\t$^"), true); 874 875 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 876 check(pattern, toSupplementaries("abc\\t$^"), true); 877 878 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 879 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 880 check(pattern, toSupplementaries("a^$bcabc"), false); 881 882 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 883 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 884 check(pattern, toSupplementaries("a^$bcabc"), false); 885 886 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 887 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 888 check(pattern, toSupplementaries("abcdef"), false); 889 890 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 891 check(pattern, toSupplementaries("abc\\Edef"), true); 892 check(pattern, toSupplementaries("abcdef"), false); 893 894 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 895 check(pattern, toSupplementaries("((((abc.+?:)"), true); 896 897 flags |= Pattern.MULTILINE; 898 899 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 900 check(pattern, toSupplementaries("abc^cat$def"), true); 901 check(pattern, toSupplementaries("cat"), false); 902 903 flags |= Pattern.DOTALL; 904 905 // note: this is case-sensitive. 906 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 907 check(pattern, toSupplementaries("a...b"), true); 908 check(pattern, toSupplementaries("axxxb"), false); 909 910 flags |= Pattern.CANON_EQ; 911 912 String t = toSupplementaries("test"); 913 p = Pattern.compile(t + "a\u030a", flags); 914 check(pattern, t + "a\u030a", false); 915 check(pattern, t + "\u00e5", false); 916 917 report("Literal pattern"); 918 } 919 920 // This test is for 4803179 921 // This test is also for 4808962, replacement parts 922 private static void literalReplacementTest() throws Exception { 923 int flags = Pattern.LITERAL; 924 925 Pattern pattern = Pattern.compile("abc", flags); 926 Matcher matcher = pattern.matcher("zzzabczzz"); 927 String replaceTest = "$0"; 928 String result = matcher.replaceAll(replaceTest); 929 if (!result.equals("zzzabczzz")) 930 failCount++; 931 932 matcher.reset(); 933 String literalReplacement = matcher.quoteReplacement(replaceTest); 934 result = matcher.replaceAll(literalReplacement); 935 if (!result.equals("zzz$0zzz")) 936 failCount++; 937 938 matcher.reset(); 939 replaceTest = "\\t$\\$"; 940 literalReplacement = matcher.quoteReplacement(replaceTest); 941 result = matcher.replaceAll(literalReplacement); 942 if (!result.equals("zzz\\t$\\$zzz")) 943 failCount++; 944 945 // Supplementary character test 946 pattern = Pattern.compile(toSupplementaries("abc"), flags); 947 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 948 replaceTest = "$0"; 949 result = matcher.replaceAll(replaceTest); 950 if (!result.equals(toSupplementaries("zzzabczzz"))) 951 failCount++; 952 953 matcher.reset(); 954 literalReplacement = matcher.quoteReplacement(replaceTest); 955 result = matcher.replaceAll(literalReplacement); 956 if (!result.equals(toSupplementaries("zzz$0zzz"))) 957 failCount++; 958 959 matcher.reset(); 960 replaceTest = "\\t$\\$"; 961 literalReplacement = matcher.quoteReplacement(replaceTest); 962 result = matcher.replaceAll(literalReplacement); 963 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 964 failCount++; 965 966 // IAE should be thrown if backslash or '$' is the last character 967 // in replacement string 968 try { 969 "\uac00".replaceAll("\uac00", "$"); 970 failCount++; 971 } catch (IllegalArgumentException iie) { 972 } catch (Exception e) { 973 failCount++; 974 } 975 try { 976 "\uac00".replaceAll("\uac00", "\\"); 977 failCount++; 978 } catch (IllegalArgumentException iie) { 979 } catch (Exception e) { 980 failCount++; 981 } 982 report("Literal replacement"); 983 } 984 985 // This test is for 4757029 986 private static void regionTest() throws Exception { 987 Pattern pattern = Pattern.compile("abc"); 988 Matcher matcher = pattern.matcher("abcdefabc"); 989 990 matcher.region(0,9); 991 if (!matcher.find()) 992 failCount++; 993 if (!matcher.find()) 994 failCount++; 995 matcher.region(0,3); 996 if (!matcher.find()) 997 failCount++; 998 matcher.region(3,6); 999 if (matcher.find()) 1000 failCount++; 1001 matcher.region(0,2); 1002 if (matcher.find()) 1003 failCount++; 1004 1005 expectRegionFail(matcher, 1, -1); 1006 expectRegionFail(matcher, -1, -1); 1007 expectRegionFail(matcher, -1, 1); 1008 expectRegionFail(matcher, 5, 3); 1009 expectRegionFail(matcher, 5, 12); 1010 expectRegionFail(matcher, 12, 12); 1011 1012 pattern = Pattern.compile("^abc$"); 1013 matcher = pattern.matcher("zzzabczzz"); 1014 matcher.region(0,9); 1015 if (matcher.find()) 1016 failCount++; 1017 matcher.region(3,6); 1018 if (!matcher.find()) 1019 failCount++; 1020 matcher.region(3,6); 1021 matcher.useAnchoringBounds(false); 1022 if (matcher.find()) 1023 failCount++; 1024 1025 // Supplementary character test 1026 pattern = Pattern.compile(toSupplementaries("abc")); 1027 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1028 matcher.region(0,9*2); 1029 if (!matcher.find()) 1030 failCount++; 1031 if (!matcher.find()) 1032 failCount++; 1033 matcher.region(0,3*2); 1034 if (!matcher.find()) 1035 failCount++; 1036 matcher.region(1,3*2); 1037 if (matcher.find()) 1038 failCount++; 1039 matcher.region(3*2,6*2); 1040 if (matcher.find()) 1041 failCount++; 1042 matcher.region(0,2*2); 1043 if (matcher.find()) 1044 failCount++; 1045 matcher.region(0,2*2+1); 1046 if (matcher.find()) 1047 failCount++; 1048 1049 expectRegionFail(matcher, 1*2, -1); 1050 expectRegionFail(matcher, -1, -1); 1051 expectRegionFail(matcher, -1, 1*2); 1052 expectRegionFail(matcher, 5*2, 3*2); 1053 expectRegionFail(matcher, 5*2, 12*2); 1054 expectRegionFail(matcher, 12*2, 12*2); 1055 1056 pattern = Pattern.compile(toSupplementaries("^abc$")); 1057 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1058 matcher.region(0,9*2); 1059 if (matcher.find()) 1060 failCount++; 1061 matcher.region(3*2,6*2); 1062 if (!matcher.find()) 1063 failCount++; 1064 matcher.region(3*2+1,6*2); 1065 if (matcher.find()) 1066 failCount++; 1067 matcher.region(3*2,6*2-1); 1068 if (matcher.find()) 1069 failCount++; 1070 matcher.region(3*2,6*2); 1071 matcher.useAnchoringBounds(false); 1072 if (matcher.find()) 1073 failCount++; 1074 1075 // JDK-8230829 1076 pattern = Pattern.compile("\\ud800\\udc61"); 1077 matcher = pattern.matcher("\ud800\udc61"); 1078 matcher.region(0, 1); 1079 if (matcher.find()) { 1080 failCount++; 1081 System.out.println("Matched a surrogate pair" + 1082 " that crosses border of region"); 1083 } 1084 if (!matcher.hitEnd()) { 1085 failCount++; 1086 System.out.println("Expected to hit the end when" + 1087 " matching a surrogate pair crossing region"); 1088 } 1089 1090 report("Regions"); 1091 } 1092 1093 private static void expectRegionFail(Matcher matcher, int index1, 1094 int index2) 1095 { 1096 try { 1097 matcher.region(index1, index2); 1098 failCount++; 1099 } catch (IndexOutOfBoundsException ioobe) { 1100 // Correct result 1101 } catch (IllegalStateException ise) { 1102 // Correct result 1103 } 1104 } 1105 1106 // This test is for 4803197 1107 private static void escapedSegmentTest() throws Exception { 1108 1109 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1110 check(pattern, "dir1\\dir2", true); 1111 1112 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1113 check(pattern, "dir1\\dir2\\", true); 1114 1115 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1116 check(pattern, "dir1\\dir2\\", true); 1117 1118 // Supplementary character test 1119 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1120 check(pattern, toSupplementaries("dir1\\dir2"), true); 1121 1122 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1123 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1124 1125 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1126 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1127 1128 report("Escaped segment"); 1129 } 1130 1131 // This test is for 4792284 1132 private static void nonCaptureRepetitionTest() throws Exception { 1133 String input = "abcdefgh;"; 1134 1135 String[] patterns = new String[] { 1136 "(?:\\w{4})+;", 1137 "(?:\\w{8})*;", 1138 "(?:\\w{2}){2,4};", 1139 "(?:\\w{4}){2,};", // only matches the 1140 ".*?(?:\\w{5})+;", // specified minimum 1141 ".*?(?:\\w{9})*;", // number of reps - OK 1142 "(?:\\w{4})+?;", // lazy repetition - OK 1143 "(?:\\w{4})++;", // possessive repetition - OK 1144 "(?:\\w{2,}?)+;", // non-deterministic - OK 1145 "(\\w{4})+;", // capturing group - OK 1146 }; 1147 1148 for (int i = 0; i < patterns.length; i++) { 1149 // Check find() 1150 check(patterns[i], 0, input, input, true); 1151 // Check matches() 1152 Pattern p = Pattern.compile(patterns[i]); 1153 Matcher m = p.matcher(input); 1154 1155 if (m.matches()) { 1156 if (!m.group(0).equals(input)) 1157 failCount++; 1158 } else { 1159 failCount++; 1160 } 1161 } 1162 1163 report("Non capturing repetition"); 1164 } 1165 1166 // This test is for 6358731 1167 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1168 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1169 Matcher matcher = pattern.matcher("abcd"); 1170 if (!matcher.matches() || 1171 matcher.group(1) != null || 1172 !matcher.group(2).equals("abcd")) { 1173 failCount++; 1174 } 1175 report("Not captured GroupCurly"); 1176 } 1177 1178 // This test is for 4706545 1179 private static void javaCharClassTest() throws Exception { 1180 for (int i=0; i<1000; i++) { 1181 char c = (char)generator.nextInt(); 1182 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1183 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1184 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1185 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1186 check("{javaDigit}", c, Character.isDigit(c)); 1187 check("{javaDefined}", c, Character.isDefined(c)); 1188 check("{javaLetter}", c, Character.isLetter(c)); 1189 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1190 check("{javaJavaIdentifierStart}", c, 1191 Character.isJavaIdentifierStart(c)); 1192 check("{javaJavaIdentifierPart}", c, 1193 Character.isJavaIdentifierPart(c)); 1194 check("{javaUnicodeIdentifierStart}", c, 1195 Character.isUnicodeIdentifierStart(c)); 1196 check("{javaUnicodeIdentifierPart}", c, 1197 Character.isUnicodeIdentifierPart(c)); 1198 check("{javaIdentifierIgnorable}", c, 1199 Character.isIdentifierIgnorable(c)); 1200 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1201 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1202 check("{javaISOControl}", c, Character.isISOControl(c)); 1203 check("{javaMirrored}", c, Character.isMirrored(c)); 1204 1205 } 1206 1207 // Supplementary character test 1208 for (int i=0; i<1000; i++) { 1209 int c = generator.nextInt(Character.MAX_CODE_POINT 1210 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1211 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1212 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1213 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1214 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1215 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1216 check("{javaDigit}", c, Character.isDigit(c)); 1217 check("{javaDefined}", c, Character.isDefined(c)); 1218 check("{javaLetter}", c, Character.isLetter(c)); 1219 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1220 check("{javaJavaIdentifierStart}", c, 1221 Character.isJavaIdentifierStart(c)); 1222 check("{javaJavaIdentifierPart}", c, 1223 Character.isJavaIdentifierPart(c)); 1224 check("{javaUnicodeIdentifierStart}", c, 1225 Character.isUnicodeIdentifierStart(c)); 1226 check("{javaUnicodeIdentifierPart}", c, 1227 Character.isUnicodeIdentifierPart(c)); 1228 check("{javaIdentifierIgnorable}", c, 1229 Character.isIdentifierIgnorable(c)); 1230 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1231 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1232 check("{javaISOControl}", c, Character.isISOControl(c)); 1233 check("{javaMirrored}", c, Character.isMirrored(c)); 1234 } 1235 1236 report("Java character classes"); 1237 } 1238 1239 // This test is for 4523620 1240 /* 1241 private static void numOccurrencesTest() throws Exception { 1242 Pattern pattern = Pattern.compile("aaa"); 1243 1244 if (pattern.numOccurrences("aaaaaa", false) != 2) 1245 failCount++; 1246 if (pattern.numOccurrences("aaaaaa", true) != 4) 1247 failCount++; 1248 1249 pattern = Pattern.compile("^"); 1250 if (pattern.numOccurrences("aaaaaa", false) != 1) 1251 failCount++; 1252 if (pattern.numOccurrences("aaaaaa", true) != 1) 1253 failCount++; 1254 1255 report("Number of Occurrences"); 1256 } 1257 */ 1258 1259 // This test is for 4776374 1260 private static void caretBetweenTerminatorsTest() throws Exception { 1261 int flags1 = Pattern.DOTALL; 1262 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1263 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1264 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1265 1266 check("^....", flags1, "test\ntest", "test", true); 1267 check(".....^", flags1, "test\ntest", "test", false); 1268 check(".....^", flags1, "test\n", "test", false); 1269 check("....^", flags1, "test\r\n", "test", false); 1270 1271 check("^....", flags2, "test\ntest", "test", true); 1272 check("....^", flags2, "test\ntest", "test", false); 1273 check(".....^", flags2, "test\n", "test", false); 1274 check("....^", flags2, "test\r\n", "test", false); 1275 1276 check("^....", flags3, "test\ntest", "test", true); 1277 check(".....^", flags3, "test\ntest", "test\n", true); 1278 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1279 check(".....^", flags3, "test\n", "test", false); 1280 check(".....^", flags3, "test\r\n", "test", false); 1281 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1282 1283 check("^....", flags4, "test\ntest", "test", true); 1284 check(".....^", flags3, "test\ntest", "test\n", true); 1285 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1286 check(".....^", flags4, "test\n", "test\n", false); 1287 check(".....^", flags4, "test\r\n", "test\r", false); 1288 1289 // Supplementary character test 1290 String t = toSupplementaries("test"); 1291 check("^....", flags1, t+"\n"+t, t, true); 1292 check(".....^", flags1, t+"\n"+t, t, false); 1293 check(".....^", flags1, t+"\n", t, false); 1294 check("....^", flags1, t+"\r\n", t, false); 1295 1296 check("^....", flags2, t+"\n"+t, t, true); 1297 check("....^", flags2, t+"\n"+t, t, false); 1298 check(".....^", flags2, t+"\n", t, false); 1299 check("....^", flags2, t+"\r\n", t, false); 1300 1301 check("^....", flags3, t+"\n"+t, t, true); 1302 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1303 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1304 check(".....^", flags3, t+"\n", t, false); 1305 check(".....^", flags3, t+"\r\n", t, false); 1306 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1307 1308 check("^....", flags4, t+"\n"+t, t, true); 1309 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1310 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1311 check(".....^", flags4, t+"\n", t+"\n", false); 1312 check(".....^", flags4, t+"\r\n", t+"\r", false); 1313 1314 report("Caret between terminators"); 1315 } 1316 1317 // This test is for 4727935 1318 private static void dollarAtEndTest() throws Exception { 1319 int flags1 = Pattern.DOTALL; 1320 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1321 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1322 1323 check("....$", flags1, "test\n", "test", true); 1324 check("....$", flags1, "test\r\n", "test", true); 1325 check(".....$", flags1, "test\n", "test\n", true); 1326 check(".....$", flags1, "test\u0085", "test\u0085", true); 1327 check("....$", flags1, "test\u0085", "test", true); 1328 1329 check("....$", flags2, "test\n", "test", true); 1330 check(".....$", flags2, "test\n", "test\n", true); 1331 check(".....$", flags2, "test\u0085", "test\u0085", true); 1332 check("....$", flags2, "test\u0085", "est\u0085", true); 1333 1334 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1335 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1336 check("....$blah", flags3, "test\nblah", "!!!!", false); 1337 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1338 1339 // Supplementary character test 1340 String t = toSupplementaries("test"); 1341 String b = toSupplementaries("blah"); 1342 check("....$", flags1, t+"\n", t, true); 1343 check("....$", flags1, t+"\r\n", t, true); 1344 check(".....$", flags1, t+"\n", t+"\n", true); 1345 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1346 check("....$", flags1, t+"\u0085", t, true); 1347 1348 check("....$", flags2, t+"\n", t, true); 1349 check(".....$", flags2, t+"\n", t+"\n", true); 1350 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1351 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1352 1353 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1354 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1355 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1356 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1357 1358 report("Dollar at End"); 1359 } 1360 1361 // This test is for 4711773 1362 private static void multilineDollarTest() throws Exception { 1363 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1364 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1365 matcher.find(); 1366 if (matcher.start(0) != 9) 1367 failCount++; 1368 matcher.find(); 1369 if (matcher.start(0) != 20) 1370 failCount++; 1371 1372 // Supplementary character test 1373 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1374 matcher.find(); 1375 if (matcher.start(0) != 9*2) 1376 failCount++; 1377 matcher.find(); 1378 if (matcher.start(0) != 20*2) 1379 failCount++; 1380 1381 report("Multiline Dollar"); 1382 } 1383 1384 private static void reluctantRepetitionTest() throws Exception { 1385 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1386 check(p, "1 word word word 2", true); 1387 check(p, "1 wor wo w 2", true); 1388 check(p, "1 word word 2", true); 1389 check(p, "1 word 2", true); 1390 check(p, "1 wo w w 2", true); 1391 check(p, "1 wo w 2", true); 1392 check(p, "1 wor w 2", true); 1393 1394 p = Pattern.compile("([a-z])+?c"); 1395 Matcher m = p.matcher("ababcdefdec"); 1396 check(m, "ababc"); 1397 1398 // Supplementary character test 1399 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1400 m = p.matcher(toSupplementaries("ababcdefdec")); 1401 check(m, toSupplementaries("ababc")); 1402 1403 report("Reluctant Repetition"); 1404 } 1405 1406 private static Pattern serializedPattern(Pattern p) throws Exception { 1407 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1408 ObjectOutputStream oos = new ObjectOutputStream(baos); 1409 oos.writeObject(p); 1410 oos.close(); 1411 try (ObjectInputStream ois = new ObjectInputStream( 1412 new ByteArrayInputStream(baos.toByteArray()))) { 1413 return (Pattern)ois.readObject(); 1414 } 1415 } 1416 1417 private static void serializeTest() throws Exception { 1418 String patternStr = "(b)"; 1419 String matchStr = "b"; 1420 Pattern pattern = Pattern.compile(patternStr); 1421 Pattern serializedPattern = serializedPattern(pattern); 1422 Matcher matcher = serializedPattern.matcher(matchStr); 1423 if (!matcher.matches()) 1424 failCount++; 1425 if (matcher.groupCount() != 1) 1426 failCount++; 1427 1428 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1429 serializedPattern = serializedPattern(pattern); 1430 if (!serializedPattern.matcher("Ab").matches()) 1431 failCount++; 1432 if (serializedPattern.matcher("AB").matches()) 1433 failCount++; 1434 1435 report("Serialization"); 1436 } 1437 1438 private static void gTest() { 1439 Pattern pattern = Pattern.compile("\\G\\w"); 1440 Matcher matcher = pattern.matcher("abc#x#x"); 1441 matcher.find(); 1442 matcher.find(); 1443 matcher.find(); 1444 if (matcher.find()) 1445 failCount++; 1446 1447 pattern = Pattern.compile("\\GA*"); 1448 matcher = pattern.matcher("1A2AA3"); 1449 matcher.find(); 1450 if (matcher.find()) 1451 failCount++; 1452 1453 pattern = Pattern.compile("\\GA*"); 1454 matcher = pattern.matcher("1A2AA3"); 1455 if (!matcher.find(1)) 1456 failCount++; 1457 matcher.find(); 1458 if (matcher.find()) 1459 failCount++; 1460 1461 report("\\G"); 1462 } 1463 1464 private static void zTest() { 1465 Pattern pattern = Pattern.compile("foo\\Z"); 1466 // Positives 1467 check(pattern, "foo\u0085", true); 1468 check(pattern, "foo\u2028", true); 1469 check(pattern, "foo\u2029", true); 1470 check(pattern, "foo\n", true); 1471 check(pattern, "foo\r", true); 1472 check(pattern, "foo\r\n", true); 1473 // Negatives 1474 check(pattern, "fooo", false); 1475 check(pattern, "foo\n\r", false); 1476 1477 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1478 // Positives 1479 check(pattern, "foo", true); 1480 check(pattern, "foo\n", true); 1481 // Negatives 1482 check(pattern, "foo\r", false); 1483 check(pattern, "foo\u0085", false); 1484 check(pattern, "foo\u2028", false); 1485 check(pattern, "foo\u2029", false); 1486 1487 report("\\Z"); 1488 } 1489 1490 private static void replaceFirstTest() { 1491 Pattern pattern = Pattern.compile("(ab)(c*)"); 1492 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1493 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1494 failCount++; 1495 1496 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1497 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1498 failCount++; 1499 1500 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1501 String result = matcher.replaceFirst("$1"); 1502 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1503 failCount++; 1504 1505 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1506 result = matcher.replaceFirst("$2"); 1507 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1508 failCount++; 1509 1510 pattern = Pattern.compile("a*"); 1511 matcher = pattern.matcher("aaaaaaaaaa"); 1512 if (!matcher.replaceFirst("test").equals("test")) 1513 failCount++; 1514 1515 pattern = Pattern.compile("a+"); 1516 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1517 if (!matcher.replaceFirst("test").equals("zzztest")) 1518 failCount++; 1519 1520 // Supplementary character test 1521 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1522 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1523 if (!matcher.replaceFirst(toSupplementaries("test")) 1524 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1525 failCount++; 1526 1527 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1528 if (!matcher.replaceFirst(toSupplementaries("test")). 1529 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1530 failCount++; 1531 1532 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1533 result = matcher.replaceFirst("$1"); 1534 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1535 failCount++; 1536 1537 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1538 result = matcher.replaceFirst("$2"); 1539 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1540 failCount++; 1541 1542 pattern = Pattern.compile(toSupplementaries("a*")); 1543 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1544 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1545 failCount++; 1546 1547 pattern = Pattern.compile(toSupplementaries("a+")); 1548 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1549 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1550 failCount++; 1551 1552 report("Replace First"); 1553 } 1554 1555 private static void unixLinesTest() { 1556 Pattern pattern = Pattern.compile(".*"); 1557 Matcher matcher = pattern.matcher("aa\u2028blah"); 1558 matcher.find(); 1559 if (!matcher.group(0).equals("aa")) 1560 failCount++; 1561 1562 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1563 matcher = pattern.matcher("aa\u2028blah"); 1564 matcher.find(); 1565 if (!matcher.group(0).equals("aa\u2028blah")) 1566 failCount++; 1567 1568 pattern = Pattern.compile("[az]$", 1569 Pattern.MULTILINE | Pattern.UNIX_LINES); 1570 matcher = pattern.matcher("aa\u2028zz"); 1571 check(matcher, "a\u2028", false); 1572 1573 // Supplementary character test 1574 pattern = Pattern.compile(".*"); 1575 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1576 matcher.find(); 1577 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1578 failCount++; 1579 1580 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1581 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1582 matcher.find(); 1583 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1584 failCount++; 1585 1586 pattern = Pattern.compile(toSupplementaries("[az]$"), 1587 Pattern.MULTILINE | Pattern.UNIX_LINES); 1588 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1589 check(matcher, toSupplementaries("a\u2028"), false); 1590 1591 report("Unix Lines"); 1592 } 1593 1594 private static void commentsTest() { 1595 int flags = Pattern.COMMENTS; 1596 1597 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1598 Matcher matcher = pattern.matcher("aa#aa"); 1599 if (!matcher.matches()) 1600 failCount++; 1601 1602 pattern = Pattern.compile("aa # blah", flags); 1603 matcher = pattern.matcher("aa"); 1604 if (!matcher.matches()) 1605 failCount++; 1606 1607 pattern = Pattern.compile("aa blah", flags); 1608 matcher = pattern.matcher("aablah"); 1609 if (!matcher.matches()) 1610 failCount++; 1611 1612 pattern = Pattern.compile("aa # blah blech ", flags); 1613 matcher = pattern.matcher("aa"); 1614 if (!matcher.matches()) 1615 failCount++; 1616 1617 pattern = Pattern.compile("aa # blah\n ", flags); 1618 matcher = pattern.matcher("aa"); 1619 if (!matcher.matches()) 1620 failCount++; 1621 1622 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1623 matcher = pattern.matcher("aabc"); 1624 if (!matcher.matches()) 1625 failCount++; 1626 1627 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1628 matcher = pattern.matcher("aabc"); 1629 if (!matcher.matches()) 1630 failCount++; 1631 1632 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1633 matcher = pattern.matcher("aabc#blech"); 1634 if (!matcher.matches()) 1635 failCount++; 1636 1637 // Supplementary character test 1638 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1639 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1640 if (!matcher.matches()) 1641 failCount++; 1642 1643 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1644 matcher = pattern.matcher(toSupplementaries("aa")); 1645 if (!matcher.matches()) 1646 failCount++; 1647 1648 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1649 matcher = pattern.matcher(toSupplementaries("aablah")); 1650 if (!matcher.matches()) 1651 failCount++; 1652 1653 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1654 matcher = pattern.matcher(toSupplementaries("aa")); 1655 if (!matcher.matches()) 1656 failCount++; 1657 1658 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1659 matcher = pattern.matcher(toSupplementaries("aa")); 1660 if (!matcher.matches()) 1661 failCount++; 1662 1663 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1664 matcher = pattern.matcher(toSupplementaries("aabc")); 1665 if (!matcher.matches()) 1666 failCount++; 1667 1668 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1669 matcher = pattern.matcher(toSupplementaries("aabc")); 1670 if (!matcher.matches()) 1671 failCount++; 1672 1673 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1674 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1675 if (!matcher.matches()) 1676 failCount++; 1677 1678 report("Comments"); 1679 } 1680 1681 private static void caseFoldingTest() { // bug 4504687 1682 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1683 Pattern pattern = Pattern.compile("aa", flags); 1684 Matcher matcher = pattern.matcher("ab"); 1685 if (matcher.matches()) 1686 failCount++; 1687 1688 pattern = Pattern.compile("aA", flags); 1689 matcher = pattern.matcher("ab"); 1690 if (matcher.matches()) 1691 failCount++; 1692 1693 pattern = Pattern.compile("aa", flags); 1694 matcher = pattern.matcher("aB"); 1695 if (matcher.matches()) 1696 failCount++; 1697 matcher = pattern.matcher("Ab"); 1698 if (matcher.matches()) 1699 failCount++; 1700 1701 // ASCII "a" 1702 // Latin-1 Supplement "a" + grave 1703 // Cyrillic "a" 1704 String[] patterns = new String[] { 1705 //single 1706 "a", "\u00e0", "\u0430", 1707 //slice 1708 "ab", "\u00e0\u00e1", "\u0430\u0431", 1709 //class single 1710 "[a]", "[\u00e0]", "[\u0430]", 1711 //class range 1712 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1713 //back reference 1714 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1715 }; 1716 1717 String[] texts = new String[] { 1718 "A", "\u00c0", "\u0410", 1719 "AB", "\u00c0\u00c1", "\u0410\u0411", 1720 "A", "\u00c0", "\u0410", 1721 "B", "\u00c2", "\u0411", 1722 "aA", "\u00e0\u00c0", "\u0430\u0410" 1723 }; 1724 1725 boolean[] expected = new boolean[] { 1726 true, false, false, 1727 true, false, false, 1728 true, false, false, 1729 true, false, false, 1730 true, false, false 1731 }; 1732 1733 flags = Pattern.CASE_INSENSITIVE; 1734 for (int i = 0; i < patterns.length; i++) { 1735 pattern = Pattern.compile(patterns[i], flags); 1736 matcher = pattern.matcher(texts[i]); 1737 if (matcher.matches() != expected[i]) { 1738 System.out.println("<1> Failed at " + i); 1739 failCount++; 1740 } 1741 } 1742 1743 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1744 for (int i = 0; i < patterns.length; i++) { 1745 pattern = Pattern.compile(patterns[i], flags); 1746 matcher = pattern.matcher(texts[i]); 1747 if (!matcher.matches()) { 1748 System.out.println("<2> Failed at " + i); 1749 failCount++; 1750 } 1751 } 1752 // flag unicode_case alone should do nothing 1753 flags = Pattern.UNICODE_CASE; 1754 for (int i = 0; i < patterns.length; i++) { 1755 pattern = Pattern.compile(patterns[i], flags); 1756 matcher = pattern.matcher(texts[i]); 1757 if (matcher.matches()) { 1758 System.out.println("<3> Failed at " + i); 1759 failCount++; 1760 } 1761 } 1762 1763 // Special cases: i, I, u+0131 and u+0130 1764 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1765 pattern = Pattern.compile("[h-j]+", flags); 1766 if (!pattern.matcher("\u0131\u0130").matches()) 1767 failCount++; 1768 report("Case Folding"); 1769 } 1770 1771 private static void appendTest() { 1772 Pattern pattern = Pattern.compile("(ab)(cd)"); 1773 Matcher matcher = pattern.matcher("abcd"); 1774 String result = matcher.replaceAll("$2$1"); 1775 if (!result.equals("cdab")) 1776 failCount++; 1777 1778 String s1 = "Swap all: first = 123, second = 456"; 1779 String s2 = "Swap one: first = 123, second = 456"; 1780 String r = "$3$2$1"; 1781 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1782 matcher = pattern.matcher(s1); 1783 1784 result = matcher.replaceAll(r); 1785 if (!result.equals("Swap all: 123 = first, 456 = second")) 1786 failCount++; 1787 1788 matcher = pattern.matcher(s2); 1789 1790 if (matcher.find()) { 1791 StringBuffer sb = new StringBuffer(); 1792 matcher.appendReplacement(sb, r); 1793 matcher.appendTail(sb); 1794 result = sb.toString(); 1795 if (!result.equals("Swap one: 123 = first, second = 456")) 1796 failCount++; 1797 } 1798 1799 // Supplementary character test 1800 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1801 matcher = pattern.matcher(toSupplementaries("abcd")); 1802 result = matcher.replaceAll("$2$1"); 1803 if (!result.equals(toSupplementaries("cdab"))) 1804 failCount++; 1805 1806 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1807 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1808 r = toSupplementaries("$3$2$1"); 1809 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1810 matcher = pattern.matcher(s1); 1811 1812 result = matcher.replaceAll(r); 1813 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1814 failCount++; 1815 1816 matcher = pattern.matcher(s2); 1817 1818 if (matcher.find()) { 1819 StringBuffer sb = new StringBuffer(); 1820 matcher.appendReplacement(sb, r); 1821 matcher.appendTail(sb); 1822 result = sb.toString(); 1823 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1824 failCount++; 1825 } 1826 report("Append"); 1827 } 1828 1829 private static void splitTest() { 1830 Pattern pattern = Pattern.compile(":"); 1831 String[] result = pattern.split("foo:and:boo", 2); 1832 if (!result[0].equals("foo")) 1833 failCount++; 1834 if (!result[1].equals("and:boo")) 1835 failCount++; 1836 // Supplementary character test 1837 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1838 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1839 if (!result[0].equals(toSupplementaries("foo"))) 1840 failCount++; 1841 if (!result[1].equals(toSupplementaries("andXboo"))) 1842 failCount++; 1843 1844 CharBuffer cb = CharBuffer.allocate(100); 1845 cb.put("foo:and:boo"); 1846 cb.flip(); 1847 result = pattern.split(cb); 1848 if (!result[0].equals("foo")) 1849 failCount++; 1850 if (!result[1].equals("and")) 1851 failCount++; 1852 if (!result[2].equals("boo")) 1853 failCount++; 1854 1855 // Supplementary character test 1856 CharBuffer cbs = CharBuffer.allocate(100); 1857 cbs.put(toSupplementaries("fooXandXboo")); 1858 cbs.flip(); 1859 result = patternX.split(cbs); 1860 if (!result[0].equals(toSupplementaries("foo"))) 1861 failCount++; 1862 if (!result[1].equals(toSupplementaries("and"))) 1863 failCount++; 1864 if (!result[2].equals(toSupplementaries("boo"))) 1865 failCount++; 1866 1867 String source = "0123456789"; 1868 for (int limit=-2; limit<3; limit++) { 1869 for (int x=0; x<10; x++) { 1870 result = source.split(Integer.toString(x), limit); 1871 int expectedLength = limit < 1 ? 2 : limit; 1872 1873 if ((limit == 0) && (x == 9)) { 1874 // expected dropping of "" 1875 if (result.length != 1) 1876 failCount++; 1877 if (!result[0].equals("012345678")) { 1878 failCount++; 1879 } 1880 } else { 1881 if (result.length != expectedLength) { 1882 failCount++; 1883 } 1884 if (!result[0].equals(source.substring(0,x))) { 1885 if (limit != 1) { 1886 failCount++; 1887 } else { 1888 if (!result[0].equals(source.substring(0,10))) { 1889 failCount++; 1890 } 1891 } 1892 } 1893 if (expectedLength > 1) { // Check segment 2 1894 if (!result[1].equals(source.substring(x+1,10))) 1895 failCount++; 1896 } 1897 } 1898 } 1899 } 1900 // Check the case for no match found 1901 for (int limit=-2; limit<3; limit++) { 1902 result = source.split("e", limit); 1903 if (result.length != 1) 1904 failCount++; 1905 if (!result[0].equals(source)) 1906 failCount++; 1907 } 1908 // Check the case for limit == 0, source = ""; 1909 // split() now returns 0-length for empty source "" see #6559590 1910 source = ""; 1911 result = source.split("e", 0); 1912 if (result.length != 1) 1913 failCount++; 1914 if (!result[0].equals(source)) 1915 failCount++; 1916 1917 // Check both split() and splitAsStraem(), especially for zero-lenth 1918 // input and zero-lenth match cases 1919 String[][] input = new String[][] { 1920 { " ", "Abc Efg Hij" }, // normal non-zero-match 1921 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1922 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1923 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1924 { "(?=\\p{Lu})", "AbcEfg" }, 1925 { "(?=\\p{Lu})", "Abc" }, 1926 { " ", "" }, // zero-length input 1927 { ".*", "" }, 1928 1929 // some tests from PatternStreamTest.java 1930 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1931 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1932 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1933 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1934 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1935 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1936 { "\u56da", "" }, 1937 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1938 { "o", "boo:and:foo" }, 1939 { "o", "booooo:and:fooooo" }, 1940 { "o", "fooooo:" }, 1941 }; 1942 1943 String[][] expected = new String[][] { 1944 { "Abc", "Efg", "Hij" }, 1945 { "", "Abc", "Efg", "Hij" }, 1946 { "Abc", "", "Efg", "Hij" }, 1947 { "Abc", "Efg", "Hij" }, 1948 { "Abc", "Efg" }, 1949 { "Abc" }, 1950 { "" }, 1951 { "" }, 1952 1953 { "awgqwefg1fefw", "vssv1vvv1" }, 1954 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1955 { "awgqwefg", "fefw4vssv", "vvv" }, 1956 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1957 { "1", "23", "456", "7890" }, 1958 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1959 { "" }, 1960 { "This", "is", "testing", "", "with", "different", "separators" }, 1961 { "b", "", ":and:f" }, 1962 { "b", "", "", "", "", ":and:f" }, 1963 { "f", "", "", "", "", ":" }, 1964 }; 1965 for (int i = 0; i < input.length; i++) { 1966 pattern = Pattern.compile(input[i][0]); 1967 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1968 failCount++; 1969 } 1970 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1971 // array for zero-length input for now 1972 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1973 expected[i])) { 1974 failCount++; 1975 } 1976 } 1977 report("Split"); 1978 } 1979 1980 private static void negationTest() { 1981 Pattern pattern = Pattern.compile("[\\[@^]+"); 1982 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1983 if (!matcher.find()) 1984 failCount++; 1985 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1986 failCount++; 1987 pattern = Pattern.compile("[@\\[^]+"); 1988 matcher = pattern.matcher("@@@@[[[[^^^^"); 1989 if (!matcher.find()) 1990 failCount++; 1991 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1992 failCount++; 1993 pattern = Pattern.compile("[@\\[^@]+"); 1994 matcher = pattern.matcher("@@@@[[[[^^^^"); 1995 if (!matcher.find()) 1996 failCount++; 1997 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1998 failCount++; 1999 2000 pattern = Pattern.compile("\\)"); 2001 matcher = pattern.matcher("xxx)xxx"); 2002 if (!matcher.find()) 2003 failCount++; 2004 2005 report("Negation"); 2006 } 2007 2008 private static void ampersandTest() { 2009 Pattern pattern = Pattern.compile("[&@]+"); 2010 check(pattern, "@@@@&&&&", true); 2011 2012 pattern = Pattern.compile("[@&]+"); 2013 check(pattern, "@@@@&&&&", true); 2014 2015 pattern = Pattern.compile("[@\\&]+"); 2016 check(pattern, "@@@@&&&&", true); 2017 2018 report("Ampersand"); 2019 } 2020 2021 private static void octalTest() throws Exception { 2022 Pattern pattern = Pattern.compile("\\u0007"); 2023 Matcher matcher = pattern.matcher("\u0007"); 2024 if (!matcher.matches()) 2025 failCount++; 2026 pattern = Pattern.compile("\\07"); 2027 matcher = pattern.matcher("\u0007"); 2028 if (!matcher.matches()) 2029 failCount++; 2030 pattern = Pattern.compile("\\007"); 2031 matcher = pattern.matcher("\u0007"); 2032 if (!matcher.matches()) 2033 failCount++; 2034 pattern = Pattern.compile("\\0007"); 2035 matcher = pattern.matcher("\u0007"); 2036 if (!matcher.matches()) 2037 failCount++; 2038 pattern = Pattern.compile("\\040"); 2039 matcher = pattern.matcher("\u0020"); 2040 if (!matcher.matches()) 2041 failCount++; 2042 pattern = Pattern.compile("\\0403"); 2043 matcher = pattern.matcher("\u00203"); 2044 if (!matcher.matches()) 2045 failCount++; 2046 pattern = Pattern.compile("\\0103"); 2047 matcher = pattern.matcher("\u0043"); 2048 if (!matcher.matches()) 2049 failCount++; 2050 2051 report("Octal"); 2052 } 2053 2054 private static void longPatternTest() throws Exception { 2055 try { 2056 Pattern pattern = Pattern.compile( 2057 "a 32-character-long pattern xxxx"); 2058 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2059 pattern = Pattern.compile("a thirty four character long regex"); 2060 StringBuffer patternToBe = new StringBuffer(101); 2061 for (int i=0; i<100; i++) 2062 patternToBe.append((char)(97 + i%26)); 2063 pattern = Pattern.compile(patternToBe.toString()); 2064 } catch (PatternSyntaxException e) { 2065 failCount++; 2066 } 2067 2068 // Supplementary character test 2069 try { 2070 Pattern pattern = Pattern.compile( 2071 toSupplementaries("a 32-character-long pattern xxxx")); 2072 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2073 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2074 StringBuffer patternToBe = new StringBuffer(101*2); 2075 for (int i=0; i<100; i++) 2076 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2077 + 97 + i%26)); 2078 pattern = Pattern.compile(patternToBe.toString()); 2079 } catch (PatternSyntaxException e) { 2080 failCount++; 2081 } 2082 report("LongPattern"); 2083 } 2084 2085 private static void group0Test() throws Exception { 2086 Pattern pattern = Pattern.compile("(tes)ting"); 2087 Matcher matcher = pattern.matcher("testing"); 2088 check(matcher, "testing"); 2089 2090 matcher.reset("testing"); 2091 if (matcher.lookingAt()) { 2092 if (!matcher.group(0).equals("testing")) 2093 failCount++; 2094 } else { 2095 failCount++; 2096 } 2097 2098 matcher.reset("testing"); 2099 if (matcher.matches()) { 2100 if (!matcher.group(0).equals("testing")) 2101 failCount++; 2102 } else { 2103 failCount++; 2104 } 2105 2106 pattern = Pattern.compile("(tes)ting"); 2107 matcher = pattern.matcher("testing"); 2108 if (matcher.lookingAt()) { 2109 if (!matcher.group(0).equals("testing")) 2110 failCount++; 2111 } else { 2112 failCount++; 2113 } 2114 2115 pattern = Pattern.compile("^(tes)ting"); 2116 matcher = pattern.matcher("testing"); 2117 if (matcher.matches()) { 2118 if (!matcher.group(0).equals("testing")) 2119 failCount++; 2120 } else { 2121 failCount++; 2122 } 2123 2124 // Supplementary character test 2125 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2126 matcher = pattern.matcher(toSupplementaries("testing")); 2127 check(matcher, toSupplementaries("testing")); 2128 2129 matcher.reset(toSupplementaries("testing")); 2130 if (matcher.lookingAt()) { 2131 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2132 failCount++; 2133 } else { 2134 failCount++; 2135 } 2136 2137 matcher.reset(toSupplementaries("testing")); 2138 if (matcher.matches()) { 2139 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2140 failCount++; 2141 } else { 2142 failCount++; 2143 } 2144 2145 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2146 matcher = pattern.matcher(toSupplementaries("testing")); 2147 if (matcher.lookingAt()) { 2148 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2149 failCount++; 2150 } else { 2151 failCount++; 2152 } 2153 2154 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2155 matcher = pattern.matcher(toSupplementaries("testing")); 2156 if (matcher.matches()) { 2157 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2158 failCount++; 2159 } else { 2160 failCount++; 2161 } 2162 2163 report("Group0"); 2164 } 2165 2166 private static void findIntTest() throws Exception { 2167 Pattern p = Pattern.compile("blah"); 2168 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2169 boolean result = m.find(2); 2170 if (!result) 2171 failCount++; 2172 2173 p = Pattern.compile("$"); 2174 m = p.matcher("1234567890"); 2175 result = m.find(10); 2176 if (!result) 2177 failCount++; 2178 try { 2179 result = m.find(11); 2180 failCount++; 2181 } catch (IndexOutOfBoundsException e) { 2182 // correct result 2183 } 2184 2185 // Supplementary character test 2186 p = Pattern.compile(toSupplementaries("blah")); 2187 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2188 result = m.find(2); 2189 if (!result) 2190 failCount++; 2191 2192 report("FindInt"); 2193 } 2194 2195 private static void emptyPatternTest() throws Exception { 2196 Pattern p = Pattern.compile(""); 2197 Matcher m = p.matcher("foo"); 2198 2199 // Should find empty pattern at beginning of input 2200 boolean result = m.find(); 2201 if (result != true) 2202 failCount++; 2203 if (m.start() != 0) 2204 failCount++; 2205 2206 // Should not match entire input if input is not empty 2207 m.reset(); 2208 result = m.matches(); 2209 if (result == true) 2210 failCount++; 2211 2212 try { 2213 m.start(0); 2214 failCount++; 2215 } catch (IllegalStateException e) { 2216 // Correct result 2217 } 2218 2219 // Should match entire input if input is empty 2220 m.reset(""); 2221 result = m.matches(); 2222 if (result != true) 2223 failCount++; 2224 2225 result = Pattern.matches("", ""); 2226 if (result != true) 2227 failCount++; 2228 2229 result = Pattern.matches("", "foo"); 2230 if (result == true) 2231 failCount++; 2232 report("EmptyPattern"); 2233 } 2234 2235 private static void charClassTest() throws Exception { 2236 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2237 check(pattern, "blahb]blech", true); 2238 2239 pattern = Pattern.compile("[abc[def]]"); 2240 check(pattern, "b", true); 2241 2242 // Supplementary character tests 2243 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2244 check(pattern, toSupplementaries("blahb]blech"), true); 2245 2246 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2247 check(pattern, toSupplementaries("b"), true); 2248 2249 try { 2250 // u00ff when UNICODE_CASE 2251 pattern = Pattern.compile("[ab\u00ffcd]", 2252 Pattern.CASE_INSENSITIVE| 2253 Pattern.UNICODE_CASE); 2254 check(pattern, "ab\u00ffcd", true); 2255 check(pattern, "Ab\u0178Cd", true); 2256 2257 // u00b5 when UNICODE_CASE 2258 pattern = Pattern.compile("[ab\u00b5cd]", 2259 Pattern.CASE_INSENSITIVE| 2260 Pattern.UNICODE_CASE); 2261 check(pattern, "ab\u00b5cd", true); 2262 check(pattern, "Ab\u039cCd", true); 2263 } catch (Exception e) { failCount++; } 2264 2265 /* Special cases 2266 (1)LatinSmallLetterLongS u+017f 2267 (2)LatinSmallLetterDotlessI u+0131 2268 (3)LatineCapitalLetterIWithDotAbove u+0130 2269 (4)KelvinSign u+212a 2270 (5)AngstromSign u+212b 2271 */ 2272 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2273 pattern = Pattern.compile("[sik\u00c5]+", flags); 2274 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2275 failCount++; 2276 2277 report("CharClass"); 2278 } 2279 2280 private static void caretTest() throws Exception { 2281 Pattern pattern = Pattern.compile("\\w*"); 2282 Matcher matcher = pattern.matcher("a#bc#def##g"); 2283 check(matcher, "a"); 2284 check(matcher, ""); 2285 check(matcher, "bc"); 2286 check(matcher, ""); 2287 check(matcher, "def"); 2288 check(matcher, ""); 2289 check(matcher, ""); 2290 check(matcher, "g"); 2291 check(matcher, ""); 2292 if (matcher.find()) 2293 failCount++; 2294 2295 pattern = Pattern.compile("^\\w*"); 2296 matcher = pattern.matcher("a#bc#def##g"); 2297 check(matcher, "a"); 2298 if (matcher.find()) 2299 failCount++; 2300 2301 pattern = Pattern.compile("\\w"); 2302 matcher = pattern.matcher("abc##x"); 2303 check(matcher, "a"); 2304 check(matcher, "b"); 2305 check(matcher, "c"); 2306 check(matcher, "x"); 2307 if (matcher.find()) 2308 failCount++; 2309 2310 pattern = Pattern.compile("^\\w"); 2311 matcher = pattern.matcher("abc##x"); 2312 check(matcher, "a"); 2313 if (matcher.find()) 2314 failCount++; 2315 2316 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2317 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2318 check(matcher, "abc"); 2319 if (matcher.find()) 2320 failCount++; 2321 2322 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2323 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2324 check(matcher, "abc"); 2325 check(matcher, "jkl"); 2326 if (matcher.find()) 2327 failCount++; 2328 2329 pattern = Pattern.compile("^", Pattern.MULTILINE); 2330 matcher = pattern.matcher("this is some text"); 2331 String result = matcher.replaceAll("X"); 2332 if (!result.equals("Xthis is some text")) 2333 failCount++; 2334 2335 pattern = Pattern.compile("^"); 2336 matcher = pattern.matcher("this is some text"); 2337 result = matcher.replaceAll("X"); 2338 if (!result.equals("Xthis is some text")) 2339 failCount++; 2340 2341 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2342 matcher = pattern.matcher("this is some text\n"); 2343 result = matcher.replaceAll("X"); 2344 if (!result.equals("Xthis is some text\n")) 2345 failCount++; 2346 2347 report("Caret"); 2348 } 2349 2350 private static void groupCaptureTest() throws Exception { 2351 // Independent group 2352 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2353 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2354 matcher.find(); 2355 try { 2356 String blah = matcher.group(1); 2357 failCount++; 2358 } catch (IndexOutOfBoundsException ioobe) { 2359 // Good result 2360 } 2361 // Pure group 2362 pattern = Pattern.compile("x+(?:y+)z+"); 2363 matcher = pattern.matcher("xxxyyyzzz"); 2364 matcher.find(); 2365 try { 2366 String blah = matcher.group(1); 2367 failCount++; 2368 } catch (IndexOutOfBoundsException ioobe) { 2369 // Good result 2370 } 2371 2372 // Supplementary character tests 2373 // Independent group 2374 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2375 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2376 matcher.find(); 2377 try { 2378 String blah = matcher.group(1); 2379 failCount++; 2380 } catch (IndexOutOfBoundsException ioobe) { 2381 // Good result 2382 } 2383 // Pure group 2384 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2385 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2386 matcher.find(); 2387 try { 2388 String blah = matcher.group(1); 2389 failCount++; 2390 } catch (IndexOutOfBoundsException ioobe) { 2391 // Good result 2392 } 2393 2394 report("GroupCapture"); 2395 } 2396 2397 private static void backRefTest() throws Exception { 2398 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2399 check(pattern, "zzzaabcazzz", true); 2400 2401 pattern = Pattern.compile("(a*)bc\\1"); 2402 check(pattern, "zzzaabcaazzz", true); 2403 2404 pattern = Pattern.compile("(abc)(def)\\1"); 2405 check(pattern, "abcdefabc", true); 2406 2407 pattern = Pattern.compile("(abc)(def)\\3"); 2408 check(pattern, "abcdefabc", false); 2409 2410 try { 2411 for (int i = 1; i < 10; i++) { 2412 // Make sure backref 1-9 are always accepted 2413 pattern = Pattern.compile("abcdef\\" + i); 2414 // and fail to match if the target group does not exit 2415 check(pattern, "abcdef", false); 2416 } 2417 } catch(PatternSyntaxException e) { 2418 failCount++; 2419 } 2420 2421 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2422 check(pattern, "abcdefghija", false); 2423 check(pattern, "abcdefghija1", true); 2424 2425 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2426 check(pattern, "abcdefghijkk", true); 2427 2428 pattern = Pattern.compile("(a)bcdefghij\\11"); 2429 check(pattern, "abcdefghija1", true); 2430 2431 // Supplementary character tests 2432 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2433 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2434 2435 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2436 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2437 2438 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2439 check(pattern, toSupplementaries("abcdefabc"), true); 2440 2441 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2442 check(pattern, toSupplementaries("abcdefabc"), false); 2443 2444 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2445 check(pattern, toSupplementaries("abcdefghija"), false); 2446 check(pattern, toSupplementaries("abcdefghija1"), true); 2447 2448 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2449 check(pattern, toSupplementaries("abcdefghijkk"), true); 2450 2451 report("BackRef"); 2452 } 2453 2454 /** 2455 * Unicode Technical Report #18, section 2.6 End of Line 2456 * There is no empty line to be matched in the sequence \u000D\u000A 2457 * but there is an empty line in the sequence \u000A\u000D. 2458 */ 2459 private static void anchorTest() throws Exception { 2460 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2461 Matcher m = p.matcher("blah1\r\nblah2"); 2462 m.find(); 2463 m.find(); 2464 if (!m.group().equals("blah2")) 2465 failCount++; 2466 2467 m.reset("blah1\n\rblah2"); 2468 m.find(); 2469 m.find(); 2470 m.find(); 2471 if (!m.group().equals("blah2")) 2472 failCount++; 2473 2474 // Test behavior of $ with \r\n at end of input 2475 p = Pattern.compile(".+$"); 2476 m = p.matcher("blah1\r\n"); 2477 if (!m.find()) 2478 failCount++; 2479 if (!m.group().equals("blah1")) 2480 failCount++; 2481 if (m.find()) 2482 failCount++; 2483 2484 // Test behavior of $ with \r\n at end of input in multiline 2485 p = Pattern.compile(".+$", Pattern.MULTILINE); 2486 m = p.matcher("blah1\r\n"); 2487 if (!m.find()) 2488 failCount++; 2489 if (m.find()) 2490 failCount++; 2491 2492 // Test for $ recognition of \u0085 for bug 4527731 2493 p = Pattern.compile(".+$", Pattern.MULTILINE); 2494 m = p.matcher("blah1\u0085"); 2495 if (!m.find()) 2496 failCount++; 2497 2498 // Supplementary character test 2499 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2500 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2501 m.find(); 2502 m.find(); 2503 if (!m.group().equals(toSupplementaries("blah2"))) 2504 failCount++; 2505 2506 m.reset(toSupplementaries("blah1\n\rblah2")); 2507 m.find(); 2508 m.find(); 2509 m.find(); 2510 if (!m.group().equals(toSupplementaries("blah2"))) 2511 failCount++; 2512 2513 // Test behavior of $ with \r\n at end of input 2514 p = Pattern.compile(".+$"); 2515 m = p.matcher(toSupplementaries("blah1\r\n")); 2516 if (!m.find()) 2517 failCount++; 2518 if (!m.group().equals(toSupplementaries("blah1"))) 2519 failCount++; 2520 if (m.find()) 2521 failCount++; 2522 2523 // Test behavior of $ with \r\n at end of input in multiline 2524 p = Pattern.compile(".+$", Pattern.MULTILINE); 2525 m = p.matcher(toSupplementaries("blah1\r\n")); 2526 if (!m.find()) 2527 failCount++; 2528 if (m.find()) 2529 failCount++; 2530 2531 // Test for $ recognition of \u0085 for bug 4527731 2532 p = Pattern.compile(".+$", Pattern.MULTILINE); 2533 m = p.matcher(toSupplementaries("blah1\u0085")); 2534 if (!m.find()) 2535 failCount++; 2536 2537 report("Anchors"); 2538 } 2539 2540 /** 2541 * A basic sanity test of Matcher.lookingAt(). 2542 */ 2543 private static void lookingAtTest() throws Exception { 2544 Pattern p = Pattern.compile("(ab)(c*)"); 2545 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2546 2547 if (!m.lookingAt()) 2548 failCount++; 2549 2550 if (!m.group().equals(m.group(0))) 2551 failCount++; 2552 2553 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2554 if (m.lookingAt()) 2555 failCount++; 2556 2557 // Supplementary character test 2558 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2559 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2560 2561 if (!m.lookingAt()) 2562 failCount++; 2563 2564 if (!m.group().equals(m.group(0))) 2565 failCount++; 2566 2567 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2568 if (m.lookingAt()) 2569 failCount++; 2570 2571 report("Looking At"); 2572 } 2573 2574 /** 2575 * A basic sanity test of Matcher.matches(). 2576 */ 2577 private static void matchesTest() throws Exception { 2578 // matches() 2579 Pattern p = Pattern.compile("ulb(c*)"); 2580 Matcher m = p.matcher("ulbcccccc"); 2581 if (!m.matches()) 2582 failCount++; 2583 2584 // find() but not matches() 2585 m.reset("zzzulbcccccc"); 2586 if (m.matches()) 2587 failCount++; 2588 2589 // lookingAt() but not matches() 2590 m.reset("ulbccccccdef"); 2591 if (m.matches()) 2592 failCount++; 2593 2594 // matches() 2595 p = Pattern.compile("a|ad"); 2596 m = p.matcher("ad"); 2597 if (!m.matches()) 2598 failCount++; 2599 2600 // Supplementary character test 2601 // matches() 2602 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2603 m = p.matcher(toSupplementaries("ulbcccccc")); 2604 if (!m.matches()) 2605 failCount++; 2606 2607 // find() but not matches() 2608 m.reset(toSupplementaries("zzzulbcccccc")); 2609 if (m.matches()) 2610 failCount++; 2611 2612 // lookingAt() but not matches() 2613 m.reset(toSupplementaries("ulbccccccdef")); 2614 if (m.matches()) 2615 failCount++; 2616 2617 // matches() 2618 p = Pattern.compile(toSupplementaries("a|ad")); 2619 m = p.matcher(toSupplementaries("ad")); 2620 if (!m.matches()) 2621 failCount++; 2622 2623 report("Matches"); 2624 } 2625 2626 /** 2627 * A basic sanity test of Pattern.matches(). 2628 */ 2629 private static void patternMatchesTest() throws Exception { 2630 // matches() 2631 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2632 toSupplementaries("ulbcccccc"))) 2633 failCount++; 2634 2635 // find() but not matches() 2636 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2637 toSupplementaries("zzzulbcccccc"))) 2638 failCount++; 2639 2640 // lookingAt() but not matches() 2641 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2642 toSupplementaries("ulbccccccdef"))) 2643 failCount++; 2644 2645 // Supplementary character test 2646 // matches() 2647 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2648 toSupplementaries("ulbcccccc"))) 2649 failCount++; 2650 2651 // find() but not matches() 2652 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2653 toSupplementaries("zzzulbcccccc"))) 2654 failCount++; 2655 2656 // lookingAt() but not matches() 2657 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2658 toSupplementaries("ulbccccccdef"))) 2659 failCount++; 2660 2661 report("Pattern Matches"); 2662 } 2663 2664 /** 2665 * Canonical equivalence testing. Tests the ability of the engine 2666 * to match sequences that are not explicitly specified in the 2667 * pattern when they are considered equivalent by the Unicode Standard. 2668 */ 2669 private static void ceTest() throws Exception { 2670 // Decomposed char outside char classes 2671 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2672 Matcher m = p.matcher("test\u00e5"); 2673 if (!m.matches()) 2674 failCount++; 2675 2676 m.reset("testa\u030a"); 2677 if (!m.matches()) 2678 failCount++; 2679 2680 // Composed char outside char classes 2681 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2682 m = p.matcher("test\u00e5"); 2683 if (!m.matches()) 2684 failCount++; 2685 2686 m.reset("testa\u030a"); 2687 if (!m.find()) 2688 failCount++; 2689 2690 // Decomposed char inside a char class 2691 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2692 m = p.matcher("test\u00e5"); 2693 if (!m.find()) 2694 failCount++; 2695 2696 m.reset("testa\u030a"); 2697 if (!m.find()) 2698 failCount++; 2699 2700 // Composed char inside a char class 2701 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2702 m = p.matcher("test\u00e5"); 2703 if (!m.find()) 2704 failCount++; 2705 2706 m.reset("testa\u0300"); 2707 if (!m.find()) 2708 failCount++; 2709 2710 m.reset("testa\u030a"); 2711 if (!m.find()) 2712 failCount++; 2713 2714 // Marks that cannot legally change order and be equivalent 2715 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2716 check(p, "testa\u0308\u0300", true); 2717 check(p, "testa\u0300\u0308", false); 2718 2719 // Marks that can legally change order and be equivalent 2720 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2721 check(p, "testa\u0308\u0323", true); 2722 check(p, "testa\u0323\u0308", true); 2723 2724 // Test all equivalences of the sequence a\u0308\u0323\u0300 2725 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2726 check(p, "testa\u0308\u0323\u0300", true); 2727 check(p, "testa\u0323\u0308\u0300", true); 2728 check(p, "testa\u0308\u0300\u0323", true); 2729 check(p, "test\u00e4\u0323\u0300", true); 2730 check(p, "test\u00e4\u0300\u0323", true); 2731 2732 Object[][] data = new Object[][] { 2733 2734 // JDK-4867170 2735 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2736 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2737 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2738 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2739 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2740 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2741 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2742 2743 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2744 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2745 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2746 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2747 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2748 2749 // backtracking, force to match "\u1f80", instead of \u1f82" 2750 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2751 2752 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2753 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2754 2755 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2756 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2757 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2758 2759 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2760 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2761 { "\u1f80", "ab\u1f80cd", "f", true }, 2762 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2763 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2764 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2765 { "\u1f82", "\u1f80\u0300", "m", true }, 2766 2767 // JDK-7080302 # compile failed 2768 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2769 2770 // JDK-6728861, same cause as above one 2771 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2772 2773 // JDK-6995635 2774 { "(\u00e9)", "e\u0301", "m", true }, 2775 2776 // JDK-6736245 2777 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2778 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2779 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2780 2781 // 4916384. 2782 // Decomposed hangul (jamos) works inside clazz 2783 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2784 { "[\u1100\u1161]", "\uac00", "m", true}, 2785 2786 { "[\uac00]", "\u1100\u1161", "m", true}, 2787 { "[\uac00]", "\uac00", "m", true}, 2788 2789 // Decomposed hangul (jamos) 2790 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2791 { "\u1100\u1161", "\uac00", "m", true}, 2792 2793 // Composed hangul 2794 { "\uac00", "\u1100\u1161", "m", true }, 2795 { "\uac00", "\uac00", "m", true }, 2796 2797 /* Need a NFDSlice to nfd the source to solve this issue 2798 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2799 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2800 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2801 2802 // Decomposed supplementary outside char classes 2803 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2804 // Composed supplementary outside char classes 2805 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2806 */ 2807 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2808 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2809 2810 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2811 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2812 }; 2813 2814 int failCount = 0; 2815 for (Object[] d : data) { 2816 String pn = (String)d[0]; 2817 String tt = (String)d[1]; 2818 boolean isFind = "f".equals(((String)d[2])); 2819 boolean expected = (boolean)d[3]; 2820 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2821 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2822 if (ret != expected) { 2823 failCount++; 2824 continue; 2825 } 2826 } 2827 report("Canonical Equivalence"); 2828 } 2829 2830 /** 2831 * A basic sanity test of Matcher.replaceAll(). 2832 */ 2833 private static void globalSubstitute() throws Exception { 2834 // Global substitution with a literal 2835 Pattern p = Pattern.compile("(ab)(c*)"); 2836 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2837 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2838 failCount++; 2839 2840 m.reset("zzzabccczzzabcczzzabccczzz"); 2841 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2842 failCount++; 2843 2844 // Global substitution with groups 2845 m.reset("zzzabccczzzabcczzzabccczzz"); 2846 String result = m.replaceAll("$1"); 2847 if (!result.equals("zzzabzzzabzzzabzzz")) 2848 failCount++; 2849 2850 // Supplementary character test 2851 // Global substitution with a literal 2852 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2853 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2854 if (!m.replaceAll(toSupplementaries("test")). 2855 equals(toSupplementaries("testzzztestzzztest"))) 2856 failCount++; 2857 2858 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2859 if (!m.replaceAll(toSupplementaries("test")). 2860 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2861 failCount++; 2862 2863 // Global substitution with groups 2864 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2865 result = m.replaceAll("$1"); 2866 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2867 failCount++; 2868 2869 report("Global Substitution"); 2870 } 2871 2872 /** 2873 * Tests the usage of Matcher.appendReplacement() with literal 2874 * and group substitutions. 2875 */ 2876 private static void stringbufferSubstitute() throws Exception { 2877 // SB substitution with literal 2878 String blah = "zzzblahzzz"; 2879 Pattern p = Pattern.compile("blah"); 2880 Matcher m = p.matcher(blah); 2881 StringBuffer result = new StringBuffer(); 2882 try { 2883 m.appendReplacement(result, "blech"); 2884 failCount++; 2885 } catch (IllegalStateException e) { 2886 } 2887 m.find(); 2888 m.appendReplacement(result, "blech"); 2889 if (!result.toString().equals("zzzblech")) 2890 failCount++; 2891 2892 m.appendTail(result); 2893 if (!result.toString().equals("zzzblechzzz")) 2894 failCount++; 2895 2896 // SB substitution with groups 2897 blah = "zzzabcdzzz"; 2898 p = Pattern.compile("(ab)(cd)*"); 2899 m = p.matcher(blah); 2900 result = new StringBuffer(); 2901 try { 2902 m.appendReplacement(result, "$1"); 2903 failCount++; 2904 } catch (IllegalStateException e) { 2905 } 2906 m.find(); 2907 m.appendReplacement(result, "$1"); 2908 if (!result.toString().equals("zzzab")) 2909 failCount++; 2910 2911 m.appendTail(result); 2912 if (!result.toString().equals("zzzabzzz")) 2913 failCount++; 2914 2915 // SB substitution with 3 groups 2916 blah = "zzzabcdcdefzzz"; 2917 p = Pattern.compile("(ab)(cd)*(ef)"); 2918 m = p.matcher(blah); 2919 result = new StringBuffer(); 2920 try { 2921 m.appendReplacement(result, "$1w$2w$3"); 2922 failCount++; 2923 } catch (IllegalStateException e) { 2924 } 2925 m.find(); 2926 m.appendReplacement(result, "$1w$2w$3"); 2927 if (!result.toString().equals("zzzabwcdwef")) 2928 failCount++; 2929 2930 m.appendTail(result); 2931 if (!result.toString().equals("zzzabwcdwefzzz")) 2932 failCount++; 2933 2934 // SB substitution with groups and three matches 2935 // skipping middle match 2936 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2937 p = Pattern.compile("(ab)(cd*)"); 2938 m = p.matcher(blah); 2939 result = new StringBuffer(); 2940 try { 2941 m.appendReplacement(result, "$1"); 2942 failCount++; 2943 } catch (IllegalStateException e) { 2944 } 2945 m.find(); 2946 m.appendReplacement(result, "$1"); 2947 if (!result.toString().equals("zzzab")) 2948 failCount++; 2949 2950 m.find(); 2951 m.find(); 2952 m.appendReplacement(result, "$2"); 2953 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2954 failCount++; 2955 2956 m.appendTail(result); 2957 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2958 failCount++; 2959 2960 // Check to make sure escaped $ is ignored 2961 blah = "zzzabcdcdefzzz"; 2962 p = Pattern.compile("(ab)(cd)*(ef)"); 2963 m = p.matcher(blah); 2964 result = new StringBuffer(); 2965 m.find(); 2966 m.appendReplacement(result, "$1w\\$2w$3"); 2967 if (!result.toString().equals("zzzabw$2wef")) 2968 failCount++; 2969 2970 m.appendTail(result); 2971 if (!result.toString().equals("zzzabw$2wefzzz")) 2972 failCount++; 2973 2974 // Check to make sure a reference to nonexistent group causes error 2975 blah = "zzzabcdcdefzzz"; 2976 p = Pattern.compile("(ab)(cd)*(ef)"); 2977 m = p.matcher(blah); 2978 result = new StringBuffer(); 2979 m.find(); 2980 try { 2981 m.appendReplacement(result, "$1w$5w$3"); 2982 failCount++; 2983 } catch (IndexOutOfBoundsException ioobe) { 2984 // Correct result 2985 } 2986 2987 // Check double digit group references 2988 blah = "zzz123456789101112zzz"; 2989 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2990 m = p.matcher(blah); 2991 result = new StringBuffer(); 2992 m.find(); 2993 m.appendReplacement(result, "$1w$11w$3"); 2994 if (!result.toString().equals("zzz1w11w3")) 2995 failCount++; 2996 2997 // Check to make sure it backs off $15 to $1 if only three groups 2998 blah = "zzzabcdcdefzzz"; 2999 p = Pattern.compile("(ab)(cd)*(ef)"); 3000 m = p.matcher(blah); 3001 result = new StringBuffer(); 3002 m.find(); 3003 m.appendReplacement(result, "$1w$15w$3"); 3004 if (!result.toString().equals("zzzabwab5wef")) 3005 failCount++; 3006 3007 3008 // Supplementary character test 3009 // SB substitution with literal 3010 blah = toSupplementaries("zzzblahzzz"); 3011 p = Pattern.compile(toSupplementaries("blah")); 3012 m = p.matcher(blah); 3013 result = new StringBuffer(); 3014 try { 3015 m.appendReplacement(result, toSupplementaries("blech")); 3016 failCount++; 3017 } catch (IllegalStateException e) { 3018 } 3019 m.find(); 3020 m.appendReplacement(result, toSupplementaries("blech")); 3021 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3022 failCount++; 3023 3024 m.appendTail(result); 3025 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3026 failCount++; 3027 3028 // SB substitution with groups 3029 blah = toSupplementaries("zzzabcdzzz"); 3030 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3031 m = p.matcher(blah); 3032 result = new StringBuffer(); 3033 try { 3034 m.appendReplacement(result, "$1"); 3035 failCount++; 3036 } catch (IllegalStateException e) { 3037 } 3038 m.find(); 3039 m.appendReplacement(result, "$1"); 3040 if (!result.toString().equals(toSupplementaries("zzzab"))) 3041 failCount++; 3042 3043 m.appendTail(result); 3044 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3045 failCount++; 3046 3047 // SB substitution with 3 groups 3048 blah = toSupplementaries("zzzabcdcdefzzz"); 3049 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3050 m = p.matcher(blah); 3051 result = new StringBuffer(); 3052 try { 3053 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3054 failCount++; 3055 } catch (IllegalStateException e) { 3056 } 3057 m.find(); 3058 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3059 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3060 failCount++; 3061 3062 m.appendTail(result); 3063 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3064 failCount++; 3065 3066 // SB substitution with groups and three matches 3067 // skipping middle match 3068 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3069 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3070 m = p.matcher(blah); 3071 result = new StringBuffer(); 3072 try { 3073 m.appendReplacement(result, "$1"); 3074 failCount++; 3075 } catch (IllegalStateException e) { 3076 } 3077 m.find(); 3078 m.appendReplacement(result, "$1"); 3079 if (!result.toString().equals(toSupplementaries("zzzab"))) 3080 failCount++; 3081 3082 m.find(); 3083 m.find(); 3084 m.appendReplacement(result, "$2"); 3085 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3086 failCount++; 3087 3088 m.appendTail(result); 3089 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3090 failCount++; 3091 3092 // Check to make sure escaped $ is ignored 3093 blah = toSupplementaries("zzzabcdcdefzzz"); 3094 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3095 m = p.matcher(blah); 3096 result = new StringBuffer(); 3097 m.find(); 3098 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3099 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3100 failCount++; 3101 3102 m.appendTail(result); 3103 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3104 failCount++; 3105 3106 // Check to make sure a reference to nonexistent group causes error 3107 blah = toSupplementaries("zzzabcdcdefzzz"); 3108 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3109 m = p.matcher(blah); 3110 result = new StringBuffer(); 3111 m.find(); 3112 try { 3113 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3114 failCount++; 3115 } catch (IndexOutOfBoundsException ioobe) { 3116 // Correct result 3117 } 3118 3119 // Check double digit group references 3120 blah = toSupplementaries("zzz123456789101112zzz"); 3121 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3122 m = p.matcher(blah); 3123 result = new StringBuffer(); 3124 m.find(); 3125 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3126 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3127 failCount++; 3128 3129 // Check to make sure it backs off $15 to $1 if only three groups 3130 blah = toSupplementaries("zzzabcdcdefzzz"); 3131 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3132 m = p.matcher(blah); 3133 result = new StringBuffer(); 3134 m.find(); 3135 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3136 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3137 failCount++; 3138 3139 // Check nothing has been appended into the output buffer if 3140 // the replacement string triggers IllegalArgumentException. 3141 p = Pattern.compile("(abc)"); 3142 m = p.matcher("abcd"); 3143 result = new StringBuffer(); 3144 m.find(); 3145 try { 3146 m.appendReplacement(result, ("xyz$g")); 3147 failCount++; 3148 } catch (IllegalArgumentException iae) { 3149 if (result.length() != 0) 3150 failCount++; 3151 } 3152 3153 report("SB Substitution"); 3154 } 3155 3156 /** 3157 * Tests the usage of Matcher.appendReplacement() with literal 3158 * and group substitutions. 3159 */ 3160 private static void stringbuilderSubstitute() throws Exception { 3161 // SB substitution with literal 3162 String blah = "zzzblahzzz"; 3163 Pattern p = Pattern.compile("blah"); 3164 Matcher m = p.matcher(blah); 3165 StringBuilder result = new StringBuilder(); 3166 try { 3167 m.appendReplacement(result, "blech"); 3168 failCount++; 3169 } catch (IllegalStateException e) { 3170 } 3171 m.find(); 3172 m.appendReplacement(result, "blech"); 3173 if (!result.toString().equals("zzzblech")) 3174 failCount++; 3175 3176 m.appendTail(result); 3177 if (!result.toString().equals("zzzblechzzz")) 3178 failCount++; 3179 3180 // SB substitution with groups 3181 blah = "zzzabcdzzz"; 3182 p = Pattern.compile("(ab)(cd)*"); 3183 m = p.matcher(blah); 3184 result = new StringBuilder(); 3185 try { 3186 m.appendReplacement(result, "$1"); 3187 failCount++; 3188 } catch (IllegalStateException e) { 3189 } 3190 m.find(); 3191 m.appendReplacement(result, "$1"); 3192 if (!result.toString().equals("zzzab")) 3193 failCount++; 3194 3195 m.appendTail(result); 3196 if (!result.toString().equals("zzzabzzz")) 3197 failCount++; 3198 3199 // SB substitution with 3 groups 3200 blah = "zzzabcdcdefzzz"; 3201 p = Pattern.compile("(ab)(cd)*(ef)"); 3202 m = p.matcher(blah); 3203 result = new StringBuilder(); 3204 try { 3205 m.appendReplacement(result, "$1w$2w$3"); 3206 failCount++; 3207 } catch (IllegalStateException e) { 3208 } 3209 m.find(); 3210 m.appendReplacement(result, "$1w$2w$3"); 3211 if (!result.toString().equals("zzzabwcdwef")) 3212 failCount++; 3213 3214 m.appendTail(result); 3215 if (!result.toString().equals("zzzabwcdwefzzz")) 3216 failCount++; 3217 3218 // SB substitution with groups and three matches 3219 // skipping middle match 3220 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3221 p = Pattern.compile("(ab)(cd*)"); 3222 m = p.matcher(blah); 3223 result = new StringBuilder(); 3224 try { 3225 m.appendReplacement(result, "$1"); 3226 failCount++; 3227 } catch (IllegalStateException e) { 3228 } 3229 m.find(); 3230 m.appendReplacement(result, "$1"); 3231 if (!result.toString().equals("zzzab")) 3232 failCount++; 3233 3234 m.find(); 3235 m.find(); 3236 m.appendReplacement(result, "$2"); 3237 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3238 failCount++; 3239 3240 m.appendTail(result); 3241 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3242 failCount++; 3243 3244 // Check to make sure escaped $ is ignored 3245 blah = "zzzabcdcdefzzz"; 3246 p = Pattern.compile("(ab)(cd)*(ef)"); 3247 m = p.matcher(blah); 3248 result = new StringBuilder(); 3249 m.find(); 3250 m.appendReplacement(result, "$1w\\$2w$3"); 3251 if (!result.toString().equals("zzzabw$2wef")) 3252 failCount++; 3253 3254 m.appendTail(result); 3255 if (!result.toString().equals("zzzabw$2wefzzz")) 3256 failCount++; 3257 3258 // Check to make sure a reference to nonexistent group causes error 3259 blah = "zzzabcdcdefzzz"; 3260 p = Pattern.compile("(ab)(cd)*(ef)"); 3261 m = p.matcher(blah); 3262 result = new StringBuilder(); 3263 m.find(); 3264 try { 3265 m.appendReplacement(result, "$1w$5w$3"); 3266 failCount++; 3267 } catch (IndexOutOfBoundsException ioobe) { 3268 // Correct result 3269 } 3270 3271 // Check double digit group references 3272 blah = "zzz123456789101112zzz"; 3273 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3274 m = p.matcher(blah); 3275 result = new StringBuilder(); 3276 m.find(); 3277 m.appendReplacement(result, "$1w$11w$3"); 3278 if (!result.toString().equals("zzz1w11w3")) 3279 failCount++; 3280 3281 // Check to make sure it backs off $15 to $1 if only three groups 3282 blah = "zzzabcdcdefzzz"; 3283 p = Pattern.compile("(ab)(cd)*(ef)"); 3284 m = p.matcher(blah); 3285 result = new StringBuilder(); 3286 m.find(); 3287 m.appendReplacement(result, "$1w$15w$3"); 3288 if (!result.toString().equals("zzzabwab5wef")) 3289 failCount++; 3290 3291 3292 // Supplementary character test 3293 // SB substitution with literal 3294 blah = toSupplementaries("zzzblahzzz"); 3295 p = Pattern.compile(toSupplementaries("blah")); 3296 m = p.matcher(blah); 3297 result = new StringBuilder(); 3298 try { 3299 m.appendReplacement(result, toSupplementaries("blech")); 3300 failCount++; 3301 } catch (IllegalStateException e) { 3302 } 3303 m.find(); 3304 m.appendReplacement(result, toSupplementaries("blech")); 3305 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3306 failCount++; 3307 m.appendTail(result); 3308 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3309 failCount++; 3310 3311 // SB substitution with groups 3312 blah = toSupplementaries("zzzabcdzzz"); 3313 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3314 m = p.matcher(blah); 3315 result = new StringBuilder(); 3316 try { 3317 m.appendReplacement(result, "$1"); 3318 failCount++; 3319 } catch (IllegalStateException e) { 3320 } 3321 m.find(); 3322 m.appendReplacement(result, "$1"); 3323 if (!result.toString().equals(toSupplementaries("zzzab"))) 3324 failCount++; 3325 3326 m.appendTail(result); 3327 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3328 failCount++; 3329 3330 // SB substitution with 3 groups 3331 blah = toSupplementaries("zzzabcdcdefzzz"); 3332 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3333 m = p.matcher(blah); 3334 result = new StringBuilder(); 3335 try { 3336 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3337 failCount++; 3338 } catch (IllegalStateException e) { 3339 } 3340 m.find(); 3341 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3342 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3343 failCount++; 3344 3345 m.appendTail(result); 3346 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3347 failCount++; 3348 3349 // SB substitution with groups and three matches 3350 // skipping middle match 3351 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3352 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3353 m = p.matcher(blah); 3354 result = new StringBuilder(); 3355 try { 3356 m.appendReplacement(result, "$1"); 3357 failCount++; 3358 } catch (IllegalStateException e) { 3359 } 3360 m.find(); 3361 m.appendReplacement(result, "$1"); 3362 if (!result.toString().equals(toSupplementaries("zzzab"))) 3363 failCount++; 3364 3365 m.find(); 3366 m.find(); 3367 m.appendReplacement(result, "$2"); 3368 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3369 failCount++; 3370 3371 m.appendTail(result); 3372 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3373 failCount++; 3374 3375 // Check to make sure escaped $ is ignored 3376 blah = toSupplementaries("zzzabcdcdefzzz"); 3377 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3378 m = p.matcher(blah); 3379 result = new StringBuilder(); 3380 m.find(); 3381 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3382 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3383 failCount++; 3384 3385 m.appendTail(result); 3386 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3387 failCount++; 3388 3389 // Check to make sure a reference to nonexistent group causes error 3390 blah = toSupplementaries("zzzabcdcdefzzz"); 3391 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3392 m = p.matcher(blah); 3393 result = new StringBuilder(); 3394 m.find(); 3395 try { 3396 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3397 failCount++; 3398 } catch (IndexOutOfBoundsException ioobe) { 3399 // Correct result 3400 } 3401 // Check double digit group references 3402 blah = toSupplementaries("zzz123456789101112zzz"); 3403 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3404 m = p.matcher(blah); 3405 result = new StringBuilder(); 3406 m.find(); 3407 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3408 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3409 failCount++; 3410 3411 // Check to make sure it backs off $15 to $1 if only three groups 3412 blah = toSupplementaries("zzzabcdcdefzzz"); 3413 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3414 m = p.matcher(blah); 3415 result = new StringBuilder(); 3416 m.find(); 3417 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3418 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3419 failCount++; 3420 // Check nothing has been appended into the output buffer if 3421 // the replacement string triggers IllegalArgumentException. 3422 p = Pattern.compile("(abc)"); 3423 m = p.matcher("abcd"); 3424 result = new StringBuilder(); 3425 m.find(); 3426 try { 3427 m.appendReplacement(result, ("xyz$g")); 3428 failCount++; 3429 } catch (IllegalArgumentException iae) { 3430 if (result.length() != 0) 3431 failCount++; 3432 } 3433 report("SB Substitution 2"); 3434 } 3435 3436 /* 3437 * 5 groups of characters are created to make a substitution string. 3438 * A base string will be created including random lead chars, the 3439 * substitution string, and random trailing chars. 3440 * A pattern containing the 5 groups is searched for and replaced with: 3441 * random group + random string + random group. 3442 * The results are checked for correctness. 3443 */ 3444 private static void substitutionBasher() { 3445 for (int runs = 0; runs<1000; runs++) { 3446 // Create a base string to work in 3447 int leadingChars = generator.nextInt(10); 3448 StringBuffer baseBuffer = new StringBuffer(100); 3449 String leadingString = getRandomAlphaString(leadingChars); 3450 baseBuffer.append(leadingString); 3451 3452 // Create 5 groups of random number of random chars 3453 // Create the string to substitute 3454 // Create the pattern string to search for 3455 StringBuffer bufferToSub = new StringBuffer(25); 3456 StringBuffer bufferToPat = new StringBuffer(50); 3457 String[] groups = new String[5]; 3458 for(int i=0; i<5; i++) { 3459 int aGroupSize = generator.nextInt(5)+1; 3460 groups[i] = getRandomAlphaString(aGroupSize); 3461 bufferToSub.append(groups[i]); 3462 bufferToPat.append('('); 3463 bufferToPat.append(groups[i]); 3464 bufferToPat.append(')'); 3465 } 3466 String stringToSub = bufferToSub.toString(); 3467 String pattern = bufferToPat.toString(); 3468 3469 // Place sub string into working string at random index 3470 baseBuffer.append(stringToSub); 3471 3472 // Append random chars to end 3473 int trailingChars = generator.nextInt(10); 3474 String trailingString = getRandomAlphaString(trailingChars); 3475 baseBuffer.append(trailingString); 3476 String baseString = baseBuffer.toString(); 3477 3478 // Create test pattern and matcher 3479 Pattern p = Pattern.compile(pattern); 3480 Matcher m = p.matcher(baseString); 3481 3482 // Reject candidate if pattern happens to start early 3483 m.find(); 3484 if (m.start() < leadingChars) 3485 continue; 3486 3487 // Reject candidate if more than one match 3488 if (m.find()) 3489 continue; 3490 3491 // Construct a replacement string with : 3492 // random group + random string + random group 3493 StringBuffer bufferToRep = new StringBuffer(); 3494 int groupIndex1 = generator.nextInt(5); 3495 bufferToRep.append("$" + (groupIndex1 + 1)); 3496 String randomMidString = getRandomAlphaString(5); 3497 bufferToRep.append(randomMidString); 3498 int groupIndex2 = generator.nextInt(5); 3499 bufferToRep.append("$" + (groupIndex2 + 1)); 3500 String replacement = bufferToRep.toString(); 3501 3502 // Do the replacement 3503 String result = m.replaceAll(replacement); 3504 3505 // Construct expected result 3506 StringBuffer bufferToRes = new StringBuffer(); 3507 bufferToRes.append(leadingString); 3508 bufferToRes.append(groups[groupIndex1]); 3509 bufferToRes.append(randomMidString); 3510 bufferToRes.append(groups[groupIndex2]); 3511 bufferToRes.append(trailingString); 3512 String expectedResult = bufferToRes.toString(); 3513 3514 // Check results 3515 if (!result.equals(expectedResult)) 3516 failCount++; 3517 } 3518 3519 report("Substitution Basher"); 3520 } 3521 3522 /* 3523 * 5 groups of characters are created to make a substitution string. 3524 * A base string will be created including random lead chars, the 3525 * substitution string, and random trailing chars. 3526 * A pattern containing the 5 groups is searched for and replaced with: 3527 * random group + random string + random group. 3528 * The results are checked for correctness. 3529 */ 3530 private static void substitutionBasher2() { 3531 for (int runs = 0; runs<1000; runs++) { 3532 // Create a base string to work in 3533 int leadingChars = generator.nextInt(10); 3534 StringBuilder baseBuffer = new StringBuilder(100); 3535 String leadingString = getRandomAlphaString(leadingChars); 3536 baseBuffer.append(leadingString); 3537 3538 // Create 5 groups of random number of random chars 3539 // Create the string to substitute 3540 // Create the pattern string to search for 3541 StringBuilder bufferToSub = new StringBuilder(25); 3542 StringBuilder bufferToPat = new StringBuilder(50); 3543 String[] groups = new String[5]; 3544 for(int i=0; i<5; i++) { 3545 int aGroupSize = generator.nextInt(5)+1; 3546 groups[i] = getRandomAlphaString(aGroupSize); 3547 bufferToSub.append(groups[i]); 3548 bufferToPat.append('('); 3549 bufferToPat.append(groups[i]); 3550 bufferToPat.append(')'); 3551 } 3552 String stringToSub = bufferToSub.toString(); 3553 String pattern = bufferToPat.toString(); 3554 3555 // Place sub string into working string at random index 3556 baseBuffer.append(stringToSub); 3557 3558 // Append random chars to end 3559 int trailingChars = generator.nextInt(10); 3560 String trailingString = getRandomAlphaString(trailingChars); 3561 baseBuffer.append(trailingString); 3562 String baseString = baseBuffer.toString(); 3563 3564 // Create test pattern and matcher 3565 Pattern p = Pattern.compile(pattern); 3566 Matcher m = p.matcher(baseString); 3567 3568 // Reject candidate if pattern happens to start early 3569 m.find(); 3570 if (m.start() < leadingChars) 3571 continue; 3572 3573 // Reject candidate if more than one match 3574 if (m.find()) 3575 continue; 3576 3577 // Construct a replacement string with : 3578 // random group + random string + random group 3579 StringBuilder bufferToRep = new StringBuilder(); 3580 int groupIndex1 = generator.nextInt(5); 3581 bufferToRep.append("$" + (groupIndex1 + 1)); 3582 String randomMidString = getRandomAlphaString(5); 3583 bufferToRep.append(randomMidString); 3584 int groupIndex2 = generator.nextInt(5); 3585 bufferToRep.append("$" + (groupIndex2 + 1)); 3586 String replacement = bufferToRep.toString(); 3587 3588 // Do the replacement 3589 String result = m.replaceAll(replacement); 3590 3591 // Construct expected result 3592 StringBuilder bufferToRes = new StringBuilder(); 3593 bufferToRes.append(leadingString); 3594 bufferToRes.append(groups[groupIndex1]); 3595 bufferToRes.append(randomMidString); 3596 bufferToRes.append(groups[groupIndex2]); 3597 bufferToRes.append(trailingString); 3598 String expectedResult = bufferToRes.toString(); 3599 3600 // Check results 3601 if (!result.equals(expectedResult)) { 3602 failCount++; 3603 } 3604 } 3605 3606 report("Substitution Basher 2"); 3607 } 3608 3609 /** 3610 * Checks the handling of some escape sequences that the Pattern 3611 * class should process instead of the java compiler. These are 3612 * not in the file because the escapes should be be processed 3613 * by the Pattern class when the regex is compiled. 3614 */ 3615 private static void escapes() throws Exception { 3616 Pattern p = Pattern.compile("\\043"); 3617 Matcher m = p.matcher("#"); 3618 if (!m.find()) 3619 failCount++; 3620 3621 p = Pattern.compile("\\x23"); 3622 m = p.matcher("#"); 3623 if (!m.find()) 3624 failCount++; 3625 3626 p = Pattern.compile("\\u0023"); 3627 m = p.matcher("#"); 3628 if (!m.find()) 3629 failCount++; 3630 3631 report("Escape sequences"); 3632 } 3633 3634 /** 3635 * Checks the handling of blank input situations. These 3636 * tests are incompatible with my test file format. 3637 */ 3638 private static void blankInput() throws Exception { 3639 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3640 Matcher m = p.matcher(""); 3641 if (m.find()) 3642 failCount++; 3643 3644 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3645 m = p.matcher(""); 3646 if (!m.find()) 3647 failCount++; 3648 3649 p = Pattern.compile("abc"); 3650 m = p.matcher(""); 3651 if (m.find()) 3652 failCount++; 3653 3654 p = Pattern.compile("a*"); 3655 m = p.matcher(""); 3656 if (!m.find()) 3657 failCount++; 3658 3659 report("Blank input"); 3660 } 3661 3662 /** 3663 * Tests the Boyer-Moore pattern matching of a character sequence 3664 * on randomly generated patterns. 3665 */ 3666 private static void bm() throws Exception { 3667 doBnM('a'); 3668 report("Boyer Moore (ASCII)"); 3669 3670 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3671 report("Boyer Moore (Supplementary)"); 3672 } 3673 3674 private static void doBnM(int baseCharacter) throws Exception { 3675 int achar=0; 3676 3677 for (int i=0; i<100; i++) { 3678 // Create a short pattern to search for 3679 int patternLength = generator.nextInt(7) + 4; 3680 StringBuffer patternBuffer = new StringBuffer(patternLength); 3681 String pattern; 3682 retry: for (;;) { 3683 for (int x=0; x<patternLength; x++) { 3684 int ch = baseCharacter + generator.nextInt(26); 3685 if (Character.isSupplementaryCodePoint(ch)) { 3686 patternBuffer.append(Character.toChars(ch)); 3687 } else { 3688 patternBuffer.append((char)ch); 3689 } 3690 } 3691 pattern = patternBuffer.toString(); 3692 3693 // Avoid patterns that start and end with the same substring 3694 // See JDK-6854417 3695 for (int x=1; x < pattern.length(); x++) { 3696 if (pattern.startsWith(pattern.substring(x))) 3697 continue retry; 3698 } 3699 break; 3700 } 3701 Pattern p = Pattern.compile(pattern); 3702 3703 // Create a buffer with random ASCII chars that does 3704 // not match the sample 3705 String toSearch = null; 3706 StringBuffer s = null; 3707 Matcher m = p.matcher(""); 3708 do { 3709 s = new StringBuffer(100); 3710 for (int x=0; x<100; x++) { 3711 int ch = baseCharacter + generator.nextInt(26); 3712 if (Character.isSupplementaryCodePoint(ch)) { 3713 s.append(Character.toChars(ch)); 3714 } else { 3715 s.append((char)ch); 3716 } 3717 } 3718 toSearch = s.toString(); 3719 m.reset(toSearch); 3720 } while (m.find()); 3721 3722 // Insert the pattern at a random spot 3723 int insertIndex = generator.nextInt(99); 3724 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3725 insertIndex++; 3726 s = s.insert(insertIndex, pattern); 3727 toSearch = s.toString(); 3728 3729 // Make sure that the pattern is found 3730 m.reset(toSearch); 3731 if (!m.find()) 3732 failCount++; 3733 3734 // Make sure that the match text is the pattern 3735 if (!m.group().equals(pattern)) 3736 failCount++; 3737 3738 // Make sure match occured at insertion point 3739 if (m.start() != insertIndex) 3740 failCount++; 3741 } 3742 } 3743 3744 /** 3745 * Tests the matching of slices on randomly generated patterns. 3746 * The Boyer-Moore optimization is not done on these patterns 3747 * because it uses unicode case folding. 3748 */ 3749 private static void slice() throws Exception { 3750 doSlice(Character.MAX_VALUE); 3751 report("Slice"); 3752 3753 doSlice(Character.MAX_CODE_POINT); 3754 report("Slice (Supplementary)"); 3755 } 3756 3757 private static void doSlice(int maxCharacter) throws Exception { 3758 Random generator = new Random(); 3759 int achar=0; 3760 3761 for (int i=0; i<100; i++) { 3762 // Create a short pattern to search for 3763 int patternLength = generator.nextInt(7) + 4; 3764 StringBuffer patternBuffer = new StringBuffer(patternLength); 3765 for (int x=0; x<patternLength; x++) { 3766 int randomChar = 0; 3767 while (!Character.isLetterOrDigit(randomChar)) 3768 randomChar = generator.nextInt(maxCharacter); 3769 if (Character.isSupplementaryCodePoint(randomChar)) { 3770 patternBuffer.append(Character.toChars(randomChar)); 3771 } else { 3772 patternBuffer.append((char) randomChar); 3773 } 3774 } 3775 String pattern = patternBuffer.toString(); 3776 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3777 3778 // Create a buffer with random chars that does not match the sample 3779 String toSearch = null; 3780 StringBuffer s = null; 3781 Matcher m = p.matcher(""); 3782 do { 3783 s = new StringBuffer(100); 3784 for (int x=0; x<100; x++) { 3785 int randomChar = 0; 3786 while (!Character.isLetterOrDigit(randomChar)) 3787 randomChar = generator.nextInt(maxCharacter); 3788 if (Character.isSupplementaryCodePoint(randomChar)) { 3789 s.append(Character.toChars(randomChar)); 3790 } else { 3791 s.append((char) randomChar); 3792 } 3793 } 3794 toSearch = s.toString(); 3795 m.reset(toSearch); 3796 } while (m.find()); 3797 3798 // Insert the pattern at a random spot 3799 int insertIndex = generator.nextInt(99); 3800 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3801 insertIndex++; 3802 s = s.insert(insertIndex, pattern); 3803 toSearch = s.toString(); 3804 3805 // Make sure that the pattern is found 3806 m.reset(toSearch); 3807 if (!m.find()) 3808 failCount++; 3809 3810 // Make sure that the match text is the pattern 3811 if (!m.group().equals(pattern)) 3812 failCount++; 3813 3814 // Make sure match occured at insertion point 3815 if (m.start() != insertIndex) 3816 failCount++; 3817 } 3818 } 3819 3820 private static void explainFailure(String pattern, String data, 3821 String expected, String actual) { 3822 System.err.println("----------------------------------------"); 3823 System.err.println("Pattern = "+pattern); 3824 System.err.println("Data = "+data); 3825 System.err.println("Expected = " + expected); 3826 System.err.println("Actual = " + actual); 3827 } 3828 3829 private static void explainFailure(String pattern, String data, 3830 Throwable t) { 3831 System.err.println("----------------------------------------"); 3832 System.err.println("Pattern = "+pattern); 3833 System.err.println("Data = "+data); 3834 t.printStackTrace(System.err); 3835 } 3836 3837 // Testing examples from a file 3838 3839 /** 3840 * Goes through the file "TestCases.txt" and creates many patterns 3841 * described in the file, matching the patterns against input lines in 3842 * the file, and comparing the results against the correct results 3843 * also found in the file. The file format is described in comments 3844 * at the head of the file. 3845 */ 3846 private static void processFile(String fileName) throws Exception { 3847 File testCases = new File(System.getProperty("test.src", "."), 3848 fileName); 3849 FileInputStream in = new FileInputStream(testCases); 3850 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3851 3852 // Process next test case. 3853 String aLine; 3854 while((aLine = r.readLine()) != null) { 3855 // Read a line for pattern 3856 String patternString = grabLine(r); 3857 Pattern p = null; 3858 try { 3859 p = compileTestPattern(patternString); 3860 } catch (PatternSyntaxException e) { 3861 String dataString = grabLine(r); 3862 String expectedResult = grabLine(r); 3863 if (expectedResult.startsWith("error")) 3864 continue; 3865 explainFailure(patternString, dataString, e); 3866 failCount++; 3867 continue; 3868 } 3869 3870 // Read a line for input string 3871 String dataString = grabLine(r); 3872 Matcher m = p.matcher(dataString); 3873 StringBuffer result = new StringBuffer(); 3874 3875 // Check for IllegalStateExceptions before a match 3876 failCount += preMatchInvariants(m); 3877 3878 boolean found = m.find(); 3879 3880 if (found) 3881 failCount += postTrueMatchInvariants(m); 3882 else 3883 failCount += postFalseMatchInvariants(m); 3884 3885 if (found) { 3886 result.append("true "); 3887 result.append(m.group(0) + " "); 3888 } else { 3889 result.append("false "); 3890 } 3891 3892 result.append(m.groupCount()); 3893 3894 if (found) { 3895 for (int i=1; i<m.groupCount()+1; i++) 3896 if (m.group(i) != null) 3897 result.append(" " +m.group(i)); 3898 } 3899 3900 // Read a line for the expected result 3901 String expectedResult = grabLine(r); 3902 3903 if (!result.toString().equals(expectedResult)) { 3904 explainFailure(patternString, dataString, expectedResult, result.toString()); 3905 failCount++; 3906 } 3907 } 3908 3909 report(fileName); 3910 } 3911 3912 private static int preMatchInvariants(Matcher m) { 3913 int failCount = 0; 3914 try { 3915 m.start(); 3916 failCount++; 3917 } catch (IllegalStateException ise) {} 3918 try { 3919 m.end(); 3920 failCount++; 3921 } catch (IllegalStateException ise) {} 3922 try { 3923 m.group(); 3924 failCount++; 3925 } catch (IllegalStateException ise) {} 3926 return failCount; 3927 } 3928 3929 private static int postFalseMatchInvariants(Matcher m) { 3930 int failCount = 0; 3931 try { 3932 m.group(); 3933 failCount++; 3934 } catch (IllegalStateException ise) {} 3935 try { 3936 m.start(); 3937 failCount++; 3938 } catch (IllegalStateException ise) {} 3939 try { 3940 m.end(); 3941 failCount++; 3942 } catch (IllegalStateException ise) {} 3943 return failCount; 3944 } 3945 3946 private static int postTrueMatchInvariants(Matcher m) { 3947 int failCount = 0; 3948 //assert(m.start() = m.start(0); 3949 if (m.start() != m.start(0)) 3950 failCount++; 3951 //assert(m.end() = m.end(0); 3952 if (m.start() != m.start(0)) 3953 failCount++; 3954 //assert(m.group() = m.group(0); 3955 if (!m.group().equals(m.group(0))) 3956 failCount++; 3957 try { 3958 m.group(50); 3959 failCount++; 3960 } catch (IndexOutOfBoundsException ise) {} 3961 3962 return failCount; 3963 } 3964 3965 private static Pattern compileTestPattern(String patternString) { 3966 if (!patternString.startsWith("'")) { 3967 return Pattern.compile(patternString); 3968 } 3969 int break1 = patternString.lastIndexOf("'"); 3970 String flagString = patternString.substring( 3971 break1+1, patternString.length()); 3972 patternString = patternString.substring(1, break1); 3973 3974 if (flagString.equals("i")) 3975 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3976 3977 if (flagString.equals("m")) 3978 return Pattern.compile(patternString, Pattern.MULTILINE); 3979 3980 return Pattern.compile(patternString); 3981 } 3982 3983 /** 3984 * Reads a line from the input file. Keeps reading lines until a non 3985 * empty non comment line is read. If the line contains a \n then 3986 * these two characters are replaced by a newline char. If a \\uxxxx 3987 * sequence is read then the sequence is replaced by the unicode char. 3988 */ 3989 private static String grabLine(BufferedReader r) throws Exception { 3990 int index = 0; 3991 String line = r.readLine(); 3992 while (line.startsWith("//") || line.length() < 1) 3993 line = r.readLine(); 3994 while ((index = line.indexOf("\\n")) != -1) { 3995 StringBuffer temp = new StringBuffer(line); 3996 temp.replace(index, index+2, "\n"); 3997 line = temp.toString(); 3998 } 3999 while ((index = line.indexOf("\\u")) != -1) { 4000 StringBuffer temp = new StringBuffer(line); 4001 String value = temp.substring(index+2, index+6); 4002 char aChar = (char)Integer.parseInt(value, 16); 4003 String unicodeChar = "" + aChar; 4004 temp.replace(index, index+6, unicodeChar); 4005 line = temp.toString(); 4006 } 4007 4008 return line; 4009 } 4010 4011 private static void check(Pattern p, String s, String g, String expected) { 4012 Matcher m = p.matcher(s); 4013 m.find(); 4014 if (!m.group(g).equals(expected) || 4015 s.charAt(m.start(g)) != expected.charAt(0) || 4016 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4017 failCount++; 4018 } 4019 4020 private static void checkReplaceFirst(String p, String s, String r, String expected) 4021 { 4022 if (!expected.equals(Pattern.compile(p) 4023 .matcher(s) 4024 .replaceFirst(r))) 4025 failCount++; 4026 } 4027 4028 private static void checkReplaceAll(String p, String s, String r, String expected) 4029 { 4030 if (!expected.equals(Pattern.compile(p) 4031 .matcher(s) 4032 .replaceAll(r))) 4033 failCount++; 4034 } 4035 4036 private static void checkExpectedFail(String p) { 4037 try { 4038 Pattern.compile(p); 4039 } catch (PatternSyntaxException pse) { 4040 //pse.printStackTrace(); 4041 return; 4042 } 4043 failCount++; 4044 } 4045 4046 private static void checkExpectedIAE(Matcher m, String g) { 4047 m.find(); 4048 try { 4049 m.group(g); 4050 } catch (IllegalArgumentException x) { 4051 //iae.printStackTrace(); 4052 try { 4053 m.start(g); 4054 } catch (IllegalArgumentException xx) { 4055 try { 4056 m.start(g); 4057 } catch (IllegalArgumentException xxx) { 4058 return; 4059 } 4060 } 4061 } 4062 failCount++; 4063 } 4064 4065 private static void checkExpectedNPE(Matcher m) { 4066 m.find(); 4067 try { 4068 m.group(null); 4069 } catch (NullPointerException x) { 4070 try { 4071 m.start(null); 4072 } catch (NullPointerException xx) { 4073 try { 4074 m.end(null); 4075 } catch (NullPointerException xxx) { 4076 return; 4077 } 4078 } 4079 } 4080 failCount++; 4081 } 4082 4083 private static void namedGroupCaptureTest() throws Exception { 4084 check(Pattern.compile("x+(?<gname>y+)z+"), 4085 "xxxyyyzzz", 4086 "gname", 4087 "yyy"); 4088 4089 check(Pattern.compile("x+(?<gname8>y+)z+"), 4090 "xxxyyyzzz", 4091 "gname8", 4092 "yyy"); 4093 4094 //backref 4095 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4096 check(pattern, "zzzaabcazzz", true); // found "abca" 4097 4098 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4099 "zzzaabcaazzz", true); 4100 4101 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4102 "abcdefabc", true); 4103 4104 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4105 "abcdefghijkk", true); 4106 4107 // Supplementary character tests 4108 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4109 toSupplementaries("zzzaabcazzz"), true); 4110 4111 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4112 toSupplementaries("zzzaabcaazzz"), true); 4113 4114 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4115 toSupplementaries("abcdefabc"), true); 4116 4117 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4118 "(?<gname>" + 4119 toSupplementaries("k)") + "\\k<gname>"), 4120 toSupplementaries("abcdefghijkk"), true); 4121 4122 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4123 "xxxyyyzzzyyy", 4124 "gname", 4125 "yyy"); 4126 4127 //replaceFirst/All 4128 checkReplaceFirst("(?<gn>ab)(c*)", 4129 "abccczzzabcczzzabccc", 4130 "${gn}", 4131 "abzzzabcczzzabccc"); 4132 4133 checkReplaceAll("(?<gn>ab)(c*)", 4134 "abccczzzabcczzzabccc", 4135 "${gn}", 4136 "abzzzabzzzab"); 4137 4138 4139 checkReplaceFirst("(?<gn>ab)(c*)", 4140 "zzzabccczzzabcczzzabccczzz", 4141 "${gn}", 4142 "zzzabzzzabcczzzabccczzz"); 4143 4144 checkReplaceAll("(?<gn>ab)(c*)", 4145 "zzzabccczzzabcczzzabccczzz", 4146 "${gn}", 4147 "zzzabzzzabzzzabzzz"); 4148 4149 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4150 "zzzabccczzzabcczzzabccczzz", 4151 "${gn2}", 4152 "zzzccczzzabcczzzabccczzz"); 4153 4154 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4155 "zzzabccczzzabcczzzabccczzz", 4156 "${gn2}", 4157 "zzzccczzzcczzzccczzz"); 4158 4159 //toSupplementaries("(ab)(c*)")); 4160 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4161 ")(?<gn2>" + toSupplementaries("c") + "*)", 4162 toSupplementaries("abccczzzabcczzzabccc"), 4163 "${gn1}", 4164 toSupplementaries("abzzzabcczzzabccc")); 4165 4166 4167 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4168 ")(?<gn2>" + toSupplementaries("c") + "*)", 4169 toSupplementaries("abccczzzabcczzzabccc"), 4170 "${gn1}", 4171 toSupplementaries("abzzzabzzzab")); 4172 4173 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4174 ")(?<gn2>" + toSupplementaries("c") + "*)", 4175 toSupplementaries("abccczzzabcczzzabccc"), 4176 "${gn2}", 4177 toSupplementaries("ccczzzabcczzzabccc")); 4178 4179 4180 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4181 ")(?<gn2>" + toSupplementaries("c") + "*)", 4182 toSupplementaries("abccczzzabcczzzabccc"), 4183 "${gn2}", 4184 toSupplementaries("ccczzzcczzzccc")); 4185 4186 checkReplaceFirst("(?<dog>Dog)AndCat", 4187 "zzzDogAndCatzzzDogAndCatzzz", 4188 "${dog}", 4189 "zzzDogzzzDogAndCatzzz"); 4190 4191 4192 checkReplaceAll("(?<dog>Dog)AndCat", 4193 "zzzDogAndCatzzzDogAndCatzzz", 4194 "${dog}", 4195 "zzzDogzzzDogzzz"); 4196 4197 // backref in Matcher & String 4198 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4199 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4200 failCount++; 4201 4202 // negative 4203 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4204 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4205 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4206 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4207 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4208 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4209 "gnameX"); 4210 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4211 report("NamedGroupCapture"); 4212 } 4213 4214 // This is for bug 6919132 4215 private static void nonBmpClassComplementTest() throws Exception { 4216 Pattern p = Pattern.compile("\\P{Lu}"); 4217 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4218 4219 if (m.find() && m.start() == 1) 4220 failCount++; 4221 4222 // from a unicode category 4223 p = Pattern.compile("\\P{Lu}"); 4224 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4225 if (m.find()) 4226 failCount++; 4227 if (!m.hitEnd()) 4228 failCount++; 4229 4230 // block 4231 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4232 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4233 if (m.find() && m.start() == 1) 4234 failCount++; 4235 4236 p = Pattern.compile("\\P{sc=GRANTHA}"); 4237 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4238 if (m.find() && m.start() == 1) 4239 failCount++; 4240 4241 report("NonBmpClassComplement"); 4242 } 4243 4244 private static void unicodePropertiesTest() throws Exception { 4245 // different forms 4246 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4247 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4248 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4249 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4250 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4251 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4252 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4253 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4254 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4255 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4256 failCount++; 4257 4258 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4259 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4260 Matcher lastSM = common; 4261 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4262 4263 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4264 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4265 Matcher lastBM = latin; 4266 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4267 4268 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4269 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4270 continue; // only pick couple code points, they are the same 4271 } 4272 4273 // Unicode Script 4274 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4275 Matcher m; 4276 String str = new String(Character.toChars(cp)); 4277 if (script == lastScript) { 4278 m = lastSM; 4279 m.reset(str); 4280 } else { 4281 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4282 } 4283 if (!m.matches()) { 4284 failCount++; 4285 } 4286 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4287 other.reset(str); 4288 if (other.matches()) { 4289 failCount++; 4290 } 4291 lastSM = m; 4292 lastScript = script; 4293 4294 // Unicode Block 4295 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4296 if (block == null) { 4297 //System.out.printf("Not a Block: cp=%x%n", cp); 4298 continue; 4299 } 4300 if (block == lastBlock) { 4301 m = lastBM; 4302 m.reset(str); 4303 } else { 4304 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4305 } 4306 if (!m.matches()) { 4307 failCount++; 4308 } 4309 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4310 other.reset(str); 4311 if (other.matches()) { 4312 failCount++; 4313 } 4314 lastBM = m; 4315 lastBlock = block; 4316 } 4317 report("unicodeProperties"); 4318 } 4319 4320 private static void unicodeHexNotationTest() throws Exception { 4321 4322 // negative 4323 checkExpectedFail("\\x{-23}"); 4324 checkExpectedFail("\\x{110000}"); 4325 checkExpectedFail("\\x{}"); 4326 checkExpectedFail("\\x{AB[ef]"); 4327 4328 // codepoint 4329 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4330 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4331 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4332 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4333 4334 // in class 4335 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4336 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4337 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4338 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4339 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4340 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4341 4342 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4343 String s = "A" + new String(Character.toChars(cp)) + "B"; 4344 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4345 : String.format("\\u%04x\\u%04x", 4346 (int) Character.toChars(cp)[0], 4347 (int) Character.toChars(cp)[1]); 4348 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4349 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4350 failCount++; 4351 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4352 failCount++; 4353 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4354 failCount++; 4355 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4356 failCount++; 4357 } 4358 report("unicodeHexNotation"); 4359 } 4360 4361 private static void unicodeClassesTest() throws Exception { 4362 4363 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4364 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4365 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4366 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4367 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4368 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4369 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4370 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4371 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4372 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4373 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4374 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4375 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4376 Matcher bound = Pattern.compile("\\b").matcher(""); 4377 Matcher word = Pattern.compile("\\w++").matcher(""); 4378 // UNICODE_CHARACTER_CLASS 4379 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4380 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4381 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4382 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4383 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4384 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4385 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4386 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4387 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4388 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4389 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4390 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4391 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4392 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4393 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4394 // embedded flag (?U) 4395 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4396 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4397 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4398 4399 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4400 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4401 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4402 // properties 4403 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4404 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4405 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4406 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4407 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4408 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4409 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4410 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4411 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4412 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4413 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4414 // javaMethod 4415 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4416 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4417 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4418 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4419 // GC/C 4420 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4421 4422 for (int cp = 1; cp < 0x30000; cp++) { 4423 String str = new String(Character.toChars(cp)); 4424 int type = Character.getType(cp); 4425 if (// lower 4426 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4427 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4428 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4429 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4430 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4431 // upper 4432 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4433 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4434 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4435 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4436 // alpha 4437 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4438 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4439 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4440 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4441 // digit 4442 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4443 Character.isDigit(cp) != digitU.reset(str).matches() || 4444 // alnum 4445 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4446 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4447 // punct 4448 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4449 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4450 // graph 4451 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4452 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4453 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4454 // blank 4455 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4456 != blank.reset(str).matches() || 4457 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4458 // print 4459 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4460 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4461 // cntrl 4462 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4463 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4464 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4465 // hexdigit 4466 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4467 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4468 // space 4469 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4470 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4471 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4472 // word 4473 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4474 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4475 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4476 // bwordb 4477 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4478 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4479 // properties 4480 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4481 Character.isLetter(cp) != letterP.reset(str).matches()|| 4482 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4483 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4484 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4485 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4486 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4487 // gc_C 4488 (Character.CONTROL == type || Character.FORMAT == type || 4489 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4490 Character.UNASSIGNED == type) 4491 != gcC.reset(str).matches()) { 4492 failCount++; 4493 } 4494 } 4495 4496 // bounds/word align 4497 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4498 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4499 failCount++; 4500 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4501 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4502 failCount++; 4503 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4504 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4505 failCount++; 4506 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4507 failCount++; 4508 report("unicodePredefinedClasses"); 4509 } 4510 4511 private static void unicodeCharacterNameTest() throws Exception { 4512 4513 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4514 if (!Character.isValidCodePoint(cp) || 4515 Character.getType(cp) == Character.UNASSIGNED) 4516 continue; 4517 String str = new String(Character.toChars(cp)); 4518 // single 4519 String p = "\\N{" + Character.getName(cp) + "}"; 4520 if (!Pattern.compile(p).matcher(str).matches()) { 4521 failCount++; 4522 } 4523 // class[c] 4524 p = "[\\N{" + Character.getName(cp) + "}]"; 4525 if (!Pattern.compile(p).matcher(str).matches()) { 4526 failCount++; 4527 } 4528 } 4529 4530 // range 4531 for (int i = 0; i < 10; i++) { 4532 int start = generator.nextInt(20); 4533 int end = start + generator.nextInt(200); 4534 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4535 String str; 4536 for (int cp = start; cp < end; cp++) { 4537 str = new String(Character.toChars(cp)); 4538 if (!Pattern.compile(p).matcher(str).matches()) { 4539 failCount++; 4540 } 4541 } 4542 str = new String(Character.toChars(end + 10)); 4543 if (Pattern.compile(p).matcher(str).matches()) { 4544 failCount++; 4545 } 4546 } 4547 4548 // slice 4549 for (int i = 0; i < 10; i++) { 4550 int n = generator.nextInt(256); 4551 int[] buf = new int[n]; 4552 StringBuffer sb = new StringBuffer(1024); 4553 for (int j = 0; j < n; j++) { 4554 int cp = generator.nextInt(1000); 4555 if (!Character.isValidCodePoint(cp) || 4556 Character.getType(cp) == Character.UNASSIGNED) 4557 cp = 0x4e00; // just use 4e00 4558 sb.append("\\N{" + Character.getName(cp) + "}"); 4559 buf[j] = cp; 4560 } 4561 String p = sb.toString(); 4562 String str = new String(buf, 0, buf.length); 4563 if (!Pattern.compile(p).matcher(str).matches()) { 4564 failCount++; 4565 } 4566 } 4567 report("unicodeCharacterName"); 4568 } 4569 4570 private static void horizontalAndVerticalWSTest() throws Exception { 4571 String hws = new String (new char[] { 4572 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4573 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4574 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4575 0x202f, 0x205f, 0x3000 }); 4576 String vws = new String (new char[] { 4577 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4578 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4579 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4580 failCount++; 4581 if (Pattern.compile("\\H").matcher(hws).find() || 4582 Pattern.compile("[\\H]").matcher(hws).find()) 4583 failCount++; 4584 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4585 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4586 failCount++; 4587 if (Pattern.compile("\\V").matcher(vws).find() || 4588 Pattern.compile("[\\V]").matcher(vws).find()) 4589 failCount++; 4590 String prefix = "abcd"; 4591 String suffix = "efgh"; 4592 String ng = "A"; 4593 for (int i = 0; i < hws.length(); i++) { 4594 String c = String.valueOf(hws.charAt(i)); 4595 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4596 if (!m.find() || !c.equals(m.group())) 4597 failCount++; 4598 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4599 if (!m.find() || !c.equals(m.group())) 4600 failCount++; 4601 4602 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4603 if (!m.find() || !ng.equals(m.group())) 4604 failCount++; 4605 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4606 if (!m.find() || !ng.equals(m.group())) 4607 failCount++; 4608 } 4609 for (int i = 0; i < vws.length(); i++) { 4610 String c = String.valueOf(vws.charAt(i)); 4611 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4612 if (!m.find() || !c.equals(m.group())) 4613 failCount++; 4614 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4615 if (!m.find() || !c.equals(m.group())) 4616 failCount++; 4617 4618 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4619 if (!m.find() || !ng.equals(m.group())) 4620 failCount++; 4621 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4622 if (!m.find() || !ng.equals(m.group())) 4623 failCount++; 4624 } 4625 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4626 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4627 failCount++; 4628 report("horizontalAndVerticalWSTest"); 4629 } 4630 4631 private static void linebreakTest() throws Exception { 4632 String linebreaks = new String (new char[] { 4633 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4634 String crnl = "\r\n"; 4635 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4636 Pattern.compile("\\R").matcher(crnl).matches() && 4637 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4638 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4639 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4640 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4641 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4642 failCount++; 4643 } 4644 report("linebreakTest"); 4645 } 4646 4647 // #7189363 4648 private static void branchTest() throws Exception { 4649 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4650 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4651 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4652 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4653 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4654 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4655 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4656 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4657 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4658 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4659 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4660 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4661 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4662 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4663 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4664 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4665 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4666 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4667 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4668 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4669 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4670 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4671 failCount++; 4672 report("branchTest"); 4673 } 4674 4675 // This test is for 8007395 4676 private static void groupCurlyNotFoundSuppTest() throws Exception { 4677 String input = "test this as \ud83d\ude0d"; 4678 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4679 "test(.)*(@[a-zA-Z.]+)", 4680 "test([^B])+(@[a-zA-Z.]+)", 4681 "test([^B])*(@[a-zA-Z.]+)", 4682 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4683 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4684 }) { 4685 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4686 .matcher(input); 4687 try { 4688 if (m.find()) { 4689 failCount++; 4690 } 4691 } catch (Exception x) { 4692 failCount++; 4693 } 4694 } 4695 report("GroupCurly NotFoundSupp"); 4696 } 4697 4698 // This test is for 8023647 4699 private static void groupCurlyBackoffTest() throws Exception { 4700 if (!"abc1c".matches("(\\w)+1\\1") || 4701 "abc11".matches("(\\w)+1\\1")) { 4702 failCount++; 4703 } 4704 report("GroupCurly backoff"); 4705 } 4706 4707 // This test is for 8012646 4708 private static void patternAsPredicate() throws Exception { 4709 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4710 4711 if (p.test("")) { 4712 failCount++; 4713 } 4714 if (!p.test("word")) { 4715 failCount++; 4716 } 4717 if (p.test("1234")) { 4718 failCount++; 4719 } 4720 if (!p.test("word1234")) { 4721 failCount++; 4722 } 4723 report("Pattern.asPredicate"); 4724 } 4725 4726 // This test is for 8184692 4727 private static void patternAsMatchPredicate() throws Exception { 4728 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate(); 4729 4730 if (p.test("")) { 4731 failCount++; 4732 } 4733 if (!p.test("word")) { 4734 failCount++; 4735 } 4736 if (p.test("1234word")) { 4737 failCount++; 4738 } 4739 if (p.test("1234")) { 4740 failCount++; 4741 } 4742 report("Pattern.asMatchPredicate"); 4743 } 4744 4745 4746 // This test is for 8035975 4747 private static void invalidFlags() throws Exception { 4748 for (int flag = 1; flag != 0; flag <<= 1) { 4749 switch (flag) { 4750 case Pattern.CASE_INSENSITIVE: 4751 case Pattern.MULTILINE: 4752 case Pattern.DOTALL: 4753 case Pattern.UNICODE_CASE: 4754 case Pattern.CANON_EQ: 4755 case Pattern.UNIX_LINES: 4756 case Pattern.LITERAL: 4757 case Pattern.UNICODE_CHARACTER_CLASS: 4758 case Pattern.COMMENTS: 4759 // valid flag, continue 4760 break; 4761 default: 4762 try { 4763 Pattern.compile(".", flag); 4764 failCount++; 4765 } catch (IllegalArgumentException expected) { 4766 } 4767 } 4768 } 4769 report("Invalid compile flags"); 4770 } 4771 4772 // This test is for 8158482 4773 private static void embeddedFlags() throws Exception { 4774 try { 4775 Pattern.compile("(?i).(?-i)."); 4776 Pattern.compile("(?m).(?-m)."); 4777 Pattern.compile("(?s).(?-s)."); 4778 Pattern.compile("(?d).(?-d)."); 4779 Pattern.compile("(?u).(?-u)."); 4780 Pattern.compile("(?c).(?-c)."); 4781 Pattern.compile("(?x).(?-x)."); 4782 Pattern.compile("(?U).(?-U)."); 4783 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4784 } catch (PatternSyntaxException x) { 4785 failCount++; 4786 } 4787 report("Embedded flags"); 4788 } 4789 4790 private static void grapheme() throws Exception { 4791 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST) 4792 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4793 .forEach( ln -> { 4794 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4795 // System.out.println(str); 4796 String[] strs = ln.split("\u00f7|\u00d7"); 4797 StringBuilder src = new StringBuilder(); 4798 ArrayList<String> graphemes = new ArrayList<>(); 4799 StringBuilder buf = new StringBuilder(); 4800 int offBk = 0; 4801 for (String str : strs) { 4802 if (str.length() == 0) // first empty str 4803 continue; 4804 int cp = Integer.parseInt(str, 16); 4805 src.appendCodePoint(cp); 4806 buf.appendCodePoint(cp); 4807 offBk += (str.length() + 1); 4808 if (ln.charAt(offBk) == '\u00f7') { // DIV 4809 graphemes.add(buf.toString()); 4810 buf = new StringBuilder(); 4811 } 4812 } 4813 Pattern p = Pattern.compile("\\X"); 4814 Matcher m = p.matcher(src.toString()); 4815 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4816 for (String g : graphemes) { 4817 // System.out.printf(" grapheme:=[%s]%n", g); 4818 // (1) test \\X directly 4819 if (!m.find() || !m.group().equals(g)) { 4820 System.out.println("Failed \\X [" + ln + "] : " + g); 4821 failCount++; 4822 } 4823 // (2) test \\b{g} + \\X via Scanner 4824 boolean hasNext = s.hasNext(p); 4825 // if (!s.hasNext() || !s.next().equals(next)) { 4826 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4827 System.out.println("Failed b{g} [" + ln + "] : " + g); 4828 failCount++; 4829 } 4830 } 4831 }); 4832 // some sanity checks 4833 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4834 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4835 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4836 failCount++; 4837 // make sure "\b{n}" still works 4838 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4839 failCount++; 4840 report("Unicode extended grapheme cluster"); 4841 } 4842 4843 // hangup/timeout if go into exponential backtracking 4844 private static void expoBacktracking() throws Exception { 4845 4846 Object[][] patternMatchers = { 4847 // 6328855 4848 { "(.*\n*)*", 4849 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4850 false }, 4851 // 6192895 4852 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4853 "Hello World this is a test this is a test this is a test A", 4854 true }, 4855 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4856 "Hello World this is a test this is a test this is a test \u4e00 ", 4857 false }, 4858 { " *([a-z0-9]+ *)+", 4859 "hello world this is a test this is a test this is a test A", 4860 false }, 4861 // 4771934 [FIXED] #5013651? 4862 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4863 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4864 true }, 4865 // 4866249 [FIXED] 4866 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4867 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4868 true }, 4869 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4870 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4871 false }, 4872 // 6345469 4873 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4874 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4875 true }, // --> matched 4876 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4877 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4878 false }, 4879 // 5026912 4880 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4881 "156580451111112225588087755221111111566969655555555", 4882 false}, 4883 // 6988218 4884 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4885 "'%)) order by ANGEBOT.ID", 4886 false}, // find 4887 // 6693451 4888 { "^(\\s*foo\\s*)*$", 4889 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4890 true }, 4891 { "^(\\s*foo\\s*)*$", 4892 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4893 false 4894 }, 4895 // 7006761 4896 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4897 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4898 // 8140212 4899 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4900 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4901 false 4902 }, 4903 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4904 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4905 4906 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4907 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4908 4909 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4910 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4911 4912 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4913 4914 /* not fixed 4915 //8132141 ---> second level exponential backtracking 4916 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4917 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4918 */ 4919 }; 4920 4921 for (Object[] pm : patternMatchers) { 4922 String p = (String)pm[0]; 4923 String s = (String)pm[1]; 4924 boolean r = (Boolean)pm[2]; 4925 if (r != Pattern.compile(p).matcher(s).matches()) { 4926 failCount++; 4927 } 4928 } 4929 } 4930 4931 private static void invalidGroupName() { 4932 // Invalid start of a group name 4933 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4934 "\u0060", "\u007b", "\u0416")) { 4935 for (String pat : List.of("(?<" + groupName + ">)", 4936 "\\k<" + groupName + ">")) { 4937 try { 4938 Pattern.compile(pat); 4939 failCount++; 4940 } catch (PatternSyntaxException e) { 4941 if (!e.getMessage().startsWith( 4942 "capturing group name does not start with a" 4943 + " Latin letter")) { 4944 failCount++; 4945 } 4946 } 4947 } 4948 } 4949 // Invalid char in a group name 4950 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4951 "d\u0060", "e\u007b", "f\u0416")) { 4952 for (String pat : List.of("(?<" + groupName + ">)", 4953 "\\k<" + groupName + ">")) { 4954 try { 4955 Pattern.compile(pat); 4956 failCount++; 4957 } catch (PatternSyntaxException e) { 4958 if (!e.getMessage().startsWith( 4959 "named capturing group is missing trailing '>'")) { 4960 failCount++; 4961 } 4962 } 4963 } 4964 } 4965 report("Invalid capturing group names"); 4966 } 4967 4968 private static void illegalRepetitionRange() { 4969 // huge integers > (2^31 - 1) 4970 String n = BigInteger.valueOf(1L << 32) 4971 .toString(); 4972 String m = BigInteger.valueOf(1L << 31) 4973 .add(new BigInteger(80, generator)) 4974 .toString(); 4975 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4976 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4977 String pat = ".{" + rep + "}"; 4978 try { 4979 Pattern.compile(pat); 4980 failCount++; 4981 System.out.println("Expected to fail. Pattern: " + pat); 4982 } catch (PatternSyntaxException e) { 4983 if (!e.getMessage().startsWith("Illegal repetition")) { 4984 failCount++; 4985 System.out.println("Unexpected error message: " + e.getMessage()); 4986 } 4987 } catch (Throwable t) { 4988 failCount++; 4989 System.out.println("Unexpected exception: " + t); 4990 } 4991 } 4992 report("illegalRepetitionRange"); 4993 } 4994 4995 private static void surrogatePairWithCanonEq() { 4996 try { 4997 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 4998 } catch (Throwable t) { 4999 failCount++; 5000 System.out.println("Unexpected exception: " + t); 5001 } 5002 report("surrogatePairWithCanonEq"); 5003 } 5004 5005 // This test is for 8214245 5006 private static void caseInsensitivePMatch() { 5007 for (String input : List.of("abcd", "AbCd", "ABCD")) { 5008 for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}", 5009 "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}", 5010 "\\p{IsLl}{4}", "\\p{gc=Ll}{4}", 5011 "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}", 5012 "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}", 5013 "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}", 5014 "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}", 5015 "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}", 5016 "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}", 5017 "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}", 5018 "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}", 5019 "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}", 5020 "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}", 5021 "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}", 5022 "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}", 5023 "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}", 5024 "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}", 5025 "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}")) 5026 { 5027 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE) 5028 .matcher(input) 5029 .matches()) 5030 { 5031 failCount++; 5032 System.out.println("Expected to match: " + 5033 "'" + input + "' =~ /" + pattern + "/"); 5034 } 5035 } 5036 } 5037 5038 for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) { 5039 for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9", 5040 "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]", 5041 "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]", 5042 "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}", 5043 "\\p{general_category=Ll}", "\\p{IsLowercase}", 5044 "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}", 5045 "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}", 5046 "\\p{IsUppercase}", "\\p{javaUpperCase}", 5047 "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}", 5048 "\\p{general_category=Lt}", "\\p{IsTitlecase}", 5049 "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]", 5050 "[\\p{IsLl}]", "[\\p{gc=Ll}]", 5051 "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]", 5052 "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]", 5053 "[\\p{IsLu}]", "[\\p{gc=Lu}]", 5054 "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]", 5055 "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]", 5056 "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]", 5057 "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]")) 5058 { 5059 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE 5060 | Pattern.UNICODE_CHARACTER_CLASS) 5061 .matcher(input) 5062 .matches()) 5063 { 5064 failCount++; 5065 System.out.println("Expected to match: " + 5066 "'" + input + "' =~ /" + pattern + "/"); 5067 } 5068 } 5069 } 5070 report("caseInsensitivePMatch"); 5071 } 5072 } --- EOF ---