# HG changeset patch # User igerasim # Date 1581105177 28800 # Fri Feb 07 11:52:57 2020 -0800 # Node ID 0c3a01937d1d09a5cacc9c2d8ab87beb70134304 # Parent f1f8562f3ad2b3771b7735b8386820af178aee66 [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes diff --git a/src/java.base/share/classes/java/util/regex/CharPredicates.java b/src/java.base/share/classes/java/util/regex/CharPredicates.java --- a/src/java.base/share/classes/java/util/regex/CharPredicates.java +++ b/src/java.base/share/classes/java/util/regex/CharPredicates.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -169,11 +169,15 @@ ///////////////////////////////////////////////////////////////////////////// - private static CharPredicate getPosixPredicate(String name) { + private static CharPredicate getPosixPredicate(String name, boolean caseIns) { switch (name) { case "ALPHA": return ALPHABETIC(); - case "LOWER": return LOWERCASE(); - case "UPPER": return UPPERCASE(); + case "LOWER": return caseIns + ? LOWERCASE().union(UPPERCASE(), TITLECASE()) + : LOWERCASE(); + case "UPPER": return caseIns + ? UPPERCASE().union(LOWERCASE(), TITLECASE()) + : UPPERCASE(); case "SPACE": return WHITE_SPACE(); case "PUNCT": return PUNCTUATION(); case "XDIGIT": return HEX_DIGIT(); @@ -187,40 +191,46 @@ } } - private static CharPredicate getUnicodePredicate(String name) { + private static CharPredicate getUnicodePredicate(String name, boolean caseIns) { switch (name) { case "ALPHABETIC": return ALPHABETIC(); case "ASSIGNED": return ASSIGNED(); case "CONTROL": return CONTROL(); - case "HEXDIGIT": return HEX_DIGIT(); + case "HEXDIGIT": + case "HEX_DIGIT": return HEX_DIGIT(); case "IDEOGRAPHIC": return IDEOGRAPHIC(); - case "JOINCONTROL": return JOIN_CONTROL(); + case "JOINCONTROL": + case "JOIN_CONTROL": return JOIN_CONTROL(); case "LETTER": return LETTER(); - case "LOWERCASE": return LOWERCASE(); - case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT(); - case "TITLECASE": return TITLECASE(); + case "LOWERCASE": return caseIns + ? LOWERCASE().union(UPPERCASE(), TITLECASE()) + : LOWERCASE(); + case "NONCHARACTERCODEPOINT": + case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT(); + case "TITLECASE": return caseIns + ? TITLECASE().union(LOWERCASE(), UPPERCASE()) + : TITLECASE(); case "PUNCTUATION": return PUNCTUATION(); - case "UPPERCASE": return UPPERCASE(); - case "WHITESPACE": return WHITE_SPACE(); - case "WORD": return WORD(); + case "UPPERCASE": return caseIns + ? UPPERCASE().union(LOWERCASE(), TITLECASE()) + : UPPERCASE(); + case "WHITESPACE": case "WHITE_SPACE": return WHITE_SPACE(); - case "HEX_DIGIT": return HEX_DIGIT(); - case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT(); - case "JOIN_CONTROL": return JOIN_CONTROL(); + case "WORD": return WORD(); default: return null; } } - public static CharPredicate forUnicodeProperty(String propName) { + public static CharPredicate forUnicodeProperty(String propName, boolean caseIns) { propName = propName.toUpperCase(Locale.ROOT); - CharPredicate p = getUnicodePredicate(propName); + CharPredicate p = getUnicodePredicate(propName, caseIns); if (p != null) return p; - return getPosixPredicate(propName); + return getPosixPredicate(propName, caseIns); } - public static CharPredicate forPOSIXName(String propName) { - return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH)); + public static CharPredicate forPOSIXName(String propName, boolean caseIns) { + return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH), caseIns); } ///////////////////////////////////////////////////////////////////////////// @@ -254,14 +264,23 @@ // unicode categories, aliases, properties, java methods ... - static CharPredicate forProperty(String name) { + static CharPredicate forProperty(String name, boolean caseIns) { // Unicode character property aliases, defined in // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt switch (name) { case "Cn": return category(1< Character.isLowerCase(c) || + Character.isUpperCase(c) || + Character.isTitleCase(c) + : Character::isLowerCase; + case "javaUpperCase": return caseIns ? c -> Character.isUpperCase(c) || + Character.isLowerCase(c) || + Character.isTitleCase(c) + : Character::isUpperCase; + case "javaAlphabetic": return Character::isAlphabetic; + case "javaIdeographic": return Character::isIdeographic; + case "javaTitleCase": return caseIns ? c -> Character.isTitleCase(c) || + Character.isLowerCase(c) || + Character.isUpperCase(c) + : Character::isTitleCase; + case "javaDigit": return Character::isDigit; + case "javaDefined": return Character::isDefined; + case "javaLetter": return Character::isLetter; + case "javaLetterOrDigit": return Character::isLetterOrDigit; + case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart; + case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart; + case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart; + case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart; + case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable; + case "javaSpaceChar": return Character::isSpaceChar; + case "javaWhitespace": return Character::isWhitespace; + case "javaISOControl": return Character::isISOControl; + case "javaMirrored": return Character::isMirrored; default: return null; } } diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java --- a/src/java.base/share/classes/java/util/regex/Pattern.java +++ b/src/java.base/share/classes/java/util/regex/Pattern.java @@ -2887,7 +2887,7 @@ break; case "gc": case "general_category": - p = CharPredicates.forProperty(value); + p = CharPredicates.forProperty(value, has(CASE_INSENSITIVE)); break; default: break; @@ -2903,17 +2903,16 @@ } else if (name.startsWith("Is")) { // \p{IsGeneralCategory} and \p{IsScriptName} String shortName = name.substring(2); - p = CharPredicates.forUnicodeProperty(shortName); + p = CharPredicates.forUnicodeProperty(shortName, has(CASE_INSENSITIVE)); if (p == null) - p = CharPredicates.forProperty(shortName); + p = CharPredicates.forProperty(shortName, has(CASE_INSENSITIVE)); if (p == null) p = CharPredicates.forUnicodeScript(shortName); } else { - if (has(UNICODE_CHARACTER_CLASS)) { - p = CharPredicates.forPOSIXName(name); - } + if (has(UNICODE_CHARACTER_CLASS)) + p = CharPredicates.forPOSIXName(name, has(CASE_INSENSITIVE)); if (p == null) - p = CharPredicates.forProperty(name); + p = CharPredicates.forProperty(name, has(CASE_INSENSITIVE)); } if (p == null) throw error("Unknown character property name {" + name + "}"); @@ -5619,7 +5618,7 @@ return ch -> is(ch) || p.is(ch); } default CharPredicate union(CharPredicate p1, - CharPredicate p2 ) { + CharPredicate p2) { return ch -> is(ch) || p1.is(ch) || p2.is(ch); } default CharPredicate negate() { diff --git a/test/jdk/java/util/regex/RegExTest.java b/test/jdk/java/util/regex/RegExTest.java --- a/test/jdk/java/util/regex/RegExTest.java +++ b/test/jdk/java/util/regex/RegExTest.java @@ -35,7 +35,7 @@ * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 - * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 + * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8214245 * * @library /test/lib * @library /lib/testlibrary/java/lang @@ -186,6 +186,7 @@ invalidGroupName(); illegalRepetitionRange(); surrogatePairWithCanonEq(); + caseInsensitivePMatch(); if (failure) { throw new @@ -5000,4 +5001,72 @@ } report("surrogatePairWithCanonEq"); } + + // This test is for 8214245 + private static void caseInsensitivePMatch() { + for (String input : List.of("abcd", "AbCd", "ABCD")) { + for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}", + "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}", + "\\p{IsLl}{4}", "\\p{gc=Ll}{4}", + "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}", + "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}", + "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}", + "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}", + "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}", + "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}", + "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}", + "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}", + "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}", + "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}", + "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}", + "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}", + "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}", + "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}", + "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}")) + { + if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE) + .matcher(input) + .matches()) + { + failCount++; + System.out.println("Expected to match: " + + "'" + input + "' =~ /" + pattern + "/"); + } + } + } + + for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) { + for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9", + "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]", + "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]", + "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}", + "\\p{general_category=Ll}", "\\p{IsLowercase}", + "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}", + "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}", + "\\p{IsUppercase}", "\\p{javaUpperCase}", + "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}", + "\\p{general_category=Lt}", "\\p{IsTitlecase}", + "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]", + "[\\p{IsLl}]", "[\\p{gc=Ll}]", + "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]", + "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]", + "[\\p{IsLu}]", "[\\p{gc=Lu}]", + "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]", + "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]", + "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]", + "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]")) + { + if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE + | Pattern.UNICODE_CHARACTER_CLASS) + .matcher(input) + .matches()) + { + failCount++; + System.out.println("Expected to match: " + + "'" + input + "' =~ /" + pattern + "/"); + } + } + } + report("caseInsensitivePMatch"); + } }