< prev index next >

src/java.base/share/classes/java/util/regex/CharPredicates.java

Print this page
rev 54580 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes

*** 1,7 **** /* ! * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this --- 1,7 ---- /* ! * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this
*** 167,181 **** JOIN_CONTROL()); } ///////////////////////////////////////////////////////////////////////////// ! private static CharPredicate getPosixPredicate(String name) { switch (name) { case "ALPHA": return ALPHABETIC(); ! case "LOWER": return LOWERCASE(); ! case "UPPER": return UPPERCASE(); case "SPACE": return WHITE_SPACE(); case "PUNCT": return PUNCTUATION(); case "XDIGIT": return HEX_DIGIT(); case "ALNUM": return ALNUM(); case "CNTRL": return CONTROL(); --- 167,185 ---- JOIN_CONTROL()); } ///////////////////////////////////////////////////////////////////////////// ! private static CharPredicate getPosixPredicate(String name, boolean caseIns) { switch (name) { case "ALPHA": return ALPHABETIC(); ! case "LOWER": return caseIns ! ? LOWERCASE().union(UPPERCASE(), TITLECASE()) ! : LOWERCASE(); ! case "UPPER": return caseIns ! ? UPPERCASE().union(LOWERCASE(), TITLECASE()) ! : UPPERCASE(); case "SPACE": return WHITE_SPACE(); case "PUNCT": return PUNCTUATION(); case "XDIGIT": return HEX_DIGIT(); case "ALNUM": return ALNUM(); case "CNTRL": return CONTROL();
*** 185,228 **** case "PRINT": return PRINT(); default: return null; } } ! private static CharPredicate getUnicodePredicate(String name) { switch (name) { case "ALPHABETIC": return ALPHABETIC(); case "ASSIGNED": return ASSIGNED(); case "CONTROL": return CONTROL(); ! case "HEXDIGIT": return HEX_DIGIT(); case "IDEOGRAPHIC": return IDEOGRAPHIC(); ! case "JOINCONTROL": return JOIN_CONTROL(); case "LETTER": return LETTER(); ! case "LOWERCASE": return LOWERCASE(); ! case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT(); ! case "TITLECASE": return TITLECASE(); case "PUNCTUATION": return PUNCTUATION(); ! case "UPPERCASE": return UPPERCASE(); ! case "WHITESPACE": return WHITE_SPACE(); ! case "WORD": return WORD(); case "WHITE_SPACE": return WHITE_SPACE(); ! case "HEX_DIGIT": return HEX_DIGIT(); ! case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT(); ! case "JOIN_CONTROL": return JOIN_CONTROL(); default: return null; } } ! public static CharPredicate forUnicodeProperty(String propName) { propName = propName.toUpperCase(Locale.ROOT); ! CharPredicate p = getUnicodePredicate(propName); if (p != null) return p; ! return getPosixPredicate(propName); } ! public static CharPredicate forPOSIXName(String propName) { ! return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH)); } ///////////////////////////////////////////////////////////////////////////// /** --- 189,238 ---- case "PRINT": return PRINT(); default: return null; } } ! private static CharPredicate getUnicodePredicate(String name, boolean caseIns) { switch (name) { case "ALPHABETIC": return ALPHABETIC(); case "ASSIGNED": return ASSIGNED(); case "CONTROL": return CONTROL(); ! case "HEXDIGIT": ! case "HEX_DIGIT": return HEX_DIGIT(); case "IDEOGRAPHIC": return IDEOGRAPHIC(); ! case "JOINCONTROL": ! case "JOIN_CONTROL": return JOIN_CONTROL(); case "LETTER": return LETTER(); ! case "LOWERCASE": return caseIns ! ? LOWERCASE().union(UPPERCASE(), TITLECASE()) ! : LOWERCASE(); ! case "NONCHARACTERCODEPOINT": ! case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT(); ! case "TITLECASE": return caseIns ! ? TITLECASE().union(LOWERCASE(), UPPERCASE()) ! : TITLECASE(); case "PUNCTUATION": return PUNCTUATION(); ! case "UPPERCASE": return caseIns ! ? UPPERCASE().union(LOWERCASE(), TITLECASE()) ! : UPPERCASE(); ! case "WHITESPACE": case "WHITE_SPACE": return WHITE_SPACE(); ! case "WORD": return WORD(); default: return null; } } ! public static CharPredicate forUnicodeProperty(String propName, boolean caseIns) { propName = propName.toUpperCase(Locale.ROOT); ! CharPredicate p = getUnicodePredicate(propName, caseIns); if (p != null) return p; ! return getPosixPredicate(propName, caseIns); } ! public static CharPredicate forPOSIXName(String propName, boolean caseIns) { ! return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH), caseIns); } ///////////////////////////////////////////////////////////////////////////// /**
*** 252,269 **** ///////////////////////////////////////////////////////////////////////////// // unicode categories, aliases, properties, java methods ... ! static CharPredicate forProperty(String name) { // Unicode character property aliases, defined in // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt switch (name) { case "Cn": return category(1<<Character.UNASSIGNED); ! case "Lu": return category(1<<Character.UPPERCASE_LETTER); ! case "Ll": return category(1<<Character.LOWERCASE_LETTER); ! case "Lt": return category(1<<Character.TITLECASE_LETTER); case "Lm": return category(1<<Character.MODIFIER_LETTER); case "Lo": return category(1<<Character.OTHER_LETTER); case "Mn": return category(1<<Character.NON_SPACING_MARK); case "Me": return category(1<<Character.ENCLOSING_MARK); case "Mc": return category(1<<Character.COMBINING_SPACING_MARK); --- 262,288 ---- ///////////////////////////////////////////////////////////////////////////// // unicode categories, aliases, properties, java methods ... ! static CharPredicate forProperty(String name, boolean caseIns) { // Unicode character property aliases, defined in // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt switch (name) { case "Cn": return category(1<<Character.UNASSIGNED); ! case "Lu": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) | ! (1<<Character.UPPERCASE_LETTER) | ! (1<<Character.TITLECASE_LETTER) ! : (1<<Character.UPPERCASE_LETTER)); ! case "Ll": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) | ! (1<<Character.UPPERCASE_LETTER) | ! (1<<Character.TITLECASE_LETTER) ! : (1<<Character.LOWERCASE_LETTER)); ! case "Lt": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) | ! (1<<Character.UPPERCASE_LETTER) | ! (1<<Character.TITLECASE_LETTER) ! : (1<<Character.TITLECASE_LETTER)); case "Lm": return category(1<<Character.MODIFIER_LETTER); case "Lo": return category(1<<Character.OTHER_LETTER); case "Mn": return category(1<<Character.NON_SPACING_MARK); case "Me": return category(1<<Character.ENCLOSING_MARK); case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);
*** 336,371 **** case "Alpha": return ctype(ASCII.ALPHA); // Alphabetic characters case "Blank": return ctype(ASCII.BLANK); // Space and tab characters case "Cntrl": return ctype(ASCII.CNTRL); // Control characters case "Digit": return range('0', '9'); // Numeric characters case "Graph": return ctype(ASCII.GRAPH); // printable and visible ! case "Lower": return range('a', 'z'); // Lower-case alphabetic case "Print": return range(0x20, 0x7E); // Printable characters case "Punct": return ctype(ASCII.PUNCT); // Punctuation characters case "Space": return ctype(ASCII.SPACE); // Space characters ! case "Upper": return range('A', 'Z'); // Upper-case alphabetic case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits // Java character properties, defined by methods in Character.java ! case "javaLowerCase": return java.lang.Character::isLowerCase; ! case "javaUpperCase": return Character::isUpperCase; ! case "javaAlphabetic": return java.lang.Character::isAlphabetic; ! case "javaIdeographic": return java.lang.Character::isIdeographic; ! case "javaTitleCase": return java.lang.Character::isTitleCase; ! case "javaDigit": return java.lang.Character::isDigit; ! case "javaDefined": return java.lang.Character::isDefined; ! case "javaLetter": return java.lang.Character::isLetter; ! case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit; ! case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart; ! case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart; ! case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart; ! case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart; ! case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable; ! case "javaSpaceChar": return java.lang.Character::isSpaceChar; ! case "javaWhitespace": return java.lang.Character::isWhitespace; ! case "javaISOControl": return java.lang.Character::isISOControl; ! case "javaMirrored": return java.lang.Character::isMirrored; default: return null; } } private static CharPredicate category(final int typeMask) { --- 355,401 ---- case "Alpha": return ctype(ASCII.ALPHA); // Alphabetic characters case "Blank": return ctype(ASCII.BLANK); // Space and tab characters case "Cntrl": return ctype(ASCII.CNTRL); // Control characters case "Digit": return range('0', '9'); // Numeric characters case "Graph": return ctype(ASCII.GRAPH); // printable and visible ! case "Lower": return caseIns ? ctype(ASCII.ALPHA) ! : range('a', 'z'); // Lower-case alphabetic case "Print": return range(0x20, 0x7E); // Printable characters case "Punct": return ctype(ASCII.PUNCT); // Punctuation characters case "Space": return ctype(ASCII.SPACE); // Space characters ! case "Upper": return caseIns ? ctype(ASCII.ALPHA) ! : range('A', 'Z'); // Upper-case alphabetic case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits // Java character properties, defined by methods in Character.java ! case "javaLowerCase": return caseIns ? c -> Character.isLowerCase(c) || ! Character.isUpperCase(c) || ! Character.isTitleCase(c) ! : Character::isLowerCase; ! case "javaUpperCase": return caseIns ? c -> Character.isUpperCase(c) || ! Character.isLowerCase(c) || ! Character.isTitleCase(c) ! : Character::isUpperCase; ! case "javaAlphabetic": return Character::isAlphabetic; ! case "javaIdeographic": return Character::isIdeographic; ! case "javaTitleCase": return caseIns ? c -> Character.isTitleCase(c) || ! Character.isLowerCase(c) || ! Character.isUpperCase(c) ! : Character::isTitleCase; ! case "javaDigit": return Character::isDigit; ! case "javaDefined": return Character::isDefined; ! case "javaLetter": return Character::isLetter; ! case "javaLetterOrDigit": return Character::isLetterOrDigit; ! case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart; ! case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart; ! case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart; ! case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart; ! case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable; ! case "javaSpaceChar": return Character::isSpaceChar; ! case "javaWhitespace": return Character::isWhitespace; ! case "javaISOControl": return Character::isISOControl; ! case "javaMirrored": return Character::isMirrored; default: return null; } } private static CharPredicate category(final int typeMask) {
< prev index next >