< prev index next >
src/java.base/share/classes/java/util/regex/CharPredicates.java
Print this page
rev 57965 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes
*** 1,7 ****
/*
! * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 167,181 ****
JOIN_CONTROL());
}
/////////////////////////////////////////////////////////////////////////////
! private static CharPredicate getPosixPredicate(String name) {
switch (name) {
case "ALPHA": return ALPHABETIC();
! case "LOWER": return LOWERCASE();
! case "UPPER": return UPPERCASE();
case "SPACE": return WHITE_SPACE();
case "PUNCT": return PUNCTUATION();
case "XDIGIT": return HEX_DIGIT();
case "ALNUM": return ALNUM();
case "CNTRL": return CONTROL();
--- 167,185 ----
JOIN_CONTROL());
}
/////////////////////////////////////////////////////////////////////////////
! private static CharPredicate getPosixPredicate(String name, boolean caseIns) {
switch (name) {
case "ALPHA": return ALPHABETIC();
! case "LOWER": return caseIns
! ? LOWERCASE().union(UPPERCASE(), TITLECASE())
! : LOWERCASE();
! case "UPPER": return caseIns
! ? UPPERCASE().union(LOWERCASE(), TITLECASE())
! : UPPERCASE();
case "SPACE": return WHITE_SPACE();
case "PUNCT": return PUNCTUATION();
case "XDIGIT": return HEX_DIGIT();
case "ALNUM": return ALNUM();
case "CNTRL": return CONTROL();
*** 185,228 ****
case "PRINT": return PRINT();
default: return null;
}
}
! private static CharPredicate getUnicodePredicate(String name) {
switch (name) {
case "ALPHABETIC": return ALPHABETIC();
case "ASSIGNED": return ASSIGNED();
case "CONTROL": return CONTROL();
! case "HEXDIGIT": return HEX_DIGIT();
case "IDEOGRAPHIC": return IDEOGRAPHIC();
! case "JOINCONTROL": return JOIN_CONTROL();
case "LETTER": return LETTER();
! case "LOWERCASE": return LOWERCASE();
! case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT();
! case "TITLECASE": return TITLECASE();
case "PUNCTUATION": return PUNCTUATION();
! case "UPPERCASE": return UPPERCASE();
! case "WHITESPACE": return WHITE_SPACE();
! case "WORD": return WORD();
case "WHITE_SPACE": return WHITE_SPACE();
! case "HEX_DIGIT": return HEX_DIGIT();
! case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
! case "JOIN_CONTROL": return JOIN_CONTROL();
default: return null;
}
}
! public static CharPredicate forUnicodeProperty(String propName) {
propName = propName.toUpperCase(Locale.ROOT);
! CharPredicate p = getUnicodePredicate(propName);
if (p != null)
return p;
! return getPosixPredicate(propName);
}
! public static CharPredicate forPOSIXName(String propName) {
! return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH));
}
/////////////////////////////////////////////////////////////////////////////
/**
--- 189,238 ----
case "PRINT": return PRINT();
default: return null;
}
}
! private static CharPredicate getUnicodePredicate(String name, boolean caseIns) {
switch (name) {
case "ALPHABETIC": return ALPHABETIC();
case "ASSIGNED": return ASSIGNED();
case "CONTROL": return CONTROL();
! case "HEXDIGIT":
! case "HEX_DIGIT": return HEX_DIGIT();
case "IDEOGRAPHIC": return IDEOGRAPHIC();
! case "JOINCONTROL":
! case "JOIN_CONTROL": return JOIN_CONTROL();
case "LETTER": return LETTER();
! case "LOWERCASE": return caseIns
! ? LOWERCASE().union(UPPERCASE(), TITLECASE())
! : LOWERCASE();
! case "NONCHARACTERCODEPOINT":
! case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
! case "TITLECASE": return caseIns
! ? TITLECASE().union(LOWERCASE(), UPPERCASE())
! : TITLECASE();
case "PUNCTUATION": return PUNCTUATION();
! case "UPPERCASE": return caseIns
! ? UPPERCASE().union(LOWERCASE(), TITLECASE())
! : UPPERCASE();
! case "WHITESPACE":
case "WHITE_SPACE": return WHITE_SPACE();
! case "WORD": return WORD();
default: return null;
}
}
! public static CharPredicate forUnicodeProperty(String propName, boolean caseIns) {
propName = propName.toUpperCase(Locale.ROOT);
! CharPredicate p = getUnicodePredicate(propName, caseIns);
if (p != null)
return p;
! return getPosixPredicate(propName, caseIns);
}
! public static CharPredicate forPOSIXName(String propName, boolean caseIns) {
! return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH), caseIns);
}
/////////////////////////////////////////////////////////////////////////////
/**
*** 252,269 ****
/////////////////////////////////////////////////////////////////////////////
// unicode categories, aliases, properties, java methods ...
! static CharPredicate forProperty(String name) {
// Unicode character property aliases, defined in
// http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
switch (name) {
case "Cn": return category(1<<Character.UNASSIGNED);
! case "Lu": return category(1<<Character.UPPERCASE_LETTER);
! case "Ll": return category(1<<Character.LOWERCASE_LETTER);
! case "Lt": return category(1<<Character.TITLECASE_LETTER);
case "Lm": return category(1<<Character.MODIFIER_LETTER);
case "Lo": return category(1<<Character.OTHER_LETTER);
case "Mn": return category(1<<Character.NON_SPACING_MARK);
case "Me": return category(1<<Character.ENCLOSING_MARK);
case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);
--- 262,288 ----
/////////////////////////////////////////////////////////////////////////////
// unicode categories, aliases, properties, java methods ...
! static CharPredicate forProperty(String name, boolean caseIns) {
// Unicode character property aliases, defined in
// http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
switch (name) {
case "Cn": return category(1<<Character.UNASSIGNED);
! case "Lu": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
! (1<<Character.UPPERCASE_LETTER) |
! (1<<Character.TITLECASE_LETTER)
! : (1<<Character.UPPERCASE_LETTER));
! case "Ll": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
! (1<<Character.UPPERCASE_LETTER) |
! (1<<Character.TITLECASE_LETTER)
! : (1<<Character.LOWERCASE_LETTER));
! case "Lt": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
! (1<<Character.UPPERCASE_LETTER) |
! (1<<Character.TITLECASE_LETTER)
! : (1<<Character.TITLECASE_LETTER));
case "Lm": return category(1<<Character.MODIFIER_LETTER);
case "Lo": return category(1<<Character.OTHER_LETTER);
case "Mn": return category(1<<Character.NON_SPACING_MARK);
case "Me": return category(1<<Character.ENCLOSING_MARK);
case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);
*** 336,371 ****
case "Alpha": return ctype(ASCII.ALPHA); // Alphabetic characters
case "Blank": return ctype(ASCII.BLANK); // Space and tab characters
case "Cntrl": return ctype(ASCII.CNTRL); // Control characters
case "Digit": return range('0', '9'); // Numeric characters
case "Graph": return ctype(ASCII.GRAPH); // printable and visible
! case "Lower": return range('a', 'z'); // Lower-case alphabetic
case "Print": return range(0x20, 0x7E); // Printable characters
case "Punct": return ctype(ASCII.PUNCT); // Punctuation characters
case "Space": return ctype(ASCII.SPACE); // Space characters
! case "Upper": return range('A', 'Z'); // Upper-case alphabetic
case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
// Java character properties, defined by methods in Character.java
! case "javaLowerCase": return java.lang.Character::isLowerCase;
! case "javaUpperCase": return Character::isUpperCase;
! case "javaAlphabetic": return java.lang.Character::isAlphabetic;
! case "javaIdeographic": return java.lang.Character::isIdeographic;
! case "javaTitleCase": return java.lang.Character::isTitleCase;
! case "javaDigit": return java.lang.Character::isDigit;
! case "javaDefined": return java.lang.Character::isDefined;
! case "javaLetter": return java.lang.Character::isLetter;
! case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit;
! case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart;
! case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart;
! case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart;
! case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart;
! case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable;
! case "javaSpaceChar": return java.lang.Character::isSpaceChar;
! case "javaWhitespace": return java.lang.Character::isWhitespace;
! case "javaISOControl": return java.lang.Character::isISOControl;
! case "javaMirrored": return java.lang.Character::isMirrored;
default: return null;
}
}
private static CharPredicate category(final int typeMask) {
--- 355,401 ----
case "Alpha": return ctype(ASCII.ALPHA); // Alphabetic characters
case "Blank": return ctype(ASCII.BLANK); // Space and tab characters
case "Cntrl": return ctype(ASCII.CNTRL); // Control characters
case "Digit": return range('0', '9'); // Numeric characters
case "Graph": return ctype(ASCII.GRAPH); // printable and visible
! case "Lower": return caseIns ? ctype(ASCII.ALPHA)
! : range('a', 'z'); // Lower-case alphabetic
case "Print": return range(0x20, 0x7E); // Printable characters
case "Punct": return ctype(ASCII.PUNCT); // Punctuation characters
case "Space": return ctype(ASCII.SPACE); // Space characters
! case "Upper": return caseIns ? ctype(ASCII.ALPHA)
! : range('A', 'Z'); // Upper-case alphabetic
case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
// Java character properties, defined by methods in Character.java
! case "javaLowerCase": return caseIns ? c -> Character.isLowerCase(c) ||
! Character.isUpperCase(c) ||
! Character.isTitleCase(c)
! : Character::isLowerCase;
! case "javaUpperCase": return caseIns ? c -> Character.isUpperCase(c) ||
! Character.isLowerCase(c) ||
! Character.isTitleCase(c)
! : Character::isUpperCase;
! case "javaAlphabetic": return Character::isAlphabetic;
! case "javaIdeographic": return Character::isIdeographic;
! case "javaTitleCase": return caseIns ? c -> Character.isTitleCase(c) ||
! Character.isLowerCase(c) ||
! Character.isUpperCase(c)
! : Character::isTitleCase;
! case "javaDigit": return Character::isDigit;
! case "javaDefined": return Character::isDefined;
! case "javaLetter": return Character::isLetter;
! case "javaLetterOrDigit": return Character::isLetterOrDigit;
! case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart;
! case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart;
! case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart;
! case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart;
! case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable;
! case "javaSpaceChar": return Character::isSpaceChar;
! case "javaWhitespace": return Character::isWhitespace;
! case "javaISOControl": return Character::isISOControl;
! case "javaMirrored": return Character::isMirrored;
default: return null;
}
}
private static CharPredicate category(final int typeMask) {
< prev index next >