< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page
rev 57965 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes


2870                 throw error("Empty character family");
2871             name = new String(temp, i, j-i-1);
2872         }
2873 
2874         int i = name.indexOf('=');
2875         if (i != -1) {
2876             // property construct \p{name=value}
2877             String value = name.substring(i + 1);
2878             name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
2879             switch (name) {
2880                 case "sc":
2881                 case "script":
2882                     p = CharPredicates.forUnicodeScript(value);
2883                     break;
2884                 case "blk":
2885                 case "block":
2886                     p = CharPredicates.forUnicodeBlock(value);
2887                     break;
2888                 case "gc":
2889                 case "general_category":
2890                     p = CharPredicates.forProperty(value);
2891                     break;
2892                 default:
2893                     break;
2894             }
2895             if (p == null)
2896                 throw error("Unknown Unicode property {name=<" + name + ">, "
2897                              + "value=<" + value + ">}");
2898 
2899         } else {
2900             if (name.startsWith("In")) {
2901                 // \p{InBlockName}
2902                 p = CharPredicates.forUnicodeBlock(name.substring(2));
2903             } else if (name.startsWith("Is")) {
2904                 // \p{IsGeneralCategory} and \p{IsScriptName}
2905                 String shortName = name.substring(2);
2906                 p = CharPredicates.forUnicodeProperty(shortName);
2907                 if (p == null)
2908                     p = CharPredicates.forProperty(shortName);
2909                 if (p == null)
2910                     p = CharPredicates.forUnicodeScript(shortName);
2911             } else {
2912                 if (has(UNICODE_CHARACTER_CLASS)) {
2913                     p = CharPredicates.forPOSIXName(name);
2914                 }
2915                 if (p == null)
2916                     p = CharPredicates.forProperty(name);
2917             }
2918             if (p == null)
2919                 throw error("Unknown character property name {" + name + "}");
2920         }
2921         if (isComplement) {
2922             // it might be too expensive to detect if a complement of
2923             // CharProperty can match "certain" supplementary. So just
2924             // go with StartS.
2925             hasSupplementary = true;
2926             p = p.negate();
2927         }
2928         return p;
2929     }
2930 
2931     private CharProperty newCharProperty(CharPredicate p) {
2932         if (p == null)
2933             return null;
2934         if (p instanceof BmpCharPredicate)
2935             return new BmpCharProperty((BmpCharPredicate)p);
2936         else


5602                     return true;
5603                 }
5604                 i += countChars(seq, i, 1);
5605             }
5606             matcher.hitEnd = true;
5607             return false;
5608         }
5609     }
5610 
5611     @FunctionalInterface
         // A boolean test over a single int-valued character. The only abstract
         // method is is(int); the default methods below build new predicates
         // out of existing ones by wrapping them in lambdas.
         // NOTE(review): ch is presumably a Unicode code point -- confirm
         // against the callers, which are outside this excerpt.
5612     static interface CharPredicate {
             // Returns true if this predicate accepts ch.
5613         boolean is(int ch);
5614 
             // Conjunction: the result accepts ch only when both this predicate
             // and p accept it. p is consulted only if this predicate accepts ch.
             // p is not null-checked here; a null p would only fail when the
             // returned predicate is evaluated and reaches p.is(ch).
5615         default CharPredicate and(CharPredicate p) {
5616             return ch -> is(ch) && p.is(ch);
5617         }
             // Disjunction: the result accepts ch when this predicate or p
             // accepts it. p is consulted only if this predicate rejects ch.
5618         default CharPredicate union(CharPredicate p) {
5619             return ch -> is(ch) || p.is(ch);
5620         }
             // Three-way disjunction of this predicate, p1 and p2, evaluated
             // left to right and stopping at the first one that accepts ch.
5621         default CharPredicate union(CharPredicate p1,
5622                                     CharPredicate p2 ) {
5623             return ch -> is(ch) || p1.is(ch) || p2.is(ch);
5624         }
             // Logical complement: the result accepts exactly the values this
             // predicate rejects.
5625         default CharPredicate negate() {
5626             return ch -> !is(ch);
5627         }
5628     }
5629 
5630     static interface BmpCharPredicate extends CharPredicate {
5631 
5632         default CharPredicate and(CharPredicate p) {
5633             if (p instanceof BmpCharPredicate)
5634                 return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
5635             return ch -> is(ch) && p.is(ch);
5636         }
5637         default CharPredicate union(CharPredicate p) {
5638             if (p instanceof BmpCharPredicate)
5639                 return (BmpCharPredicate)(ch -> is(ch) || p.is(ch));
5640             return ch -> is(ch) || p.is(ch);
5641         }
5642         static CharPredicate union(CharPredicate... predicates) {




2870                 throw error("Empty character family");
2871             name = new String(temp, i, j-i-1);
2872         }
2873 
2874         int i = name.indexOf('=');
2875         if (i != -1) {
2876             // property construct \p{name=value}
2877             String value = name.substring(i + 1);
2878             name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
2879             switch (name) {
2880                 case "sc":
2881                 case "script":
2882                     p = CharPredicates.forUnicodeScript(value);
2883                     break;
2884                 case "blk":
2885                 case "block":
2886                     p = CharPredicates.forUnicodeBlock(value);
2887                     break;
2888                 case "gc":
2889                 case "general_category":
2890                     p = CharPredicates.forProperty(value, has(CASE_INSENSITIVE));
2891                     break;
2892                 default:
2893                     break;
2894             }
2895             if (p == null)
2896                 throw error("Unknown Unicode property {name=<" + name + ">, "
2897                              + "value=<" + value + ">}");
2898 
2899         } else {
2900             if (name.startsWith("In")) {
2901                 // \p{InBlockName}
2902                 p = CharPredicates.forUnicodeBlock(name.substring(2));
2903             } else if (name.startsWith("Is")) {
2904                 // \p{IsGeneralCategory} and \p{IsScriptName}
2905                 String shortName = name.substring(2);
2906                 p = CharPredicates.forUnicodeProperty(shortName, has(CASE_INSENSITIVE));
2907                 if (p == null)
2908                     p = CharPredicates.forProperty(shortName, has(CASE_INSENSITIVE));
2909                 if (p == null)
2910                     p = CharPredicates.forUnicodeScript(shortName);
2911             } else {
2912                 if (has(UNICODE_CHARACTER_CLASS))
2913                     p = CharPredicates.forPOSIXName(name, has(CASE_INSENSITIVE));

2914                 if (p == null)
2915                     p = CharPredicates.forProperty(name, has(CASE_INSENSITIVE));
2916             }
2917             if (p == null)
2918                 throw error("Unknown character property name {" + name + "}");
2919         }
2920         if (isComplement) {
2921             // it might be too expensive to detect if a complement of
2922             // CharProperty can match "certain" supplementary. So just
2923             // go with StartS.
2924             hasSupplementary = true;
2925             p = p.negate();
2926         }
2927         return p;
2928     }
2929 
2930     private CharProperty newCharProperty(CharPredicate p) {
2931         if (p == null)
2932             return null;
2933         if (p instanceof BmpCharPredicate)
2934             return new BmpCharProperty((BmpCharPredicate)p);
2935         else


5601                     return true;
5602                 }
5603                 i += countChars(seq, i, 1);
5604             }
5605             matcher.hitEnd = true;
5606             return false;
5607         }
5608     }
5609 
5610     @FunctionalInterface
         // A boolean test over a single int-valued character. The only abstract
         // method is is(int); the default methods below build new predicates
         // out of existing ones by wrapping them in lambdas.
         // NOTE(review): ch is presumably a Unicode code point -- confirm
         // against the callers, which are outside this excerpt.
5611     static interface CharPredicate {
             // Returns true if this predicate accepts ch.
5612         boolean is(int ch);
5613 
             // Conjunction: the result accepts ch only when both this predicate
             // and p accept it. p is consulted only if this predicate accepts ch.
             // p is not null-checked here; a null p would only fail when the
             // returned predicate is evaluated and reaches p.is(ch).
5614         default CharPredicate and(CharPredicate p) {
5615             return ch -> is(ch) && p.is(ch);
5616         }
             // Disjunction: the result accepts ch when this predicate or p
             // accepts it. p is consulted only if this predicate rejects ch.
5617         default CharPredicate union(CharPredicate p) {
5618             return ch -> is(ch) || p.is(ch);
5619         }
             // Three-way disjunction of this predicate, p1 and p2, evaluated
             // left to right and stopping at the first one that accepts ch.
5620         default CharPredicate union(CharPredicate p1,
5621                                     CharPredicate p2) {
5622             return ch -> is(ch) || p1.is(ch) || p2.is(ch);
5623         }
             // Logical complement: the result accepts exactly the values this
             // predicate rejects.
5624         default CharPredicate negate() {
5625             return ch -> !is(ch);
5626         }
5627     }
5628 
5629     static interface BmpCharPredicate extends CharPredicate {
5630 
5631         default CharPredicate and(CharPredicate p) {
5632             if (p instanceof BmpCharPredicate)
5633                 return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
5634             return ch -> is(ch) && p.is(ch);
5635         }
5636         default CharPredicate union(CharPredicate p) {
5637             if (p instanceof BmpCharPredicate)
5638                 return (BmpCharPredicate)(ch -> is(ch) || p.is(ch));
5639             return ch -> is(ch) || p.is(ch);
5640         }
5641         static CharPredicate union(CharPredicate... predicates) {


< prev index next >