src/share/classes/sun/nio/cs/Surrogate.java

Print this page



  72     /**
  73      * Tells whether or not the given UTF-16 value is a surrogate character, i.e. lies in the inclusive range MIN..MAX.
  74      */
  75     public static boolean is(int c) {
  76         return (MIN <= c) && (c <= MAX);
  77     }
  78 
  79     /**
  80      * Tells whether or not the given UCS-4 character must be represented as a
  81      * surrogate pair in UTF-16, i.e. lies in the inclusive range UCS4_MIN..UCS4_MAX.
  82      */
  83     public static boolean neededFor(int uc) {
  84         return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
  85     }
  86 
  87     /**
  88      * Returns the high UTF-16 surrogate for the given UCS-4 character; neededFor(uc) must hold (assertion-checked only).
  89      */
  90     public static char high(int uc) {
  91         assert neededFor(uc);
  92         return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff));   // 0xd800 OR-ed with bits 10..19 of (uc - UCS4_MIN)
  93     }
  94 
  95     /**
  96      * Returns the low UTF-16 surrogate for the given UCS-4 character; neededFor(uc) must hold (assertion-checked only).
  97      */
  98     public static char low(int uc) {
  99         assert neededFor(uc);
 100         return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff));   // 0xdc00 OR-ed with the low 10 bits of (uc - UCS4_MIN)
 101     }
 102 
 103     /**
 104      * Converts the given surrogate pair (c high, d low; assertion-checked only) into a 32-bit UCS-4 character.
 105      */
 106     public static int toUCS4(char c, char d) {
 107         assert isHigh(c) && isLow(d);
 108         return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;   // low 10 bits of each half combined, then offset by 0x10000
 109     }
 110 
 111     /**
 112      * Surrogate parsing support.  Charset implementations may use instances of
 113      * this class to handle the details of parsing UTF-16 surrogate pairs.
 114      */
 115     public static class Parser {
 116 
 117         public Parser() { }
 118 
 119         private int character;          // UCS-4
 120         private CoderResult error = CoderResult.UNDERFLOW;
 121         private boolean isPair;
 122 
 123         /**
 124          * Returns the UCS-4 character previously parsed.
 125          */
 126         public int character() {
 127             assert (error == null);
 128             return character;


 161          */
 162         public CoderResult unmappableResult() {
 163             assert (error == null);   // parse() sets error to null only when it succeeded
 164             return CoderResult.unmappableForLength(isPair ? 2 : 1);   // length 2 if the last parse was a surrogate pair, else 1
 165         }
 166 
 167         /**
 168          * Parses a UCS-4 character from the given source buffer, handling
 169          * surrogates.
 170          *
 171          * @param  c    The first character
 172          * @param  in   The source buffer, from which one more character
 173          *              will be consumed if c is a high surrogate
 174          *
 175          * @return  Either a parsed UCS-4 character, in which case the isPair()
 176          *          and increment() methods will return meaningful values, or
 177          *          -1, in which case error() will return a descriptive result
 178          *          object
 179          */
 180         public int parse(char c, CharBuffer in) {
 181             if (Surrogate.isHigh(c)) {
 182                 if (!in.hasRemaining()) {
 183                     error = CoderResult.UNDERFLOW;   // high surrogate with nothing left in the buffer
 184                     return -1;
 185                 }
 186                 char d = in.get();   // relative get: d is consumed even if it turns out not to be a low surrogate
 187                 if (Surrogate.isLow(d)) {
 188                     character = toUCS4(c, d);
 189                     isPair = true;
 190                     error = null;
 191                     return character;
 192                 }
 193                 error = CoderResult.malformedForLength(1);   // high surrogate not followed by a low surrogate
 194                 return -1;
 195             }
 196             if (Surrogate.isLow(c)) {
 197                 error = CoderResult.malformedForLength(1);   // low surrogate arriving first
 198                 return -1;
 199             }
 200             character = c;   // not a surrogate: the char value is the character
 201             isPair = false;
 202             error = null;
 203             return character;
 204         }
 205 
 206         /**
 207          * Parses a UCS-4 character from the given input array, handling
 208          * surrogates.
 209          *
 210          * @param  c    The first character
 211          * @param  ia   The input array, from which one more character
 212          *              will be read (at ip + 1) if c is a high surrogate
 213          * @param  ip   The input index; ia[ip] must equal c (assertion-checked only)
 214          * @param  il   The input limit (exclusive)
 215          *
 216          * @return  Either a parsed UCS-4 character, in which case the isPair()
 217          *          and increment() methods will return meaningful values, or
 218          *          -1, in which case error() will return a descriptive result
 219          *          object
 220          */
 221         public int parse(char c, char[] ia, int ip, int il) {
 222             assert (ia[ip] == c);
 223             if (Surrogate.isHigh(c)) {
 224                 if (il - ip < 2) {
 225                     error = CoderResult.UNDERFLOW;   // no element at ip + 1 below the limit
 226                     return -1;
 227                 }
 228                 char d = ia[ip + 1];
 229                 if (Surrogate.isLow(d)) {
 230                     character = toUCS4(c, d);
 231                     isPair = true;
 232                     error = null;
 233                     return character;
 234                 }
 235                 error = CoderResult.malformedForLength(1);   // high surrogate not followed by a low surrogate
 236                 return -1;
 237             }
 238             if (Surrogate.isLow(c)) {
 239                 error = CoderResult.malformedForLength(1);   // low surrogate arriving first
 240                 return -1;
 241             }
 242             character = c;   // not a surrogate: the char value is the character
 243             isPair = false;
 244             error = null;
 245             return character;
 246         }
 247 
 248     }
 249 
 250     /**
 251      * Surrogate generation support.  Charset implementations may use instances
 252      * of this class to handle the details of generating UTF-16 surrogate
 253      * pairs.
 254      */
 255     public static class Generator {
 256 
 257         public Generator() { }
 258 


 265         public CoderResult error() {
 266             assert error != null;   // generate() leaves error non-null only on the paths where it returned -1
 267             return error;
 268         }
 269 
 270         /**
 271          * Generates one or two UTF-16 characters to represent the given UCS-4
 272          * character.
 273          *
 274          * @param  uc   The UCS-4 character
 275          * @param  len  The number of input bytes from which the UCS-4 value
 276          *              was constructed (used when creating result objects)
 277          * @param  dst  The destination buffer, to which one or two UTF-16
 278          *              characters will be written
 279          *
 280          * @return  Either a positive count of the number of UTF-16 characters
 281          *          written to the destination buffer, or -1, in which case
 282          *          error() will return a descriptive result object
 283          */
 284         public int generate(int uc, int len, CharBuffer dst) {
 285             if (uc <= 0xffff) {
 286                 if (Surrogate.is(uc)) {
 287                     error = CoderResult.malformedForLength(len);   // value in the surrogate range: reported as malformed
 288                     return -1;
 289                 }
 290                 if (dst.remaining() < 1) {
 291                     error = CoderResult.OVERFLOW;
 292                     return -1;
 293                 }
 294                 dst.put((char)uc);   // NOTE(review): a negative uc for which is(uc) is false also reaches this cast - confirm callers never pass one
 295                 error = null;
 296                 return 1;
 297             }
 298             if (uc < Surrogate.UCS4_MIN) {   // NOTE(review): uc > 0xffff here, so this looks reachable only if UCS4_MIN > 0x10000 - confirm
 299                 error = CoderResult.malformedForLength(len);
 300                 return -1;
 301             }
 302             if (uc <= Surrogate.UCS4_MAX) {
 303                 if (dst.remaining() < 2) {   // room for both chars is checked before either is written
 304                     error = CoderResult.OVERFLOW;
 305                     return -1;
 306                 }
 307                 dst.put(Surrogate.high(uc));
 308                 dst.put(Surrogate.low(uc));
 309                 error = null;
 310                 return 2;
 311             }
 312             error = CoderResult.unmappableForLength(len);   // uc is above UCS4_MAX
 313             return -1;
 314         }
 315 
 316         /**
 317          * Generates one or two UTF-16 characters to represent the given UCS-4
 318          * character.
 319          *
 320          * @param  uc   The UCS-4 character
 321          * @param  len  The number of input bytes from which the UCS-4 value
 322          *              was constructed (used when creating result objects)
 323          * @param  da   The destination array, to which one or two UTF-16
 324          *              characters will be written
 325          * @param  dp   The destination position (index of the first char written)
 326          * @param  dl   The destination limit (exclusive)
 327          *
 328          * @return  Either a positive count of the number of UTF-16 characters
 329          *          written to the destination array, or -1, in which case
 330          *          error() will return a descriptive result object
 331          */
 332         public int generate(int uc, int len, char[] da, int dp, int dl) {
 333             if (uc <= 0xffff) {
 334                 if (Surrogate.is(uc)) {
 335                     error = CoderResult.malformedForLength(len);   // value in the surrogate range: reported as malformed
 336                     return -1;
 337                 }
 338                 if (dl - dp < 1) {
 339                     error = CoderResult.OVERFLOW;
 340                     return -1;
 341                 }
 342                 da[dp] = (char)uc;   // NOTE(review): a negative uc for which is(uc) is false also reaches this cast - confirm callers never pass one
 343                 error = null;
 344                 return 1;
 345             }
 346             if (uc < Surrogate.UCS4_MIN) {   // NOTE(review): uc > 0xffff here, so this looks reachable only if UCS4_MIN > 0x10000 - confirm
 347                 error = CoderResult.malformedForLength(len);
 348                 return -1;
 349             }
 350             if (uc <= Surrogate.UCS4_MAX) {
 351                 if (dl - dp < 2) {   // room for both chars is checked before either is written
 352                     error = CoderResult.OVERFLOW;
 353                     return -1;
 354                 }
 355                 da[dp] = Surrogate.high(uc);
 356                 da[dp + 1] = Surrogate.low(uc);
 357                 error = null;
 358                 return 2;   // dp is a local copy; presumably the caller advances its own index by this count
 359             }
 360             error = CoderResult.unmappableForLength(len);   // uc is above UCS4_MAX
 361             return -1;
 362         }
 363 
 364     }
 365 
 366 }

  72     /**
  73      * Tells whether or not the given UTF-16 value is a surrogate character, i.e. lies in the inclusive range MIN..MAX.
  74      */
  75     public static boolean is(int c) {
  76         return (MIN <= c) && (c <= MAX);
  77     }
  78 
  79     /**
  80      * Tells whether or not the given UCS-4 character must be represented as a
  81      * surrogate pair in UTF-16, i.e. lies in the inclusive range UCS4_MIN..UCS4_MAX.
  82      */
  83     public static boolean neededFor(int uc) {
  84         return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
  85     }
  86 
  87     /**
  88      * Returns the high UTF-16 surrogate for the given UCS-4 character; neededFor(uc) must hold (assertion-checked only).
  89      */
  90     public static char high(int uc) {
  91         assert neededFor(uc);
  92         return (char)((uc >> 10) + (MIN_HIGH - (UCS4_MIN >> 10)));   // shift first, then add one combined offset built from MIN_HIGH and UCS4_MIN
  93     }
  94 
  95     /**
  96      * Returns the low UTF-16 surrogate for the given UCS-4 character; neededFor(uc) must hold (assertion-checked only).
  97      */
  98     public static char low(int uc) {
  99         assert neededFor(uc);
 100         return (char)((uc & 0x3ff) + MIN_LOW);   // low 10 bits of uc added to MIN_LOW
 101     }
 102 
 103     /**
 104      * Converts the given surrogate pair (c high, d low; assertion-checked only) into a 32-bit UCS-4 character.
 105      */
 106     public static int toUCS4(char c, char d) {
 107         assert isHigh(c) && isLow(d);
 108         return Character.toCodePoint(c, d);   // delegates the pair-to-code-point arithmetic to java.lang.Character
 109     }
 110 
 111     /**
 112      * Surrogate parsing support.  Charset implementations may use instances of
 113      * this class to handle the details of parsing UTF-16 surrogate pairs.
 114      */
 115     public static class Parser {
 116 
 117         public Parser() { }
 118 
 119         private int character;          // UCS-4
 120         private CoderResult error = CoderResult.UNDERFLOW;
 121         private boolean isPair;
 122 
 123         /**
 124          * Returns the UCS-4 character previously parsed.
 125          */
 126         public int character() {
 127             assert (error == null);
 128             return character;


 161          */
 162         public CoderResult unmappableResult() {
 163             assert (error == null);   // parse() sets error to null only when it succeeded
 164             return CoderResult.unmappableForLength(isPair ? 2 : 1);   // length 2 if the last parse was a surrogate pair, else 1
 165         }
 166 
 167         /**
 168          * Parses a UCS-4 character from the given source buffer, handling
 169          * surrogates.
 170          *
 171          * @param  c    The first character
 172          * @param  in   The source buffer, from which one more character
 173          *              will be consumed if c is a high surrogate
 174          *
 175          * @return  Either a parsed UCS-4 character, in which case the isPair()
 176          *          and increment() methods will return meaningful values, or
 177          *          -1, in which case error() will return a descriptive result
 178          *          object
 179          */
 180         public int parse(char c, CharBuffer in) {
 181             if (Character.isHighSurrogate(c)) {
 182                 if (!in.hasRemaining()) {
 183                     error = CoderResult.UNDERFLOW;   // high surrogate with nothing left in the buffer
 184                     return -1;
 185                 }
 186                 char d = in.get();   // relative get: d is consumed even if it turns out not to be a low surrogate
 187                 if (Character.isLowSurrogate(d)) {
 188                     character = Character.toCodePoint(c, d);
 189                     isPair = true;
 190                     error = null;
 191                     return character;
 192                 }
 193                 error = CoderResult.malformedForLength(1);   // high surrogate not followed by a low surrogate
 194                 return -1;
 195             }
 196             if (Character.isLowSurrogate(c)) {
 197                 error = CoderResult.malformedForLength(1);   // low surrogate arriving first
 198                 return -1;
 199             }
 200             character = c;   // not a surrogate: the char value is the character
 201             isPair = false;
 202             error = null;
 203             return character;
 204         }
 205 
 206         /**
 207          * Parses a UCS-4 character from the given input array, handling
 208          * surrogates.
 209          *
 210          * @param  c    The first character
 211          * @param  ia   The input array, from which one more character
 212          *              will be read (at ip + 1) if c is a high surrogate
 213          * @param  ip   The input index; ia[ip] must equal c (assertion-checked only)
 214          * @param  il   The input limit (exclusive)
 215          *
 216          * @return  Either a parsed UCS-4 character, in which case the isPair()
 217          *          and increment() methods will return meaningful values, or
 218          *          -1, in which case error() will return a descriptive result
 219          *          object
 220          */
 221         public int parse(char c, char[] ia, int ip, int il) {
 222             assert (ia[ip] == c);
 223             if (Character.isHighSurrogate(c)) {
 224                 if (il - ip < 2) {
 225                     error = CoderResult.UNDERFLOW;   // no element at ip + 1 below the limit
 226                     return -1;
 227                 }
 228                 char d = ia[ip + 1];
 229                 if (Character.isLowSurrogate(d)) {
 230                     character = Character.toCodePoint(c, d);
 231                     isPair = true;
 232                     error = null;
 233                     return character;
 234                 }
 235                 error = CoderResult.malformedForLength(1);   // high surrogate not followed by a low surrogate
 236                 return -1;
 237             }
 238             if (Character.isLowSurrogate(c)) {
 239                 error = CoderResult.malformedForLength(1);   // low surrogate arriving first
 240                 return -1;
 241             }
 242             character = c;   // not a surrogate: the char value is the character
 243             isPair = false;
 244             error = null;
 245             return character;
 246         }
 247 
 248     }
 249 
 250     /**
 251      * Surrogate generation support.  Charset implementations may use instances
 252      * of this class to handle the details of generating UTF-16 surrogate
 253      * pairs.
 254      */
 255     public static class Generator {
 256 
 257         public Generator() { }
 258 


 265         public CoderResult error() {
 266             assert error != null;   // generate() leaves error non-null only on the paths where it returned -1
 267             return error;
 268         }
 269 
 270         /**
 271          * Generates one or two UTF-16 characters to represent the given UCS-4
 272          * character.
 273          *
 274          * @param  uc   The UCS-4 character
 275          * @param  len  The number of input bytes from which the UCS-4 value
 276          *              was constructed (used when creating result objects)
 277          * @param  dst  The destination buffer, to which one or two UTF-16
 278          *              characters will be written
 279          *
 280          * @return  Either a positive count of the number of UTF-16 characters
 281          *          written to the destination buffer, or -1, in which case
 282          *          error() will return a descriptive result object
 283          */
 284         public int generate(int uc, int len, CharBuffer dst) {
 285             if (uc < Surrogate.UCS4_MIN) {   // everything below the surrogate-pair range is handled as a single char
 286                 if (Surrogate.is(uc)) {
 287                     error = CoderResult.malformedForLength(len);   // value in the surrogate range: reported as malformed
 288                     return -1;
 289                 }
 290                 if (dst.remaining() < 1) {
 291                     error = CoderResult.OVERFLOW;
 292                     return -1;
 293                 }
 294                 dst.put((char)uc);   // NOTE(review): a negative uc for which is(uc) is false also reaches this cast - confirm callers never pass one
 295                 error = null;
 296                 return 1;
 297             }




 298             if (uc <= Surrogate.UCS4_MAX) {
 299                 if (dst.remaining() < 2) {   // room for both chars is checked before either is written
 300                     error = CoderResult.OVERFLOW;
 301                     return -1;
 302                 }
 303                 dst.put(Surrogate.high(uc));
 304                 dst.put(Surrogate.low(uc));
 305                 error = null;
 306                 return 2;
 307             }
 308             error = CoderResult.unmappableForLength(len);   // uc is above UCS4_MAX
 309             return -1;
 310         }
 311 
 312         /**
 313          * Generates one or two UTF-16 characters to represent the given UCS-4
 314          * character.
 315          *
 316          * @param  uc   The UCS-4 character
 317          * @param  len  The number of input bytes from which the UCS-4 value
 318          *              was constructed (used when creating result objects)
 319          * @param  da   The destination array, to which one or two UTF-16
 320          *              characters will be written
 321          * @param  dp   The destination position (index of the first char written)
 322          * @param  dl   The destination limit (exclusive)
 323          *
 324          * @return  Either a positive count of the number of UTF-16 characters
 325          *          written to the destination array, or -1, in which case
 326          *          error() will return a descriptive result object
 327          */
 328         public int generate(int uc, int len, char[] da, int dp, int dl) {
 329             if (uc < Surrogate.UCS4_MIN) {   // everything below the surrogate-pair range is handled as a single char
 330                 if (Surrogate.is(uc)) {
 331                     error = CoderResult.malformedForLength(len);   // value in the surrogate range: reported as malformed
 332                     return -1;
 333                 }
 334                 if (dl - dp < 1) {
 335                     error = CoderResult.OVERFLOW;
 336                     return -1;
 337                 }
 338                 da[dp] = (char)uc;   // NOTE(review): a negative uc for which is(uc) is false also reaches this cast - confirm callers never pass one
 339                 error = null;
 340                 return 1;
 341             }




 342             if (uc <= Surrogate.UCS4_MAX) {
 343                 if (dl - dp < 2) {   // room for both chars is checked before either is written
 344                     error = CoderResult.OVERFLOW;
 345                     return -1;
 346                 }
 347                 da[dp] = Surrogate.high(uc);
 348                 da[dp + 1] = Surrogate.low(uc);
 349                 error = null;
 350                 return 2;   // dp is a local copy; presumably the caller advances its own index by this count
 351             }
 352             error = CoderResult.unmappableForLength(len);   // uc is above UCS4_MAX
 353             return -1;
 354         }
 355 
 356     }
 357 
 358 }