72 /** 73 * Tells whether or not the given UTF-16 value is a surrogate character, 74 */ 75 public static boolean is(int c) { 76 return (MIN <= c) && (c <= MAX); 77 } 78 79 /** 80 * Tells whether or not the given UCS-4 character must be represented as a 81 * surrogate pair in UTF-16. 82 */ 83 public static boolean neededFor(int uc) { 84 return (uc >= UCS4_MIN) && (uc <= UCS4_MAX); 85 } 86 87 /** 88 * Returns the high UTF-16 surrogate for the given UCS-4 character. 89 */ 90 public static char high(int uc) { 91 assert neededFor(uc); 92 return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff)); 93 } 94 95 /** 96 * Returns the low UTF-16 surrogate for the given UCS-4 character. 97 */ 98 public static char low(int uc) { 99 assert neededFor(uc); 100 return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff)); 101 } 102 103 /** 104 * Converts the given surrogate pair into a 32-bit UCS-4 character. 105 */ 106 public static int toUCS4(char c, char d) { 107 assert isHigh(c) && isLow(d); 108 return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000; 109 } 110 111 /** 112 * Surrogate parsing support. Charset implementations may use instances of 113 * this class to handle the details of parsing UTF-16 surrogate pairs. 114 */ 115 public static class Parser { 116 117 public Parser() { } 118 119 private int character; // UCS-4 120 private CoderResult error = CoderResult.UNDERFLOW; 121 private boolean isPair; 122 123 /** 124 * Returns the UCS-4 character previously parsed. 125 */ 126 public int character() { 127 assert (error == null); 128 return character; 161 */ 162 public CoderResult unmappableResult() { 163 assert (error == null); 164 return CoderResult.unmappableForLength(isPair ? 2 : 1); 165 } 166 167 /** 168 * Parses a UCS-4 character from the given source buffer, handling 169 * surrogates. 170 * 171 * @param c The first character 172 * @param in The source buffer, from which one more character 173 * will be consumed if c is a high surrogate 174 * 175 * @returns Either a parsed UCS-4 character, in which case the isPair() 176 * and increment() methods will return meaningful values, or 177 * -1, in which case error() will return a descriptive result 178 * object 179 */ 180 public int parse(char c, CharBuffer in) { 181 if (Surrogate.isHigh(c)) { 182 if (!in.hasRemaining()) { 183 error = CoderResult.UNDERFLOW; 184 return -1; 185 } 186 char d = in.get(); 187 if (Surrogate.isLow(d)) { 188 character = toUCS4(c, d); 189 isPair = true; 190 error = null; 191 return character; 192 } 193 error = CoderResult.malformedForLength(1); 194 return -1; 195 } 196 if (Surrogate.isLow(c)) { 197 error = CoderResult.malformedForLength(1); 198 return -1; 199 } 200 character = c; 201 isPair = false; 202 error = null; 203 return character; 204 } 205 206 /** 207 * Parses a UCS-4 character from the given source buffer, handling 208 * surrogates. 209 * 210 * @param c The first character 211 * @param ia The input array, from which one more character 212 * will be consumed if c is a high surrogate 213 * @param ip The input index 214 * @param il The input limit 215 * 216 * @returns Either a parsed UCS-4 character, in which case the isPair() 217 * and increment() methods will return meaningful values, or 218 * -1, in which case error() will return a descriptive result 219 * object 220 */ 221 public int parse(char c, char[] ia, int ip, int il) { 222 assert (ia[ip] == c); 223 if (Surrogate.isHigh(c)) { 224 if (il - ip < 2) { 225 error = CoderResult.UNDERFLOW; 226 return -1; 227 } 228 char d = ia[ip + 1]; 229 if (Surrogate.isLow(d)) { 230 character = toUCS4(c, d); 231 isPair = true; 232 error = null; 233 return character; 234 } 235 error = CoderResult.malformedForLength(1); 236 return -1; 237 } 238 if (Surrogate.isLow(c)) { 239 error = CoderResult.malformedForLength(1); 240 return -1; 241 } 242 character = c; 243 isPair = false; 244 error = null; 245 return character; 246 } 247 248 } 249 250 /** 251 * Surrogate generation support. Charset implementations may use instances 252 * of this class to handle the details of generating UTF-16 surrogate 253 * pairs. 254 */ 255 public static class Generator { 256 257 public Generator() { } 258 265 public CoderResult error() { 266 assert error != null; 267 return error; 268 } 269 270 /** 271 * Generates one or two UTF-16 characters to represent the given UCS-4 272 * character. 273 * 274 * @param uc The UCS-4 character 275 * @param len The number of input bytes from which the UCS-4 value 276 * was constructed (used when creating result objects) 277 * @param dst The destination buffer, to which one or two UTF-16 278 * characters will be written 279 * 280 * @returns Either a positive count of the number of UTF-16 characters 281 * written to the destination buffer, or -1, in which case 282 * error() will return a descriptive result object 283 */ 284 public int generate(int uc, int len, CharBuffer dst) { 285 if (uc <= 0xffff) { 286 if (Surrogate.is(uc)) { 287 error = CoderResult.malformedForLength(len); 288 return -1; 289 } 290 if (dst.remaining() < 1) { 291 error = CoderResult.OVERFLOW; 292 return -1; 293 } 294 dst.put((char)uc); 295 error = null; 296 return 1; 297 } 298 if (uc < Surrogate.UCS4_MIN) { 299 error = CoderResult.malformedForLength(len); 300 return -1; 301 } 302 if (uc <= Surrogate.UCS4_MAX) { 303 if (dst.remaining() < 2) { 304 error = CoderResult.OVERFLOW; 305 return -1; 306 } 307 dst.put(Surrogate.high(uc)); 308 dst.put(Surrogate.low(uc)); 309 error = null; 310 return 2; 311 } 312 error = CoderResult.unmappableForLength(len); 313 return -1; 314 } 315 316 /** 317 * Generates one or two UTF-16 characters to represent the given UCS-4 318 * character. 319 * 320 * @param uc The UCS-4 character 321 * @param len The number of input bytes from which the UCS-4 value 322 * was constructed (used when creating result objects) 323 * @param da The destination array, to which one or two UTF-16 324 * characters will be written 325 * @param dp The destination position 326 * @param dl The destination limit 327 * 328 * @returns Either a positive count of the number of UTF-16 characters 329 * written to the destination buffer, or -1, in which case 330 * error() will return a descriptive result object 331 */ 332 public int generate(int uc, int len, char[] da, int dp, int dl) { 333 if (uc <= 0xffff) { 334 if (Surrogate.is(uc)) { 335 error = CoderResult.malformedForLength(len); 336 return -1; 337 } 338 if (dl - dp < 1) { 339 error = CoderResult.OVERFLOW; 340 return -1; 341 } 342 da[dp] = (char)uc; 343 error = null; 344 return 1; 345 } 346 if (uc < Surrogate.UCS4_MIN) { 347 error = CoderResult.malformedForLength(len); 348 return -1; 349 } 350 if (uc <= Surrogate.UCS4_MAX) { 351 if (dl - dp < 2) { 352 error = CoderResult.OVERFLOW; 353 return -1; 354 } 355 da[dp] = Surrogate.high(uc); 356 da[dp + 1] = Surrogate.low(uc); 357 error = null; 358 return 2; 359 } 360 error = CoderResult.unmappableForLength(len); 361 return -1; 362 } 363 364 } 365 366 } | 72 /** 73 * Tells whether or not the given UTF-16 value is a surrogate character, 74 */ 75 public static boolean is(int c) { 76 return (MIN <= c) && (c <= MAX); 77 } 78 79 /** 80 * Tells whether or not the given UCS-4 character must be represented as a 81 * surrogate pair in UTF-16. 82 */ 83 public static boolean neededFor(int uc) { 84 return (uc >= UCS4_MIN) && (uc <= UCS4_MAX); 85 } 86 87 /** 88 * Returns the high UTF-16 surrogate for the given UCS-4 character. 89 */ 90 public static char high(int uc) { 91 assert neededFor(uc); 92 return (char)((uc >> 10) + (MIN_HIGH - (UCS4_MIN >> 10))); 93 } 94 95 /** 96 * Returns the low UTF-16 surrogate for the given UCS-4 character. 97 */ 98 public static char low(int uc) { 99 assert neededFor(uc); 100 return (char)((uc & 0x3ff) + MIN_LOW); 101 } 102 103 /** 104 * Converts the given surrogate pair into a 32-bit UCS-4 character. 105 */ 106 public static int toUCS4(char c, char d) { 107 assert isHigh(c) && isLow(d); 108 return Character.toCodePoint(c, d); 109 } 110 111 /** 112 * Surrogate parsing support. Charset implementations may use instances of 113 * this class to handle the details of parsing UTF-16 surrogate pairs. 114 */ 115 public static class Parser { 116 117 public Parser() { } 118 119 private int character; // UCS-4 120 private CoderResult error = CoderResult.UNDERFLOW; 121 private boolean isPair; 122 123 /** 124 * Returns the UCS-4 character previously parsed. 125 */ 126 public int character() { 127 assert (error == null); 128 return character; 161 */ 162 public CoderResult unmappableResult() { 163 assert (error == null); 164 return CoderResult.unmappableForLength(isPair ? 2 : 1); 165 } 166 167 /** 168 * Parses a UCS-4 character from the given source buffer, handling 169 * surrogates. 170 * 171 * @param c The first character 172 * @param in The source buffer, from which one more character 173 * will be consumed if c is a high surrogate 174 * 175 * @returns Either a parsed UCS-4 character, in which case the isPair() 176 * and increment() methods will return meaningful values, or 177 * -1, in which case error() will return a descriptive result 178 * object 179 */ 180 public int parse(char c, CharBuffer in) { 181 if (Character.isHighSurrogate(c)) { 182 if (!in.hasRemaining()) { 183 error = CoderResult.UNDERFLOW; 184 return -1; 185 } 186 char d = in.get(); 187 if (Character.isLowSurrogate(d)) { 188 character = Character.toCodePoint(c, d); 189 isPair = true; 190 error = null; 191 return character; 192 } 193 error = CoderResult.malformedForLength(1); 194 return -1; 195 } 196 if (Character.isLowSurrogate(c)) { 197 error = CoderResult.malformedForLength(1); 198 return -1; 199 } 200 character = c; 201 isPair = false; 202 error = null; 203 return character; 204 } 205 206 /** 207 * Parses a UCS-4 character from the given source buffer, handling 208 * surrogates. 209 * 210 * @param c The first character 211 * @param ia The input array, from which one more character 212 * will be consumed if c is a high surrogate 213 * @param ip The input index 214 * @param il The input limit 215 * 216 * @returns Either a parsed UCS-4 character, in which case the isPair() 217 * and increment() methods will return meaningful values, or 218 * -1, in which case error() will return a descriptive result 219 * object 220 */ 221 public int parse(char c, char[] ia, int ip, int il) { 222 assert (ia[ip] == c); 223 if (Character.isHighSurrogate(c)) { 224 if (il - ip < 2) { 225 error = CoderResult.UNDERFLOW; 226 return -1; 227 } 228 char d = ia[ip + 1]; 229 if (Character.isLowSurrogate(d)) { 230 character = Character.toCodePoint(c, d); 231 isPair = true; 232 error = null; 233 return character; 234 } 235 error = CoderResult.malformedForLength(1); 236 return -1; 237 } 238 if (Character.isLowSurrogate(c)) { 239 error = CoderResult.malformedForLength(1); 240 return -1; 241 } 242 character = c; 243 isPair = false; 244 error = null; 245 return character; 246 } 247 248 } 249 250 /** 251 * Surrogate generation support. Charset implementations may use instances 252 * of this class to handle the details of generating UTF-16 surrogate 253 * pairs. 254 */ 255 public static class Generator { 256 257 public Generator() { } 258 265 public CoderResult error() { 266 assert error != null; 267 return error; 268 } 269 270 /** 271 * Generates one or two UTF-16 characters to represent the given UCS-4 272 * character. 273 * 274 * @param uc The UCS-4 character 275 * @param len The number of input bytes from which the UCS-4 value 276 * was constructed (used when creating result objects) 277 * @param dst The destination buffer, to which one or two UTF-16 278 * characters will be written 279 * 280 * @returns Either a positive count of the number of UTF-16 characters 281 * written to the destination buffer, or -1, in which case 282 * error() will return a descriptive result object 283 */ 284 public int generate(int uc, int len, CharBuffer dst) { 285 if (uc < Surrogate.UCS4_MIN) { 286 if (Surrogate.is(uc)) { 287 error = CoderResult.malformedForLength(len); 288 return -1; 289 } 290 if (dst.remaining() < 1) { 291 error = CoderResult.OVERFLOW; 292 return -1; 293 } 294 dst.put((char)uc); 295 error = null; 296 return 1; 297 } 298 if (uc <= Surrogate.UCS4_MAX) { 299 if (dst.remaining() < 2) { 300 error = CoderResult.OVERFLOW; 301 return -1; 302 } 303 dst.put(Surrogate.high(uc)); 304 dst.put(Surrogate.low(uc)); 305 error = null; 306 return 2; 307 } 308 error = CoderResult.unmappableForLength(len); 309 return -1; 310 } 311 312 /** 313 * Generates one or two UTF-16 characters to represent the given UCS-4 314 * character. 315 * 316 * @param uc The UCS-4 character 317 * @param len The number of input bytes from which the UCS-4 value 318 * was constructed (used when creating result objects) 319 * @param da The destination array, to which one or two UTF-16 320 * characters will be written 321 * @param dp The destination position 322 * @param dl The destination limit 323 * 324 * @returns Either a positive count of the number of UTF-16 characters 325 * written to the destination buffer, or -1, in which case 326 * error() will return a descriptive result object 327 */ 328 public int generate(int uc, int len, char[] da, int dp, int dl) { 329 if (uc < Surrogate.UCS4_MIN) { 330 if (Surrogate.is(uc)) { 331 error = CoderResult.malformedForLength(len); 332 return -1; 333 } 334 if (dl - dp < 1) { 335 error = CoderResult.OVERFLOW; 336 return -1; 337 } 338 da[dp] = (char)uc; 339 error = null; 340 return 1; 341 } 342 if (uc <= Surrogate.UCS4_MAX) { 343 if (dl - dp < 2) { 344 error = CoderResult.OVERFLOW; 345 return -1; 346 } 347 da[dp] = Surrogate.high(uc); 348 da[dp + 1] = Surrogate.low(uc); 349 error = null; 350 return 2; 351 } 352 error = CoderResult.unmappableForLength(len); 353 return -1; 354 } 355 356 } 357 358 } |