/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 6233345 6381699 6381702 6381705 6381706
 * @summary Encode many char sequences in many ways
 * @run main/timeout=1200 FindEncoderBugs
 * @author Martin Buchholz
 */

import java.util.*;
import java.util.regex.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * Stress test that encodes many short char sequences with every available
 * charset that can encode, using heap and direct buffers and several output
 * buffer sizes, and reports inconsistencies between the runs.
 */
public class FindEncoderBugs {

    /**
     * Returns true for charsets that are skipped by this test.
     *
     * @param csn canonical charset name
     */
    static boolean isBroken(String csn) {
        if (csn.equals("x-COMPOUND_TEXT")) return true;
        return false;
    }

    /**
     * Returns a new list holding the elements of {@code c} in natural order.
     * The argument collection is not modified.
     */
    static <T extends Comparable<? super T>> List<T> sort(Collection<T> c) {
        List<T> list = new ArrayList<T>(c);
        Collections.sort(list);
        return list;
    }

    /** Thrown by {@link CharsetTester#test} to abandon a charset once it has failed too often. */
    static class TooManyFailures extends RuntimeException {
        private static final long serialVersionUID = 0L;
    }

    /** Formats bytes as space-separated two-digit lower-case hex values. */
    static String string(byte[] a) {
        final StringBuilder sb = new StringBuilder();
        for (byte b : a) {
            if (sb.length() != 0) sb.append(' ');
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /**
     * Formats chars as space-separated escapes: a backslash, the letter u,
     * and four lower-case hex digits per char.
     */
    static String string(char[] a) {
        final StringBuilder sb = new StringBuilder();
        for (char c : a) {
            if (sb.length() != 0) sb.append(' ');
            sb.append(String.format("\\u%04x", (int) c));
        }
        return sb.toString();
    }

    /** Counts every failure but prints each distinct failure message only a few times. */
    static class Reporter {
        // Some machinery to make sure only a small number of errors
        // that are "too similar" are reported.
        static class Counts extends HashMap<String, Long> {
            private static final long serialVersionUID = -1;

            /** Increments the counter for {@code signature} and returns the new value. */
            long inc(String signature) {
                Long count = get(signature);
                if (count == null) count = 0L;
                put(signature, count+1);
                return count+1;
            }
        }

        final Counts failureCounts = new Counts();
        static final long maxFailures = 2;

        // Only referenced by the disabled signature normalization in bug().
        static final Pattern hideBytes = Pattern.compile("\"[0-9a-f ]+\"");
        static final Pattern hideChars = Pattern.compile("\\\\u[0-9a-f]{4}");

        /**
         * Records one failure. The formatted message is the failure's
         * signature; it is printed only while that signature has been seen
         * at most {@code maxFailures} times.
         *
         * @return true if the message was printed, false if it was suppressed
         */
        boolean bug(String format, Object... args) {
            String signature = String.format(format, args);
            // signature = hideBytes.matcher(signature).replaceAll("\"??\"");
            // signature = hideChars.matcher(signature).replaceAll("\\u????");
            failed++;
            if (failureCounts.inc(signature) <= maxFailures) {
                System.out.printf(format, args);
                System.out.println();
                return true;
            }
            return false;
        }

        /** Prints every recorded signature, sorted, together with its failure count. */
        void summarize() {
            for (String key : sort(failureCounts.keySet()))
                System.out.printf("-----%n%s%nfailures=%d%n",
                                  key, failureCounts.get(key));
        }
    }

    static final Reporter reporter = new Reporter();

    /** Snapshot of the buffers and the CoderResult taken after an encode call. */
    static class Result {
        final int limit;      // limit of the output buffer
        final int ipos;       // position of the input buffer
        final boolean direct; // whether the input buffer is direct
        final char[] ia;      // input chars, from index 0 to the input limit
        final byte[] oa;      // output bytes, from index 0 to the output position
        final CoderResult cr;

        Result(CharBuffer ib, ByteBuffer ob, CoderResult cr) {
            ipos = ib.position();
            ia = toArray(ib);
            oa = toArray(ob);
            direct = ib.isDirect();
            limit = ob.limit();
            this.cr = cr;
        }

        /** Copies chars [0, limit) out of {@code b} and restores its position. */
        static char[] toArray(CharBuffer b) {
            int pos = b.position();
            char[] a = new char[b.limit()];
            b.position(0);
            b.get(a);
            b.position(pos);
            return a;
        }

        /**
         * Copies bytes [0, position) out of {@code b}. The bulk get leaves
         * the position where it was on entry.
         */
        static byte[] toArray(ByteBuffer b) {
            byte[] a = new byte[b.position()];
            b.position(0);
            b.get(a);
            return a;
        }

        /**
         * Two results are equal when both are null, or both are non-null
         * with the same output bytes, input position and CoderResult instance.
         */
        static boolean eq(Result x, Result y) {
            return x == y ||
                (x != null && y != null &&
                 (Arrays.equals(x.oa, y.oa) &&
                  x.ipos == y.ipos &&
                  x.cr == y.cr));
        }

        @Override
        public String toString() {
            return String.format("\"%s\"[%d/%d] => %s \"%s\"[%d/%d]%s",
                                 string(ia), ipos, ia.length,
                                 cr, string(oa), oa.length, limit,
                                 (direct ? " (direct)" : ""));
        }
    }

    /** Runs the encoding checks against a single charset. */
    static class CharsetTester {
        private final Charset cs;
        private final boolean hasBom;
        // private static final long maxCharsetFailures = Long.MAX_VALUE;
        private static final long maxCharsetFailures = 10000L;
        // Global failure count when this tester was created; test() compares
        // against it to decide when to give up on this charset.
        private final long failed0 = failed;

        // legend: r=regular d=direct In=Input Ou=Output
        // Buffer i of each array has capacity i.
        static final int maxBufSize = 20;
        static final CharBuffer[] rInBuffers = new CharBuffer[maxBufSize];
        static final CharBuffer[] dInBuffers = new CharBuffer[maxBufSize];

        static final ByteBuffer[] rOuBuffers = new ByteBuffer[maxBufSize];
        static final ByteBuffer[] dOuBuffers = new ByteBuffer[maxBufSize];
        static {
            for (int i = 0; i < maxBufSize; i++) {
                rInBuffers[i] = CharBuffer.allocate(i);
                dInBuffers[i] = ByteBuffer.allocateDirect(i*2).asCharBuffer();
                rOuBuffers[i] = ByteBuffer.allocate(i);
                dOuBuffers[i] = ByteBuffer.allocateDirect(i);
            }
        }

        CharsetTester(Charset cs) {
            this.cs = cs;
            this.hasBom =
                cs.name().matches(".*BOM.*") ||
                cs.name().equals("UTF-16");
        }

        /** Forwards to the shared {@link Reporter}. */
        static boolean bug(String format, Object... args) {
            return reporter.bug(format, args);
        }

        /**
         * Heuristic byte-order-mark check: true when {@code a} has length 2
         * or 4 and its (signed) bytes sum to {@code (byte) 0xfe + (byte) 0xff}.
         */
        static boolean hasBom(byte[] a) {
            switch (a.length) {
            case 2: case 4:
                int sum = 0;
                for (byte x : a)
                    sum += x;
                return sum == (byte) 0xfe + (byte) 0xff;
            default: return false;
            }
        }

        /**
         * Checks the CoderResult produced for lone, unpaired and correctly
         * paired surrogates. Each loop stops early if test() returns null;
         * the last three loops share a failure budget and return once it
         * is exhausted.
         */
        void testSurrogates() {
            int failures = 0;
            for (int i = 0; i < 10; i++) {
                Result r = test(new char[] { randomHighSurrogate() });
                if (r == null) break;
                if (! (r.cr.isUnderflow() &&
                       r.ipos == 0))
                    bug("Lone high surrogate not UNDERFLOW: %s %s",
                        cs, r);
            }
            for (int i = 0; i < 10; i++) {
                Result r = test(new char[] { randomLowSurrogate() });
                if (r == null) break;
                if (! (r.cr.isMalformed() && r.cr.length() == 1))
                    bug("Lone low surrogate not MALFORMED[1]: %s %s",
                        cs, r);
            }
            char[] chars = new char[2];
            for (int i = 0; i < 10; i++) {
                chars[0] = randomLowSurrogate(); // Always illegal
                chars[1] = randomChar();
                Result r = test(chars);
                if (r == null) break;
                if (! (r.cr.isMalformed() &&
                       r.cr.length() == 1 &&
                       (r.ipos == 0 || (hasBom && hasBom(r.oa))))) {
                    if (failures++ > 5) return;
                    bug("Unpaired low surrogate not MALFORMED[1]: %s %s",
                        cs, r);
                }
            }
            for (int i = 0; i < 10; i++) {
                chars[0] = randomHighSurrogate();
                do {
                    chars[1] = randomChar();
                } while (Character.isLowSurrogate(chars[1]));
                Result r = test(chars);
                if (r == null) break;
                if (! (r.cr.isMalformed() &&
                       r.cr.length() == 1 &&
                       (r.ipos == 0 || (hasBom && hasBom(r.oa))))) {
                    if (failures++ > 5) return;
                    bug("Unpaired high surrogate not MALFORMED[1]: %s %s",
                        cs, r);
                }
            }
            for (int i = 0; i < 1000; i++) {
                chars[0] = randomHighSurrogate();
                chars[1] = randomLowSurrogate();
                Result r = test(chars);
                if (r == null) break;
                // Either the pair is unmappable and nothing was written, or
                // it was fully consumed and produced some output.
                if (! ((r.cr.isUnmappable() &&
                        r.cr.length() == 2 &&
                        r.oa.length == 0)
                       ||
                       (r.cr.isUnderflow() &&
                        r.oa.length > 0 &&
                        r.ipos == 2))) {
                    if (failures++ > 5) return;
                    bug("Legal supplementary character bug: %s %s",
                        cs, r);
                }
            }
        }

        /**
         * Encodes all of {@code ib} into {@code ob} with a fresh encoder and
         * sanity-checks the outcome. Both buffers are cleared first and the
         * output buffer is pre-filled with a canary byte so that a write
         * past the reported position can be noticed.
         *
         * @return the result of the first encode call, or null if anything threw
         */
        Result recode(CharBuffer ib, ByteBuffer ob) {
            try {
                byte canary = 22;
                ib.clear();     // Prepare to read
                ob.clear();     // Prepare to write
                for (int i = 0; i < ob.limit(); i++)
                    ob.put(i, canary);
                CharsetEncoder coder = cs.newEncoder();
                CoderResult cr = coder.encode(ib, ob, false);
                equal(ib.limit(), ib.capacity());
                equal(ob.limit(), ob.capacity());
                Result r = new Result(ib, ob, cr);
                if (cr.isError())
                    check(cr.length() > 0);
                if (cr.isOverflow() && ob.remaining() > 10)
                    bug("OVERFLOW, but there's lots of room: %s %s",
                        cs, r);
//              if (cr.isOverflow() && ib.remaining() == 0 && ! hasBom)
//                  bug("OVERFLOW, yet remaining() == 0: %s %s",
//                      cs, r);
                if (cr.isError() && ib.remaining() < cr.length())
                    bug("remaining() < CoderResult.length(): %s %s",
                        cs, r);
//              if (ib.position() == 0
//                  && ob.position() > 0
//                  && ! hasBom(r.oa))
//                  bug("output only if input consumed: %s %s",
//                       cs, r);
                // Encoding again from the same state must change nothing.
                CoderResult cr2 = coder.encode(ib, ob, false);
                if (ib.position() != r.ipos ||
                    ob.position() != r.oa.length ||
                    cr != cr2)
                    bug("Coding operation not idempotent: %s%n %s%n %s",
                        cs, r, new Result(ib, ob, cr2));
                if (ob.position() < ob.limit() &&
                    ob.get(ob.position()) != canary)
                    bug("Buffer overrun: %s %s %s",
                        cs, r, ob.get(ob.position()));
                return r;
            } catch (Throwable t) {
                // Print the stack trace only when the report was not suppressed.
                if (bug("Unexpected exception: %s %s %s",
                        cs, t.getClass().getSimpleName(),
                        new Result(ib, ob, null)))
                    t.printStackTrace();
                return null;
            }
        }

        /**
         * Encodes {@code ia} into output buffers of capacity {@code n}, once
         * with heap buffers and once with direct buffers, and reports a bug
         * if both runs succeeded but disagree.
         *
         * @return the heap-buffer result (may be null)
         */
        Result recode2(char[] ia, int n) {
            int len = ia.length;
            CharBuffer rib = CharBuffer.wrap(ia);
            CharBuffer dib = dInBuffers[len];
            dib.clear(); dib.put(ia); dib.clear();
            ByteBuffer rob = rOuBuffers[n];
            ByteBuffer dob = dOuBuffers[n];
            equal(rob.limit(), n);
            equal(dob.limit(), n);
            check(dib.isDirect());
            check(dob.isDirect());
            Result r1 = recode(rib, rob);
            Result r2 = recode(dib, dob);
            if (r1 != null && r2 != null && ! Result.eq(r1, r2))
                bug("Results differ for direct buffers: %s%n %s%n %s",
                    cs, r1, r2);
            return r1;
        }

        /**
         * Encodes {@code ia} into a roomy output buffer and then, if any
         * output was produced, into an exactly sized buffer and into every
         * smaller buffer, checking the results for consistency.
         *
         * @return the roomy result, or null if the roomy encode threw
         * @throws TooManyFailures once this tester has accumulated
         *         {@code maxCharsetFailures} failures
         */
        Result test(char[] ia) {
            if (failed - failed0 >= maxCharsetFailures)
                throw new TooManyFailures();

            Result roomy = recode2(ia, maxBufSize - 1);
            if (roomy == null) return roomy;
            int olen = roomy.oa.length;
            if (olen > 0) {
                if (roomy.ipos == roomy.ia.length) {
                    Result perfectFit = recode2(ia, olen);
                    // A null result means recode() already reported an
                    // exception; do not log a second failure against null.
                    if (perfectFit != null && ! Result.eq(roomy, perfectFit))
                        bug("Results differ: %s%n %s%n %s",
                            cs, roomy, perfectFit);
                }
                for (int i = 0; i < olen; i++) {
                    Result claustrophobic = recode2(ia, i);
                    if (claustrophobic == null) return roomy;
                    if (roomy.cr.isUnderflow() &&
                        ! claustrophobic.cr.isOverflow())
                        bug("Expected OVERFLOW: %s%n %s%n %s",
                            cs, roomy, claustrophobic);
                }
            }
            return roomy;
        }

        /** Tests {@code prefix} followed by every possible sequence of {@code n} chars. */
        void testExhaustively(char[] prefix, int n) {
            int len = prefix.length;
            char[] ia = Arrays.copyOf(prefix, len + 1);
            for (int i = 0; i < 0x10000; i++) {
                ia[len] = (char) i;
                if (n == 1)
                    test(ia);
                else
                    testExhaustively(ia, n - 1);
            }
        }

        /** Tests {@code prefix} followed by {@code n} random chars, 10000 times. */
        void testRandomly(char[] prefix, int n) {
            int len = prefix.length;
            char[] ia = Arrays.copyOf(prefix, len + n);
            for (int i = 0; i < 10000; i++) {
                for (int j = 0; j < n; j++)
                    ia[len + j] = randomChar();
                test(ia);
            }
        }

        /**
         * Tests the prefix alone, then followed by every single char, then
         * followed by 2 and by 3 random chars.
         */
        void testPrefix(char[] prefix) {
            if (prefix.length > 0)
                System.out.printf("Testing prefix %s%n", string(prefix));

            test(prefix);

            testExhaustively(prefix, 1);
            // Can you spare a year of CPU time?
            //testExhaustively(prefix, 2);

            testRandomly(prefix, 2);
            testRandomly(prefix, 3);
        }
    }

    private static final Random rnd = new Random();

    /** Returns a random char drawn from the whole 16-bit range. */
    private static char randomChar() {
        // nextInt's bound is exclusive; the +1 makes Character.MAX_VALUE
        // itself reachable.
        return (char) rnd.nextInt(Character.MAX_VALUE + 1);
    }

    private static char randomHighSurrogate() {
        return (char) (Character.MIN_HIGH_SURROGATE + rnd.nextInt(1024));
    }

    private static char randomLowSurrogate() {
        return (char) (Character.MIN_LOW_SURROGATE + rnd.nextInt(1024));
    }

    /**
     * Runs the surrogate and prefix tests on one charset. Charsets that
     * cannot encode, or that isBroken() flags, are skipped.
     */
    private static void testCharset(Charset cs) throws Throwable {
        if (! cs.canEncode())
            return;

        final String csn = cs.name();

        if (isBroken(csn)) {
            System.out.printf("Skipping possibly broken charset %s%n", csn);
            return;
        }
        System.out.println(csn);

        CharsetTester tester = new CharsetTester(cs);

        tester.testSurrogates();

        tester.testPrefix(new char[] {});

        if (csn.equals("x-ISCII91")) {
            System.out.println("More ISCII testing...");
            new CharsetTester(cs).testPrefix(new char[]{'\u094d'}); // Halant
            new CharsetTester(cs).testPrefix(new char[]{'\u093c'}); // Nukta
        }
    }

    /** Tests every available charset in sorted order, then prints the failure summary. */
    private static void realMain(String[] args) {
        for (Charset cs : sort(Charset.availableCharsets().values())) {
            try {
                testCharset(cs);
            } catch (TooManyFailures e) {
                System.out.printf("Too many failures for %s%n", cs);
            } catch (Throwable t) {
                unexpected(t);
            }
        }
        reporter.summarize();
    }

    //--------------------- Infrastructure ---------------------------
    static volatile long passed = 0, failed = 0;
    static void pass() {passed++;}
    static void fail() {failed++; Thread.dumpStack();}
    static void fail(String format, Object... args) {
        System.out.println(String.format(format, args)); failed++;}
    static void fail(String msg) {System.out.println(msg); fail();}
    static void unexpected(Throwable t) {failed++; t.printStackTrace();}
    static void check(boolean cond) {if (cond) pass(); else fail();}
    static void equal(Object x, Object y) {
        if (x == null ? y == null : x.equals(y)) pass();
        else fail(x + " not equal to " + y);}
    public static void main(String[] args) throws Throwable {
        try {realMain(args);} catch (Throwable t) {unexpected(t);}
        System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
        if (failed > 0) throw new AssertionError("Some tests failed");}
}