/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 6233345 6381699 6381702 6381705 6381706
 * @summary Encode many char sequences in many ways
 * @run main/timeout=1200 FindEncoderBugs
 * @author Martin Buchholz
 * @key randomness
 */

import java.util.*;
import java.util.regex.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * Stress test for {@link CharsetEncoder} implementations.
 *
 * <p>For every available charset that can encode, short char sequences
 * (surrogate edge cases, every single char, and random 2- and 3-char
 * sequences) are encoded into heap and direct buffers of several sizes, and
 * the results are cross-checked against each other.
 */
public class FindEncoderBugs {

    /**
     * Returns true for charsets that are skipped by this test.
     *
     * @param csn the canonical charset name
     */
    static boolean isBroken(String csn) {
        return csn.equals("x-COMPOUND_TEXT");
    }

    /**
     * Returns a new list holding the elements of {@code c} in natural order.
     * The argument collection is not modified.
     */
    static <T extends Comparable<? super T>> List<T> sort(Collection<T> c) {
        List<T> list = new ArrayList<T>(c);
        Collections.sort(list);
        return list;
    }

    /** Thrown by {@link CharsetTester#test} to abandon a charset with too many failures. */
    static class TooManyFailures extends RuntimeException {
        private static final long serialVersionUID = 0L;
    }

    /**
     * Formats bytes as space-separated, two-digit lowercase hex values,
     * e.g. {@code "fe ff"}.
     */
    static String string(byte[] a) {
        final StringBuilder sb = new StringBuilder();
        for (byte b : a) {
            if (sb.length() != 0) sb.append(' ');
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /**
     * Formats chars as space-separated escapes, each consisting of a
     * backslash, the letter u and four lowercase hex digits.
     */
    static String string(char[] a) {
        final StringBuilder sb = new StringBuilder();
        for (char c : a) {
            if (sb.length() != 0) sb.append(' ');
            sb.append(String.format("\\u%04x", (int) c));
        }
        return sb.toString();
    }

    /** Prints failures and keeps a per-message count for the final summary. */
    static class Reporter {
        // Some machinery to make sure only a small number of errors
        // that are "too similar" are reported.
        static class Counts extends HashMap<String, Long> {
            private static final long serialVersionUID = -1;

            /** Increments the count stored for {@code signature} and returns the new count. */
            long inc(String signature) {
                Long count = get(signature);
                if (count == null) count = 0L;
                put(signature, count + 1);
                return count + 1;
            }
        }

        final Counts failureCounts = new Counts();

        /** Maximum number of times a message with the same signature is printed. */
        final static long maxFailures = 2;

        // Only referenced by the disabled signature normalization in bug().
        final static Pattern hideBytes = Pattern.compile("\"[0-9a-f ]+\"");
        final static Pattern hideChars = Pattern.compile("\\\\u[0-9a-f]{4}");

        /**
         * Records a failure: increments the global {@code failed} counter and
         * prints the formatted message unless the same message has already
         * been printed {@link #maxFailures} times.
         *
         * @return true if the message was printed, false if it was suppressed
         */
        boolean bug(String format, Object... args) {
            String signature = String.format(format, args);
            // signature = hideBytes.matcher(signature).replaceAll("\"??\"");
            // signature = hideChars.matcher(signature).replaceAll("\\u????");
            failed++;
            if (failureCounts.inc(signature) <= maxFailures) {
                System.out.printf(format, args);
                System.out.println();
                return true;
            }
            return false;
        }

        /** Prints every recorded signature, in sorted order, with its failure count. */
        void summarize() {
            for (String key : sort(failureCounts.keySet()))
                System.out.printf("-----%n%s%nfailures=%d%n",
                                  key, failureCounts.get(key));
        }
    }

    static final Reporter reporter = new Reporter();

    /** Snapshot of the buffers and the {@link CoderResult} after one encode call. */
    static class Result {
        final int limit;        // limit of the output buffer
        final int ipos;         // position of the input buffer
        final boolean direct;   // whether the input buffer is direct
        final char[] ia;        // input buffer content, [0, limit)
        final byte[] oa;        // output buffer content, [0, position)
        final CoderResult cr;   // may be null (see the catch block in recode)

        Result(CharBuffer ib, ByteBuffer ob, CoderResult cr) {
            ipos = ib.position();
            ia = toArray(ib);
            oa = toArray(ob);
            direct = ib.isDirect();
            limit = ob.limit();
            this.cr = cr;
        }

        /** Copies chars [0, limit) out of {@code b}; the buffer position is restored. */
        static char[] toArray(CharBuffer b) {
            int pos = b.position();
            char[] a = new char[b.limit()];
            b.position(0);
            b.get(a);
            b.position(pos);
            return a;
        }

        /**
         * Copies bytes [0, position) out of {@code b}. The bulk get leaves the
         * position where it was on entry.
         */
        static byte[] toArray(ByteBuffer b) {
            byte[] a = new byte[b.position()];
            b.position(0);
            b.get(a);
            return a;
        }

        /**
         * Two results are considered equal when they produced the same output
         * bytes, consumed the same number of chars and returned the identical
         * CoderResult object. Input content and directness are not compared.
         */
        static boolean eq(Result x, Result y) {
            return x == y ||
                (x != null && y != null &&
                 (Arrays.equals(x.oa, y.oa) &&
                  x.ipos == y.ipos &&
                  x.cr == y.cr));
        }

        @Override
        public String toString() {
            return String.format("\"%s\"[%d/%d] => %s \"%s\"[%d/%d]%s",
                                 string(ia), ipos, ia.length,
                                 cr, string(oa), oa.length, limit,
                                 (direct ? " (direct)" : ""));
        }
    }

    /** Runs all encoder checks against a single charset. */
    static class CharsetTester {
        private final Charset cs;
        private final boolean hasBom;

        /** Failure threshold for the counted loops in {@link #testSurrogates}. */
        private static final int maxFailures = 5;

        // private static final long maxCharsetFailures = Long.MAX_VALUE;
        private static final long maxCharsetFailures = 10000L;

        /** Value of the global failure counter when this tester was created. */
        private final long failed0 = failed;

        // legend: r=regular d=direct In=Input Ou=Output
        static final int maxBufSize = 20;
        static final CharBuffer[] rInBuffers = new CharBuffer[maxBufSize];
        static final CharBuffer[] dInBuffers = new CharBuffer[maxBufSize];

        static final ByteBuffer[] rOuBuffers = new ByteBuffer[maxBufSize];
        static final ByteBuffer[] dOuBuffers = new ByteBuffer[maxBufSize];
        static {
            // Element i of each array has capacity i (chars or bytes).
            for (int i = 0; i < maxBufSize; i++) {
                rInBuffers[i] = CharBuffer.allocate(i);
                dInBuffers[i] = ByteBuffer.allocateDirect(i*2).asCharBuffer();
                rOuBuffers[i] = ByteBuffer.allocate(i);
                dOuBuffers[i] = ByteBuffer.allocateDirect(i);
            }
        }

        CharsetTester(Charset cs) {
            this.cs = cs;
            this.hasBom =
                cs.name().contains("BOM") ||
                cs.name().equals("UTF-16");
        }

        /** Delegates to the shared {@link Reporter}. */
        static boolean bug(String format, Object... args) {
            return reporter.bug(format, args);
        }

        /**
         * Loose byte-order-mark heuristic: true when {@code a} has length 2
         * or 4 and its (signed) bytes add up to the same value as the bytes
         * 0xfe and 0xff. Neither byte order nor position is checked.
         */
        static boolean hasBom(byte[] a) {
            if (a.length != 2 && a.length != 4)
                return false;
            int sum = 0;
            for (byte x : a)
                sum += x;
            return sum == (byte) 0xfe + (byte) 0xff;
        }

        /**
         * Checks the encoder's treatment of surrogates: lone high, lone low,
         * unpaired low, unpaired high, and legal high/low pairs. Each loop
         * stops early when {@link #test} returns null; the last three loops
         * abandon the whole method once the failure count passes
         * {@link #maxFailures}.
         */
        void testSurrogates() {
            int failures = 0;
            for (int i = 0; i < 10; i++) {
                Result r = test(new char[] { randomHighSurrogate() });
                if (r == null) break;
                if (! (r.cr.isUnderflow() &&
                       r.ipos == 0))
                    bug("Lone high surrogate not UNDERFLOW: %s %s",
                        cs, r);
            }
            for (int i = 0; i < 10; i++) {
                Result r = test(new char[] { randomLowSurrogate() });
                if (r == null) break;
                if (! (r.cr.isMalformed() && r.cr.length() == 1))
                    bug("Lone low surrogate not MALFORMED[1]: %s %s",
                        cs, r);
            }
            char[] chars = new char[2];
            for (int i = 0; i < 10; i++) {
                chars[0] = randomLowSurrogate(); // Always illegal
                chars[1] = randomChar();
                Result r = test(chars);
                if (r == null) break;
                if (! (r.cr.isMalformed() &&
                       r.cr.length() == 1 &&
                       (r.ipos == 0 || (hasBom && hasBom(r.oa))))) {
                    if (failures++ > maxFailures) return;
                    bug("Unpaired low surrogate not MALFORMED[1]: %s %s",
                        cs, r);
                }
            }
            for (int i = 0; i < 10; i++) {
                chars[0] = randomHighSurrogate();
                do {
                    chars[1] = randomChar();
                } while (Character.isLowSurrogate(chars[1]));
                Result r = test(chars);
                if (r == null) break;
                if (! (r.cr.isMalformed() &&
                       r.cr.length() == 1 &&
                       (r.ipos == 0 || (hasBom && hasBom(r.oa))))) {
                    if (failures++ > maxFailures) return;
                    bug("Unpaired high surrogate not MALFORMED[1]: %s %s",
                        cs, r);
                }
            }
            for (int i = 0; i < 1000; i++) {
                chars[0] = randomHighSurrogate();
                chars[1] = randomLowSurrogate();
                Result r = test(chars);
                if (r == null) break;
                // Either the pair is unmappable with nothing written, or it
                // was fully consumed and produced some output.
                if (! ((r.cr.isUnmappable() &&
                        r.cr.length() == 2 &&
                        r.oa.length == 0)
                       ||
                       (r.cr.isUnderflow() &&
                        r.oa.length > 0 &&
                        r.ipos == 2))) {
                    if (failures++ > maxFailures) return;
                    bug("Legal supplementary character bug: %s %s",
                        cs, r);
                }
            }
        }

        // NOTE(review): everything below, up to recode(), is disabled code
        // kept from the original file. It refers to names (rib, dib, rob,
        // lim) that are not in scope here, so it cannot be re-enabled as is.

//                 if (! (r.cr.isMalformed() &&
//                        r.cr.length() == 1 &&
//                        (rob.position() == 0 || hasBom(rob)))) {
//                     if (failures++ > 5) return;
//                     bug("Unpaired surrogate not malformed: %s %s",
//                          cs, r);
//                 }
//             }

//                 dib.clear(); dib.put(chars); dib.flip();
//                 rib.position(0);
//                 rob.clear(); rob.limit(lim);
//                 for (CharBuffer ib : new CharBuffer[] { rib, dib }) {
//                     Result r = recode(ib, rob);
//                     if (! (r.cr.isMalformed() &&
//                            r.cr.length() == 1 &&
//                            (rob.position() == 0 || hasBom(rob)))) {
//                         if (failures++ > 5) return;
//                         bug("Unpaired surrogate not malformed: %s %s",
//                             cs, r);
//                     }
//                 }
//             //}
//             for (int i = 0; i < 10000; i++) {
//                 chars[0] = randomHighSurrogate();
//                 chars[1] = randomLowSurrogate();
//                 dib.clear(); dib.put(chars); dib.flip();
//                 rib.position(0);
//                 rob.clear(); rob.limit(lim);
//                 for (CharBuffer ib : new CharBuffer[] { rib, dib }) {
//                     Result r = recode(ib, rob);
//                     if (! ((r.cr.isUnmappable() &&
//                             r.cr.length() == 2 &&
//                             rob.position() == 0)
//                            ||
//                            (r.cr.isUnderflow() &&
//                             rob.position() > 0 &&
//                             ib.position() == 2))) {
//                         if (failures++ > 5) return;
//                         bug("Legal supplementary character bug: %s %s",
//                             cs, r);
//                     }
//                 }
//             }
//         }
//     }

        /**
         * Encodes all of {@code ib} into {@code ob} with a fresh encoder
         * ({@code endOfInput == false}) and runs sanity checks on the outcome.
         * Both buffers are cleared first and the output buffer is filled with
         * a canary byte so that writes past the reported position can be
         * detected. The encode call is then repeated to verify that it changes
         * neither the buffer positions nor the result.
         *
         * @return the state after the first encode call, or null if anything
         *         was thrown (the exception is reported as a bug)
         */
        Result recode(CharBuffer ib, ByteBuffer ob) {
            try {
                byte canary = 22;
                ib.clear();     // Prepare to read
                ob.clear();     // Prepare to write
                for (int i = 0; i < ob.limit(); i++)
                    ob.put(i, canary);
                CharsetEncoder coder = cs.newEncoder();
                CoderResult cr = coder.encode(ib, ob, false);
                equal(ib.limit(), ib.capacity());
                equal(ob.limit(), ob.capacity());
                Result r = new Result(ib, ob, cr);
                if (cr.isError())
                    check(cr.length() > 0);
                if (cr.isOverflow() && ob.remaining() > 10)
                    bug("OVERFLOW, but there's lots of room: %s %s",
                        cs, r);
//              if (cr.isOverflow() && ib.remaining() == 0 && ! hasBom)
//                  bug("OVERFLOW, yet remaining() == 0: %s %s",
//                      cs, r);
                if (cr.isError() && ib.remaining() < cr.length())
                    bug("remaining() < CoderResult.length(): %s %s",
                        cs, r);
//              if (ib.position() == 0
//                  && ob.position() > 0
//                  && ! hasBom(r.oa))
//                  bug("output only if input consumed: %s %s",
//                       cs, r);
                CoderResult cr2 = coder.encode(ib, ob, false);
                if (ib.position() != r.ipos ||
                    ob.position() != r.oa.length ||
                    cr != cr2)
                    bug("Coding operation not idempotent: %s%n    %s%n    %s",
                        cs, r, new Result(ib, ob, cr2));
                // The first byte after the written output must still be the canary.
                if (ob.position() < ob.limit() &&
                    ob.get(ob.position()) != canary)
                    bug("Buffer overrun: %s %s %s",
                        cs, r, ob.get(ob.position()));
                return r;
            } catch (Throwable t) {
                if (bug("Unexpected exception: %s %s %s",
                        cs, t.getClass().getSimpleName(),
                        new Result(ib, ob, null)))
                    t.printStackTrace();
                return null;
            }
        }

        /**
         * Encodes {@code ia} twice, once with heap buffers and once with
         * direct buffers, into output buffers of capacity {@code n}, and
         * reports a bug if the two results differ.
         *
         * @param ia input chars; the length must be below {@code maxBufSize}
         * @param n  output capacity; must be below {@code maxBufSize}
         * @return the heap-buffer result (null if that encode threw)
         */
        Result recode2(char[] ia, int n) {
            int len = ia.length;
            CharBuffer rib = CharBuffer.wrap(ia);
            CharBuffer dib = dInBuffers[len];
            dib.clear(); dib.put(ia); dib.clear();
            ByteBuffer rob = rOuBuffers[n];
            ByteBuffer dob = dOuBuffers[n];
            equal(rob.limit(), n);
            equal(dob.limit(), n);
            check(dib.isDirect());
            check(dob.isDirect());
            Result r1 = recode(rib, rob);
            Result r2 = recode(dib, dob);
            if (r1 != null && r2 != null && ! Result.eq(r1, r2))
                bug("Results differ for direct buffers: %s%n    %s%n    %s",
                    cs, r1, r2);
            return r1;
        }

        /**
         * Encodes {@code ia} into the largest output buffer and then, if any
         * output was produced, into an exactly fitting one (only when all
         * input was consumed) and into every smaller one.
         *
         * @return the result for the largest buffer, or null if that encode threw
         * @throws TooManyFailures once {@code maxCharsetFailures} failures
         *         have been counted since this tester was created
         */
        Result test(char[] ia) {
            if (failed - failed0 >= maxCharsetFailures)
                throw new TooManyFailures();

            Result roomy = recode2(ia, maxBufSize - 1);
            if (roomy == null) return roomy;
            int olen = roomy.oa.length;
            if (olen > 0) {
                if (roomy.ipos == roomy.ia.length) {
                    Result perfectFit = recode2(ia, olen);
                    if (! Result.eq(roomy, perfectFit))
                        bug("Results differ: %s%n    %s%n    %s",
                            cs, roomy, perfectFit);
                }
                for (int i = 0; i < olen; i++) {
                    Result claustrophobic = recode2(ia, i);
                    if (claustrophobic == null) return roomy;
                    if (roomy.cr.isUnderflow() &&
                        ! claustrophobic.cr.isOverflow())
                        bug("Expected OVERFLOW: %s%n    %s%n    %s",
                            cs, roomy, claustrophobic);
                }
            }
            return roomy;
        }

        /** Tests {@code prefix} followed by every possible sequence of {@code n} chars. */
        void testExhaustively(char[] prefix, int n) {
            int len = prefix.length;
            char[] ia = Arrays.copyOf(prefix, len + 1);
            for (int i = 0; i < 0x10000; i++) {
                ia[len] = (char) i;
                if (n == 1)
                    test(ia);
                else
                    testExhaustively(ia, n - 1);
            }
        }

        /** Tests {@code prefix} followed by {@code n} random chars, 10000 times. */
        void testRandomly(char[] prefix, int n) {
            int len = prefix.length;
            char[] ia = Arrays.copyOf(prefix, len + n);
            for (int i = 0; i < 10000; i++) {
                for (int j = 0; j < n; j++)
                    ia[len + j] = randomChar();
                test(ia);
            }
        }

        /** Runs the exhaustive one-char and random two- and three-char tests after {@code prefix}. */
        void testPrefix(char[] prefix) {
            if (prefix.length > 0)
                System.out.printf("Testing prefix %s%n", string(prefix));

            test(prefix);

            testExhaustively(prefix, 1);
            // Can you spare a year of CPU time?
            //testExhaustively(prefix, 2);

            testRandomly(prefix, 2);
            testRandomly(prefix, 3);
        }
    }

    private final static Random rnd = new Random();

    /** Returns a random char drawn from the shared generator. */
    private static char randomChar() {
        return randomChar(rnd);
    }

    /**
     * Returns a random char in the full range U+0000..U+FFFF.
     * {@code Random.nextInt(bound)} excludes its bound, so the bound has to
     * be {@code MAX_VALUE + 1} for U+FFFF to be reachable.
     *
     * @param r the source of randomness
     */
    static char randomChar(Random r) {
        return (char) r.nextInt(Character.MAX_VALUE + 1);
    }

    private static char randomHighSurrogate() {
        return (char) (Character.MIN_HIGH_SURROGATE + rnd.nextInt(1024));
    }

    private static char randomLowSurrogate() {
        return (char) (Character.MIN_LOW_SURROGATE + rnd.nextInt(1024));
    }

    /**
     * Runs the surrogate and prefix tests for one charset. Charsets that
     * cannot encode, or that {@link #isBroken} rejects, are skipped.
     */
    private static void testCharset(Charset cs) throws Throwable {
        if (! cs.canEncode())
            return;

        final String csn = cs.name();

        if (isBroken(csn)) {
            System.out.printf("Skipping possibly broken charset %s%n", csn);
            return;
        }
        System.out.println(csn);

        CharsetTester tester = new CharsetTester(cs);

        tester.testSurrogates();

        tester.testPrefix(new char[] {});

        if (csn.equals("x-ISCII91")) {
            System.out.println("More ISCII testing...");
            new CharsetTester(cs).testPrefix(new char[]{'\u094d'}); // Halant
            new CharsetTester(cs).testPrefix(new char[]{'\u093c'}); // Nukta
        }
    }

    /** Tests every available charset in sorted order, then prints the failure summary. */
    private static void realMain(String[] args) {
        for (Charset cs : sort(Charset.availableCharsets().values())) {
            try {
                testCharset(cs);
            } catch (TooManyFailures e) {
                System.out.printf("Too many failures for %s%n", cs);
            } catch (Throwable t) {
                unexpected(t);
            }
        }
        reporter.summarize();
    }

    //--------------------- Infrastructure ---------------------------
    static volatile long passed = 0, failed = 0;
    static void pass() {passed++;}
    static void fail() {failed++; Thread.dumpStack();}
    static void fail(String format, Object... args) {
        System.out.println(String.format(format, args)); failed++;}
    static void fail(String msg) {System.out.println(msg); fail();}
    static void unexpected(Throwable t) {failed++; t.printStackTrace();}
    static void check(boolean cond) {if (cond) pass(); else fail();}
    static void equal(Object x, Object y) {
        if (x == null ? y == null : x.equals(y)) pass();
        else fail(x + " not equal to " + y);}
    public static void main(String[] args) throws Throwable {
        try {realMain(args);} catch (Throwable t) {unexpected(t);}
        System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
        if (failed > 0) throw new AssertionError("Some tests failed");}
}