/* * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* * @test * @bug 6233345 6381699 6381702 6381705 6381706 * @summary Encode many char sequences in many ways * @run main/timeout=1200 FindEncoderBugs * @author Martin Buchholz * @key randomness */ import java.util.*; import java.util.regex.*; import java.nio.*; import java.nio.charset.*; public class FindEncoderBugs { static boolean isBroken(String csn) { if (csn.equals("x-COMPOUND_TEXT")) return true; return false; } static > List sort(Collection c) { List list = new ArrayList(c); Collections.sort(list); return list; } static class TooManyFailures extends RuntimeException { private static final long serialVersionUID = 0L; } static String string(byte[] a) { final StringBuilder sb = new StringBuilder(); for (byte b : a) { if (sb.length() != 0) sb.append(' '); sb.append(String.format("%02x", b & 0xff)); } return sb.toString(); } static String string(char[] a) { final StringBuilder sb = new StringBuilder(); for (char c : a) { if (sb.length() != 0) sb.append(' '); sb.append(String.format("\\u%04x", (int) c)); } return sb.toString(); } static class Reporter { // Some machinery to make sure only a small number of errors // that are "too similar" are reported. static class Counts extends HashMap { private static final long serialVersionUID = -1; long inc(String signature) { Long count = get(signature); if (count == null) count = 0L; put(signature, count+1); return count+1; } } final Counts failureCounts = new Counts(); final static long maxFailures = 2; final static Pattern hideBytes = Pattern.compile("\"[0-9a-f ]+\""); final static Pattern hideChars = Pattern.compile("\\\\u[0-9a-f]{4}"); boolean bug(String format, Object... args) { String signature = String.format(format, args); // signature = hideBytes.matcher(signature).replaceAll("\"??\""); // signature = hideChars.matcher(signature).replaceAll("\\u????"); failed++; if (failureCounts.inc(signature) <= maxFailures) { System.out.printf(format, args); System.out.println(); return true; } return false; } void summarize() { for (String key : sort(failureCounts.keySet())) System.out.printf("-----%n%s%nfailures=%d%n", key, failureCounts.get(key)); } } static final Reporter reporter = new Reporter(); static class Result { final int limit; final int ipos; final boolean direct; final char[] ia; final byte[] oa; final CoderResult cr; private static byte[] toByteArray(ByteBuffer bb) { byte[] bytes = new byte[bb.position()]; for (int i = 0; i < bytes.length; i++) bytes[i] = bb.get(i); return bytes; } Result(CharBuffer ib, ByteBuffer ob, CoderResult cr) { ipos = ib.position(); ia = toArray(ib); oa = toArray(ob); direct = ib.isDirect(); limit = ob.limit(); this.cr = cr; } static char[] toArray(CharBuffer b) { int pos = b.position(); char[] a = new char[b.limit()]; b.position(0); b.get(a); b.position(pos); return a; } static byte[] toArray(ByteBuffer b) { byte[] a = new byte[b.position()]; b.position(0); b.get(a); return a; } static boolean eq(Result x, Result y) { return x == y || (x != null && y != null && (Arrays.equals(x.oa, y.oa) && x.ipos == y.ipos && x.cr == y.cr)); } public String toString() { return String.format("\"%s\"[%d/%d] => %s \"%s\"[%d/%d]%s", string(ia), ipos, ia.length, cr, string(oa), oa.length, limit, (direct ? " (direct)" : "")); } } static class CharsetTester { private final Charset cs; private final boolean hasBom; private static final int maxFailures = 5; private int failures = 0; // private static final long maxCharsetFailures = Long.MAX_VALUE; private static final long maxCharsetFailures = 10000L; private final long failed0 = failed; // legend: r=regular d=direct In=Input Ou=Output static final int maxBufSize = 20; static final CharBuffer[] rInBuffers = new CharBuffer[maxBufSize]; static final CharBuffer[] dInBuffers = new CharBuffer[maxBufSize]; static final ByteBuffer[] rOuBuffers = new ByteBuffer[maxBufSize]; static final ByteBuffer[] dOuBuffers = new ByteBuffer[maxBufSize]; static { for (int i = 0; i < maxBufSize; i++) { rInBuffers[i] = CharBuffer.allocate(i); dInBuffers[i] = ByteBuffer.allocateDirect(i*2).asCharBuffer(); rOuBuffers[i] = ByteBuffer.allocate(i); dOuBuffers[i] = ByteBuffer.allocateDirect(i); } } CharsetTester(Charset cs) { this.cs = cs; this.hasBom = cs.name().matches(".*BOM.*") || cs.name().equals("UTF-16"); } static boolean bug(String format, Object... args) { return reporter.bug(format, args); } static boolean hasBom(byte[] a) { switch (a.length) { case 2: case 4: int sum = 0; for (byte x : a) sum += x; return sum == (byte) 0xfe + (byte) 0xff; default: return false; } } void testSurrogates() { int failures = 0; for (int i = 0; i < 10; i++) { Result r = test(new char[] { randomHighSurrogate() }); if (r == null) break; if (! (r.cr.isUnderflow() && r.ipos == 0)) bug("Lone high surrogate not UNDERFLOW: %s %s", cs, r); } for (int i = 0; i < 10; i++) { Result r = test(new char[] { randomLowSurrogate() }); if (r == null) break; if (! (r.cr.isMalformed() && r.cr.length() == 1)) bug("Lone low surrogate not MALFORMED[1]: %s %s", cs, r); } char[] chars = new char[2]; for (int i = 0; i < 10; i++) { chars[0] = randomLowSurrogate(); // Always illegal chars[1] = randomChar(); Result r = test(chars); if (r == null) break; if (! (r.cr.isMalformed() && r.cr.length() == 1 && (r.ipos == 0 || (hasBom && hasBom(r.oa))))) { if (failures++ > 5) return; bug("Unpaired low surrogate not MALFORMED[1]: %s %s", cs, r); } } for (int i = 0; i < 10; i++) { chars[0] = randomHighSurrogate(); do { chars[1] = randomChar(); } while (Character.isLowSurrogate(chars[1])); Result r = test(chars); if (r == null) break; if (! (r.cr.isMalformed() && r.cr.length() == 1 && (r.ipos == 0 || (hasBom && hasBom(r.oa))))) { if (failures++ > 5) return; bug("Unpaired high surrogate not MALFORMED[1]: %s %s", cs, r); } } for (int i = 0; i < 1000; i++) { chars[0] = randomHighSurrogate(); chars[1] = randomLowSurrogate(); Result r = test(chars); if (r == null) break; if (! ((r.cr.isUnmappable() && r.cr.length() == 2 && r.oa.length == 0) || (r.cr.isUnderflow() && r.oa.length > 0 && r.ipos == 2))) { if (failures++ > 5) return; bug("Legal supplementary character bug: %s %s", cs, r); } } } // if (! (r.cr.isMalformed() && // r.cr.length() == 1 && // (rob.position() == 0 || hasBom(rob)))) { // if (failures++ > 5) return; // bug("Unpaired surrogate not malformed: %s %s", // cs, r); // } // } // dib.clear(); dib.put(chars); dib.flip(); // rib.position(0); // rob.clear(); rob.limit(lim); // for (CharBuffer ib : new CharBuffer[] { rib, dib }) { // Result r = recode(ib, rob); // if (! (r.cr.isMalformed() && // r.cr.length() == 1 && // (rob.position() == 0 || hasBom(rob)))) { // if (failures++ > 5) return; // bug("Unpaired surrogate not malformed: %s %s", // cs, r); // } // } // //} // for (int i = 0; i < 10000; i++) { // chars[0] = randomHighSurrogate(); // chars[1] = randomLowSurrogate(); // dib.clear(); dib.put(chars); dib.flip(); // rib.position(0); // rob.clear(); rob.limit(lim); // for (CharBuffer ib : new CharBuffer[] { rib, dib }) { // Result r = recode(ib, rob); // if (! ((r.cr.isUnmappable() && // r.cr.length() == 2 && // rob.position() == 0) // || // (r.cr.isUnderflow() && // rob.position() > 0 && // ib.position() == 2))) { // if (failures++ > 5) return; // bug("Legal supplementary character bug: %s %s", // cs, r); // } // } // } // } // } Result recode(CharBuffer ib, ByteBuffer ob) { try { byte canary = 22; ib.clear(); // Prepare to read ob.clear(); // Prepare to write for (int i = 0; i < ob.limit(); i++) ob.put(i, canary); CharsetEncoder coder = cs.newEncoder(); CoderResult cr = coder.encode(ib, ob, false); equal(ib.limit(), ib.capacity()); equal(ob.limit(), ob.capacity()); Result r = new Result(ib, ob, cr); if (cr.isError()) check(cr.length() > 0); if (cr.isOverflow() && ob.remaining() > 10) bug("OVERFLOW, but there's lots of room: %s %s", cs, r); // if (cr.isOverflow() && ib.remaining() == 0 && ! hasBom) // bug("OVERFLOW, yet remaining() == 0: %s %s", // cs, r); if (cr.isError() && ib.remaining() < cr.length()) bug("remaining() < CoderResult.length(): %s %s", cs, r); // if (ib.position() == 0 // && ob.position() > 0 // && ! hasBom(r.oa)) // bug("output only if input consumed: %s %s", // cs, r); CoderResult cr2 = coder.encode(ib, ob, false); if (ib.position() != r.ipos || ob.position() != r.oa.length || cr != cr2) bug("Coding operation not idempotent: %s%n %s%n %s", cs, r, new Result(ib, ob, cr2)); if (ob.position() < ob.limit() && ob.get(ob.position()) != canary) bug("Buffer overrun: %s %s %s", cs, r, ob.get(ob.position())); return r; } catch (Throwable t) { if (bug("Unexpected exception: %s %s %s", cs, t.getClass().getSimpleName(), new Result(ib, ob, null))) t.printStackTrace(); return null; } } Result recode2(char[] ia, int n) { int len = ia.length; CharBuffer rib = CharBuffer.wrap(ia); CharBuffer dib = dInBuffers[len]; dib.clear(); dib.put(ia); dib.clear(); ByteBuffer rob = rOuBuffers[n]; ByteBuffer dob = dOuBuffers[n]; equal(rob.limit(), n); equal(dob.limit(), n); check(dib.isDirect()); check(dob.isDirect()); Result r1 = recode(rib, rob); Result r2 = recode(dib, dob); if (r1 != null && r2 != null && ! Result.eq(r1, r2)) bug("Results differ for direct buffers: %s%n %s%n %s", cs, r1, r2); return r1; } Result test(char[] ia) { if (failed - failed0 >= maxCharsetFailures) throw new TooManyFailures(); Result roomy = recode2(ia, maxBufSize - 1); if (roomy == null) return roomy; int olen = roomy.oa.length; if (olen > 0) { if (roomy.ipos == roomy.ia.length) { Result perfectFit = recode2(ia, olen); if (! Result.eq(roomy, perfectFit)) bug("Results differ: %s%n %s%n %s", cs, roomy, perfectFit); } for (int i = 0; i < olen; i++) { Result claustrophobic = recode2(ia, i); if (claustrophobic == null) return roomy; if (roomy.cr.isUnderflow() && ! claustrophobic.cr.isOverflow()) bug("Expected OVERFLOW: %s%n %s%n %s", cs, roomy, claustrophobic); } } return roomy; } void testExhaustively(char[] prefix, int n) { int len = prefix.length; char[] ia = Arrays.copyOf(prefix, len + 1); for (int i = 0; i < 0x10000; i++) { ia[len] = (char) i; if (n == 1) test(ia); else testExhaustively(ia, n - 1); } } void testRandomly(char[] prefix, int n) { int len = prefix.length; char[] ia = Arrays.copyOf(prefix, len + n); for (int i = 0; i < 10000; i++) { for (int j = 0; j < n; j++) ia[len + j] = randomChar(); test(ia); } } void testPrefix(char[] prefix) { if (prefix.length > 0) System.out.printf("Testing prefix %s%n", string(prefix)); test(prefix); testExhaustively(prefix, 1); // Can you spare a year of CPU time? //testExhaustively(prefix, 2); testRandomly(prefix, 2); testRandomly(prefix, 3); } } private final static Random rnd = new Random(); private static char randomChar() { return (char) rnd.nextInt(Character.MAX_VALUE); } private static char randomHighSurrogate() { return (char) (Character.MIN_HIGH_SURROGATE + rnd.nextInt(1024)); } private static char randomLowSurrogate() { return (char) (Character.MIN_LOW_SURROGATE + rnd.nextInt(1024)); } private static void testCharset(Charset cs) throws Throwable { if (! cs.canEncode()) return; final String csn = cs.name(); if (isBroken(csn)) { System.out.printf("Skipping possibly broken charset %s%n", csn); return; } System.out.println(csn); CharsetTester tester = new CharsetTester(cs); tester.testSurrogates(); tester.testPrefix(new char[] {}); if (csn.equals("x-ISCII91")) { System.out.println("More ISCII testing..."); new CharsetTester(cs).testPrefix(new char[]{'\u094d'}); // Halant new CharsetTester(cs).testPrefix(new char[]{'\u093c'}); // Nukta } } private static void realMain(String[] args) { for (Charset cs : sort(Charset.availableCharsets().values())) { try { testCharset(cs); } catch (TooManyFailures e) { System.out.printf("Too many failures for %s%n", cs); } catch (Throwable t) { unexpected(t); } } reporter.summarize(); } //--------------------- Infrastructure --------------------------- static volatile long passed = 0, failed = 0; static void pass() {passed++;} static void fail() {failed++; Thread.dumpStack();} static void fail(String format, Object... args) { System.out.println(String.format(format, args)); failed++;} static void fail(String msg) {System.out.println(msg); fail();} static void unexpected(Throwable t) {failed++; t.printStackTrace();} static void check(boolean cond) {if (cond) pass(); else fail();} static void equal(Object x, Object y) { if (x == null ? y == null : x.equals(y)) pass(); else fail(x + " not equal to " + y);} public static void main(String[] args) throws Throwable { try {realMain(args);} catch (Throwable t) {unexpected(t);} System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed); if (failed > 0) throw new AssertionError("Some tests failed");} }