< prev index next >

src/java.base/share/classes/java/lang/StringCoding.java

Print this page

        

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -587,10 +587,14 @@
         }
         return i;
     }
 
+    // Convenience overload: forwards to encode8859_1(coder, val, doReplace)
+    // with doReplace == true. With that flag set, the three-argument overload
+    // skips its throwMalformed() call when implEncodeISOArray stops before the
+    // requested length.
     private static byte[] encode8859_1(byte coder, byte[] val) {
+        return encode8859_1(coder, val, true);
+    }
+
+    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
         if (coder == LATIN1) {
             return Arrays.copyOf(val, val.length);
         }
         int len = val.length >> 1;
         byte[] dst = new byte[len];

@@ -600,10 +604,13 @@
         while (sp < sl) {
             int ret = implEncodeISOArray(val, sp, dst, dp, len);
             sp = sp + ret;
             dp = dp + ret;
             if (ret != len) {
+                if (!doReplace) {
+                    throwMalformed(sp, 1);
+                }
                 char c = StringUTF16.getChar(val, sp++);
                 if (Character.isHighSurrogate(c) && sp < sl &&
                     Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
                     sp++;
                 }

@@ -674,10 +681,16 @@
+    // Never returns normally: unconditionally throws IllegalArgumentException
+    // with a message that carries the given offset (off) and length (nb).
     private static void throwMalformed(int off, int nb) {
         throw new IllegalArgumentException("malformed input off : " + off +
                                            ", length : " + nb);
     }
 
+    // Scans val for the first negative byte and reports that index (or
+    // val.length when there is none) through throwMalformed(int, int) with a
+    // length of 1. Always ends by throwing.
+    private static void throwMalformed(byte[] val) {
+        int firstNegative = 0;
+        for (; firstNegative < val.length; firstNegative++) {
+            if (val[firstNegative] < 0) {
+                break;
+            }
+        }
+        throwMalformed(firstNegative, 1);
+    }
+
     private static char repl = '\ufffd';
 
     private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
         // ascii-bais, which has a relative impact to the non-ascii-only bytes
         if (COMPACT_STRINGS && !hasNegatives(src, sp, len))

@@ -944,6 +957,134 @@
      * Throws IllegalArgumentException, instead of replacing, if unmappable.
      */
     static byte[] getBytesUTF8NoRepl(String s) {
+        // NOTE(review): the trailing 'false' is presumably encodeUTF8's
+        // "replace unmappable input" flag, consistent with the comment on this
+        // method -- confirm against encodeUTF8's declaration.
         return encodeUTF8(s.coder(), s.value(), false);
     }
+
+    ////////////////////// for j.n.f.Files //////////////////////////
+
+    // True when hasNegatives() reports no negative byte anywhere in src.
+    private static boolean isASCII(byte[] src) {
+        boolean anyNegative = hasNegatives(src, 0, src.length);
+        return !anyNegative;
+    }
+
+    // Builds a String from bytes that are to be read as Latin-1: tagged LATIN1
+    // when COMPACT_STRINGS is on, otherwise inflated via StringLatin1.inflate
+    // and tagged UTF16.
+    private static String newStringLatin1(byte[] src) {
+        if (!COMPACT_STRINGS) {
+            return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
+        }
+        return new String(src, LATIN1);
+    }
+
+    static String newStringNoRepl(byte[] src, Charset cs) {
+        if (cs == UTF_8) {
+            if (COMPACT_STRINGS && isASCII(src))
+                return new String(src, LATIN1);
+            Result ret = decodeUTF8_0(src, 0, src.length, false);
+            return new String(ret.value, ret.coder);
+        }
+        if (cs == ISO_8859_1) {
+            return newStringLatin1(src);
+        }
+        if (cs == US_ASCII) {
+            if (isASCII(src)) {
+                return newStringLatin1(src);
+            } else {
+                throwMalformed(src);
+            }
+        }
+
+        CharsetDecoder cd = cs.newDecoder();
+        // ascii fastpath
+        if ((cd instanceof ArrayDecoder) &&
+            ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
+            return newStringLatin1(src);
+        }
+        int len = src.length;
+        if (len == 0) {
+            return "";
+        }
+        int en = scale(len, cd.maxCharsPerByte());
+        char[] ca = new char[en];
+        if (cs.getClass().getClassLoader0() != null &&
+            System.getSecurityManager() != null) {
+            src = Arrays.copyOf(src, len);
+        }
+        ByteBuffer bb = ByteBuffer.wrap(src);
+        CharBuffer cb = CharBuffer.wrap(ca);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);  // todo
+        }
+        Result ret = resultCached.get().with(ca, 0, cb.position());
+        return new String(ret.value, ret.coder);
+    }
+
+    /*
+     * Encodes s with cs. Throws IllegalArgumentException, instead of
+     * replacing, if a character is unmappable.
+     *
+     * The LATIN1 fast paths below return the array obtained from s.value()
+     * as-is, without copying it.
+     */
+    static byte[] getBytesNoRepl(String s, Charset cs) {
+        byte[] val = s.value();
+        byte coder = s.coder();
+        if (cs == UTF_8) {
+            // The raw-bytes shortcut is only valid for LATIN1-coded strings. A
+            // UTF16-coded value can consist solely of non-negative bytes (for
+            // example U+4E2D is stored as 0x4E 0x2D) and must still be encoded.
+            if (coder == LATIN1 && isASCII(val)) {
+                return val;
+            }
+            return encodeUTF8(coder, val, false);
+        }
+        if (cs == ISO_8859_1) {
+            if (coder == LATIN1) {
+                return val;
+            }
+            return encode8859_1(coder, val, false);
+        }
+        if (cs == US_ASCII) {
+            if (coder == LATIN1) {
+                if (isASCII(val)) {
+                    return val;
+                } else {
+                    throwMalformed(val);    // always throws
+                }
+            }
+            // UTF16-coded strings fall through to the generic encoder below.
+        }
+        CharsetEncoder ce = cs.newEncoder();
+        // fastpath for ascii compatible
+        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
+                                 ((ArrayEncoder)ce).isASCIICompatible() &&
+                                 isASCII(val)))) {
+            return val;
+        }
+        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
+        int en = scale(len, ce.maxBytesPerChar());
+        byte[] ba = new byte[en];
+        if (len == 0) {
+            return ba;
+        }
+        if (ce instanceof ArrayEncoder) {
+            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
+                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
+            // NOTE(review): -1 presumably means the array encoder declined the
+            // input; in that case continue with the buffer-based encoder.
+            if (blen != -1) {
+                return safeTrim(ba, blen, true);
+            }
+        }
+        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
+                            System.getSecurityManager() == null;
+        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
+                                       : StringUTF16.toChars(val);
+        ByteBuffer bb = ByteBuffer.wrap(ba);
+        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
+        try {
+            CoderResult cr = ce.encode(cb, bb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = ce.flush(bb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            // Report encoding failures as IllegalArgumentException, as the
+            // contract above states and as newStringNoRepl does, rather than
+            // as an Error.
+            throw new IllegalArgumentException(x);
+        }
+        return safeTrim(ba, bb.position(), isTrusted);
+    }
 }
< prev index next >