--- old/src/java.base/share/classes/java/lang/StringCoding.java 2018-06-12 09:21:20.122802833 -0700 +++ new/src/java.base/share/classes/java/lang/StringCoding.java 2018-06-12 09:21:19.532739762 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -589,6 +589,10 @@ } private static byte[] encode8859_1(byte coder, byte[] val) { + return encode8859_1(coder, val, true); + } + + private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } @@ -602,6 +606,9 @@ sp = sp + ret; dp = dp + ret; if (ret != len) { + if (!doReplace) { + throwMalformed(sp, 1); + } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { @@ -676,6 +683,12 @@ ", length : " + nb); } + private static void throwMalformed(byte[] val) { + int dp = 0; + while (dp < val.length && val[dp] >=0) { dp++; } + throwMalformed(dp, 1); + } + private static char repl = '\ufffd'; private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { @@ -946,4 +959,132 @@ static byte[] getBytesUTF8NoRepl(String s) { return encodeUTF8(s.coder(), s.value(), false); } + + ////////////////////// for j.n.f.Files ////////////////////////// + + private static boolean isASCII(byte[] src) { + return !hasNegatives(src, 0, src.length); + } + + private static String newStringLatin1(byte[] src) { + if (COMPACT_STRINGS) + return new String(src, LATIN1); + return new String(StringLatin1.inflate(src, 0, src.length), UTF16); + } + + static String newStringNoRepl(byte[] src, Charset cs) { + if (cs == UTF_8) { + if (COMPACT_STRINGS && isASCII(src)) + return new String(src, LATIN1); + 
Result ret = decodeUTF8_0(src, 0, src.length, false); + return new String(ret.value, ret.coder); + } + if (cs == ISO_8859_1) { + return newStringLatin1(src); + } + if (cs == US_ASCII) { + if (isASCII(src)) { + return newStringLatin1(src); + } else { + throwMalformed(src); + } + } + + CharsetDecoder cd = cs.newDecoder(); + // ascii fastpath + if ((cd instanceof ArrayDecoder) && + ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) { + return newStringLatin1(src); + } + int len = src.length; + if (len == 0) { + return ""; + } + int en = scale(len, cd.maxCharsPerByte()); + char[] ca = new char[en]; + if (cs.getClass().getClassLoader0() != null && + System.getSecurityManager() != null) { + src = Arrays.copyOf(src, len); + } + ByteBuffer bb = ByteBuffer.wrap(src); + CharBuffer cb = CharBuffer.wrap(ca); + try { + CoderResult cr = cd.decode(bb, cb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = cd.flush(cb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new IllegalArgumentException(x); // todo + } + Result ret = resultCached.get().with(ca, 0, cb.position()); + return new String(ret.value, ret.coder); + } + + /* + * Throws iae, instead of replacing, if unmappable. 
+ */ + static byte[] getBytesNoRepl(String s, Charset cs) { + byte[] val = s.value(); + byte coder = s.coder(); + if (cs == UTF_8) { + if (isASCII(val)) { + return val; + } + return encodeUTF8(coder, val, false); + } + if (cs == ISO_8859_1) { + if (coder == LATIN1) { + return val; + } + return encode8859_1(coder, val, false); + } + if (cs == US_ASCII) { + if (coder == LATIN1) { + if (isASCII(val)) { + return val; + } else { + throwMalformed(val); + } + } + } + CharsetEncoder ce = cs.newEncoder(); + // fastpath for ascii compatible + if (coder == LATIN1 && (((ce instanceof ArrayEncoder) && + ((ArrayEncoder)ce).isASCIICompatible() && + isASCII(val)))) { + return val; + } + int len = val.length >> coder; // assume LATIN1=0/UTF16=1; + int en = scale(len, ce.maxBytesPerChar()); + byte[] ba = new byte[en]; + if (len == 0) { + return ba; + } + if (ce instanceof ArrayEncoder) { + int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) + : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); + if (blen != -1) { + return safeTrim(ba, blen, true); + } + } + boolean isTrusted = cs.getClass().getClassLoader0() == null || + System.getSecurityManager() == null; + char[] ca = (coder == LATIN1 ) ? 
StringLatin1.toChars(val) + : StringUTF16.toChars(val); + ByteBuffer bb = ByteBuffer.wrap(ba); + CharBuffer cb = CharBuffer.wrap(ca, 0, len); + try { + CoderResult cr = ce.encode(cb, bb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = ce.flush(bb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new Error(x); + } + return safeTrim(ba, bb.position(), isTrusted); + } } --- old/src/java.base/share/classes/java/lang/System.java 2018-06-12 09:21:21.367935923 -0700 +++ new/src/java.base/share/classes/java/lang/System.java 2018-06-12 09:21:20.967893163 -0700 @@ -47,6 +47,7 @@ import java.security.PrivilegedAction; import java.nio.channels.Channel; import java.nio.channels.spi.SelectorProvider; +import java.nio.charset.Charset; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -2150,6 +2151,14 @@ return ModuleLayer.layers(loader); } + public String newStringNoRepl(byte[] bytes, Charset cs) { + return StringCoding.newStringNoRepl(bytes, cs); + } + + public byte[] getBytesNoRepl(String s, Charset cs) { + return StringCoding.getBytesNoRepl(s, cs); + } + public String newStringUTF8NoRepl(byte[] bytes, int off, int len) { return StringCoding.newStringUTF8NoRepl(bytes, off, len); } --- old/src/java.base/share/classes/java/nio/file/Files.java 2018-06-12 09:21:22.805089539 -0700 +++ new/src/java.base/share/classes/java/nio/file/Files.java 2018-06-12 09:21:22.386044748 -0700 @@ -3121,6 +3121,9 @@ */ private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8; + private static final jdk.internal.misc.JavaLangAccess JLA = + jdk.internal.misc.SharedSecrets.getJavaLangAccess(); + /** * Reads all the bytes from an input stream. Uses {@code initialSize} as a hint * about how many bytes the stream will have. @@ -3203,6 +3206,81 @@ } /** + * Reads all content from a file into a string, decoding from bytes to characters + * using the {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}. 
+ * The method ensures that the file is closed when all content has been read + * or an I/O error, or other runtime exception, is thrown. + * + *
This method is equivalent to: + * {@code readString(path, StandardCharsets.UTF_8) } + * + * @param path the path to the file + * + * @return a String containing the content read from the file + * + * @throws IOException + * if an I/O error occurs reading from the file or a malformed or + * unmappable byte sequence is read + * @throws OutOfMemoryError + * if the file is extremely large, for example larger than {@code 2GB} + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkRead(String) checkRead} + * method is invoked to check read access to the file. + * + * @since 11 + */ + public static String readString(Path path) throws IOException { + return readString(path, StandardCharsets.UTF_8); + } + + /** + * Reads all characters from a file into a string, decoding from bytes to characters + * using the specified {@linkplain Charset charset}. + * The method ensures that the file is closed when all content has been read + * or an I/O error, or other runtime exception, is thrown. + * + *
This method reads all content including the line separators in the middle + * and/or at the end. The resulting string will contain line separators as they + * appear in the file. + * + * @apiNote + * This method is intended for simple cases where it is appropriate and convenient + * to read the content of a file into a String. It is not intended for reading + * very large files. + * + * + * + * @param path the path to the file + * @param cs the charset to use for decoding + * + * @return a String containing the content read from the file + * + * @throws IOException + * if an I/O error occurs reading from the file or a malformed or + * unmappable byte sequence is read + * @throws OutOfMemoryError + * if the file is extremely large, for example larger than {@code 2GB} + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkRead(String) checkRead} + * method is invoked to check read access to the file. + * + * @since 11 + */ + public static String readString(Path path, Charset cs) throws IOException { + Objects.requireNonNull(path); + Objects.requireNonNull(cs); + + byte[] ba = readAllBytes(path); + try { + return JLA.newStringNoRepl(ba, cs); + } catch (IllegalArgumentException e) { + throw new IOException(e); + } + } + + /** * Read all lines from a file. This method ensures that the file is * closed when all bytes have been read or an I/O error, or other runtime * exception, is thrown. Bytes from the file are decoded into characters @@ -3456,6 +3534,110 @@ return write(path, lines, StandardCharsets.UTF_8, options); } + /** + * Write a {@linkplain java.lang.CharSequence CharSequence} to a file. + * Characters are encoded into bytes using the + * {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}. + * + *
This method is equivalent to: + * {@code writeString(path, csq, StandardCharsets.UTF_8, options) } + * + * @param path + * the path to the file + * @param csq + * the CharSequence to be written + * @param options + * options specifying how the file is opened + * + * @return the path + * + * @throws IllegalArgumentException + * if {@code options} contains an invalid combination of options + * @throws IOException + * if an I/O error occurs writing to or creating the file, or the + * text cannot be encoded using the specified charset + * @throws UnsupportedOperationException + * if an unsupported option is specified + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkWrite(String) checkWrite} + * method is invoked to check write access to the file. The {@link + * SecurityManager#checkDelete(String) checkDelete} method is + * invoked to check delete access if the file is opened with the + * {@code DELETE_ON_CLOSE} option. + * + * @since 11 + */ + public static Path writeString(Path path, CharSequence csq, OpenOption... options) + throws IOException + { + return writeString(path, csq, StandardCharsets.UTF_8, options); + } + + /** + * Write a {@linkplain java.lang.CharSequence CharSequence} to a file. + * Characters are encoded into bytes using the specified + * {@linkplain java.nio.charset.Charset charset}. + * + *
All characters are written as they are, including the line separators in + * the char sequence. No extra characters are added. + * + *
The {@code options} parameter specifies how the file is created
+ * or opened. If no options are present then this method works as if the
+ * {@link StandardOpenOption#CREATE CREATE}, {@link
+ * StandardOpenOption#TRUNCATE_EXISTING TRUNCATE_EXISTING}, and {@link
+ * StandardOpenOption#WRITE WRITE} options are present. In other words, it
+ * opens the file for writing, creating the file if it doesn't exist, or
+ * initially truncating an existing {@link #isRegularFile regular-file} to
+ * a size of {@code 0}.
+ *
+ *
+ * @param path
+ * the path to the file
+ * @param csq
+ * the CharSequence to be written
+ * @param cs
+ * the charset to use for encoding
+ * @param options
+ * options specifying how the file is opened
+ *
+ * @return the path
+ *
+ * @throws IllegalArgumentException
+ * if {@code options} contains an invalid combination of options
+ * @throws IOException
+ * if an I/O error occurs writing to or creating the file, or the
+ * text cannot be encoded using the specified charset
+ * @throws UnsupportedOperationException
+ * if an unsupported option is specified
+ * @throws SecurityException
+ * In the case of the default provider, and a security manager is
+ * installed, the {@link SecurityManager#checkWrite(String) checkWrite}
+ * method is invoked to check write access to the file. The {@link
+ * SecurityManager#checkDelete(String) checkDelete} method is
+ * invoked to check delete access if the file is opened with the
+ * {@code DELETE_ON_CLOSE} option.
+ *
+ * @since 11
+ */
+    public static Path writeString(Path path, CharSequence csq, Charset cs, OpenOption... options)
+        throws IOException
+    {
+        // ensure the text is not null before opening file
+        Objects.requireNonNull(path);
+        Objects.requireNonNull(csq);
+        Objects.requireNonNull(cs);
+
+        byte[] bytes;
+        try {   // only an encoding failure is translated into an IOException
+            bytes = JLA.getBytesNoRepl(String.valueOf(csq), cs);
+        } catch (IllegalArgumentException e) {
+            throw new IOException(e);
+        }
+        write(path, bytes, options);    // outside the try: an IAE for bad options must reach the caller unwrapped
+        return path;
+    }
+
// -- Stream APIs --
/**
--- old/src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java 2018-06-12 09:21:24.159234282 -0700
+++ new/src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java 2018-06-12 09:21:23.758191415 -0700
@@ -30,6 +30,7 @@
import java.lang.reflect.Executable;
import java.lang.reflect.Method;
import java.net.URI;
+import java.nio.charset.Charset;
import java.security.AccessControlContext;
import java.security.ProtectionDomain;
import java.util.Iterator;
@@ -256,6 +257,36 @@
Stream