--- old/src/java.base/share/classes/java/lang/StringCoding.java 2018-06-12 09:21:20.122802833 -0700 +++ new/src/java.base/share/classes/java/lang/StringCoding.java 2018-06-12 09:21:19.532739762 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -589,6 +589,10 @@ } private static byte[] encode8859_1(byte coder, byte[] val) { + return encode8859_1(coder, val, true); + } + + private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } @@ -602,6 +606,9 @@ sp = sp + ret; dp = dp + ret; if (ret != len) { + if (!doReplace) { + throwMalformed(sp, 1); + } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { @@ -676,6 +683,12 @@ ", length : " + nb); } + private static void throwMalformed(byte[] val) { + int dp = 0; + while (dp < val.length && val[dp] >=0) { dp++; } + throwMalformed(dp, 1); + } + private static char repl = '\ufffd'; private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { @@ -946,4 +959,132 @@ static byte[] getBytesUTF8NoRepl(String s) { return encodeUTF8(s.coder(), s.value(), false); } + + ////////////////////// for j.n.f.Files ////////////////////////// + + private static boolean isASCII(byte[] src) { + return !hasNegatives(src, 0, src.length); + } + + private static String newStringLatin1(byte[] src) { + if (COMPACT_STRINGS) + return new String(src, LATIN1); + return new String(StringLatin1.inflate(src, 0, src.length), UTF16); + } + + static String newStringNoRepl(byte[] src, Charset cs) { + if (cs == UTF_8) { + if (COMPACT_STRINGS && isASCII(src)) + return new String(src, LATIN1); + 
Result ret = decodeUTF8_0(src, 0, src.length, false); + return new String(ret.value, ret.coder); + } + if (cs == ISO_8859_1) { + return newStringLatin1(src); + } + if (cs == US_ASCII) { + if (isASCII(src)) { + return newStringLatin1(src); + } else { + throwMalformed(src); + } + } + + CharsetDecoder cd = cs.newDecoder(); + // ascii fastpath + if ((cd instanceof ArrayDecoder) && + ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) { + return newStringLatin1(src); + } + int len = src.length; + if (len == 0) { + return ""; + } + int en = scale(len, cd.maxCharsPerByte()); + char[] ca = new char[en]; + if (cs.getClass().getClassLoader0() != null && + System.getSecurityManager() != null) { + src = Arrays.copyOf(src, len); + } + ByteBuffer bb = ByteBuffer.wrap(src); + CharBuffer cb = CharBuffer.wrap(ca); + try { + CoderResult cr = cd.decode(bb, cb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = cd.flush(cb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new IllegalArgumentException(x); // todo + } + Result ret = resultCached.get().with(ca, 0, cb.position()); + return new String(ret.value, ret.coder); + } + + /* + * Throws iae, instead of replacing, if unmappble. 
+ */ + static byte[] getBytesNoRepl(String s, Charset cs) { + byte[] val = s.value(); + byte coder = s.coder(); + if (cs == UTF_8) { + if (coder == LATIN1 && isASCII(val)) { // UTF16-coded bytes can pass isASCII (e.g. U+0100) yet are not ASCII text + return val; + } + return encodeUTF8(coder, val, false); + } + if (cs == ISO_8859_1) { + if (coder == LATIN1) { + return val; + } + return encode8859_1(coder, val, false); + } + if (cs == US_ASCII) { + if (coder == LATIN1) { + if (isASCII(val)) { + return val; + } else { + throwMalformed(val); + } + } + } + CharsetEncoder ce = cs.newEncoder(); + // fastpath for ascii compatible + if (coder == LATIN1 && (((ce instanceof ArrayEncoder) && + ((ArrayEncoder)ce).isASCIICompatible() && + isASCII(val)))) { + return val; + } + int len = val.length >> coder; // assume LATIN1=0/UTF16=1; + int en = scale(len, ce.maxBytesPerChar()); + byte[] ba = new byte[en]; + if (len == 0) { + return ba; + } + if (ce instanceof ArrayEncoder) { + int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) + : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); + if (blen != -1) { + return safeTrim(ba, blen, true); + } + } + boolean isTrusted = cs.getClass().getClassLoader0() == null || + System.getSecurityManager() == null; + char[] ca = (coder == LATIN1 ) ? 
StringLatin1.toChars(val) + : StringUTF16.toChars(val); + ByteBuffer bb = ByteBuffer.wrap(ba); + CharBuffer cb = CharBuffer.wrap(ca, 0, len); + try { + CoderResult cr = ce.encode(cb, bb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = ce.flush(bb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new IllegalArgumentException(x); // IAE is the documented signal; Files.writeString rethrows it as IOException + } + return safeTrim(ba, bb.position(), isTrusted); + } } --- old/src/java.base/share/classes/java/lang/System.java 2018-06-12 09:21:21.367935923 -0700 +++ new/src/java.base/share/classes/java/lang/System.java 2018-06-12 09:21:20.967893163 -0700 @@ -47,6 +47,7 @@ import java.security.PrivilegedAction; import java.nio.channels.Channel; import java.nio.channels.spi.SelectorProvider; +import java.nio.charset.Charset; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -2150,6 +2151,14 @@ return ModuleLayer.layers(loader); } + public String newStringNoRepl(byte[] bytes, Charset cs) { + return StringCoding.newStringNoRepl(bytes, cs); + } + + public byte[] getBytesNoRepl(String s, Charset cs) { + return StringCoding.getBytesNoRepl(s, cs); + } + public String newStringUTF8NoRepl(byte[] bytes, int off, int len) { return StringCoding.newStringUTF8NoRepl(bytes, off, len); } --- old/src/java.base/share/classes/java/nio/file/Files.java 2018-06-12 09:21:22.805089539 -0700 +++ new/src/java.base/share/classes/java/nio/file/Files.java 2018-06-12 09:21:22.386044748 -0700 @@ -3121,6 +3121,9 @@ */ private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8; + private static final jdk.internal.misc.JavaLangAccess JLA = + jdk.internal.misc.SharedSecrets.getJavaLangAccess(); + /** * Reads all the bytes from an input stream. Uses {@code initialSize} as a hint * about how many bytes the stream will have. @@ -3203,6 +3206,81 @@ } /** + * Reads all content from a file into a string, decoding from bytes to characters + * using the {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}. 
+ * The method ensures that the file is closed when all content has been read + * or an I/O error, or other runtime exception, is thrown. + * + *

This method is equivalent to: + * {@code readString(path, StandardCharsets.UTF_8) } + * + * @param path the path to the file + * + * @return a String containing the content read from the file + * + * @throws IOException + * if an I/O error occurs reading from the file or a malformed or + * unmappable byte sequence is read + * @throws OutOfMemoryError + * if the file is extremely large, for example larger than {@code 2GB} + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkRead(String) checkRead} + * method is invoked to check read access to the file. + * + * @since 11 + */ + public static String readString(Path path) throws IOException { + return readString(path, StandardCharsets.UTF_8); + } + + /** + * Reads all characters from a file into a string, decoding from bytes to characters + * using the specified {@linkplain Charset charset}. + * The method ensures that the file is closed when all content has been read + * or an I/O error, or other runtime exception, is thrown. + * + *

This method reads all content including the line separators in the middle + * and/or at the end. The resulting string will contain line separators as they + * appear in the file. + * + * @apiNote + * This method is intended for simple cases where it is appropriate and convenient + * to read the content of a file into a String. It is not intended for reading + * very large files. + * + * + * + * @param path the path to the file + * @param cs the charset to use for decoding + * + * @return a String containing the content read from the file + * + * @throws IOException + * if an I/O error occurs reading from the file or a malformed or + * unmappable byte sequence is read + * @throws OutOfMemoryError + * if the file is extremely large, for example larger than {@code 2GB} + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkRead(String) checkRead} + * method is invoked to check read access to the file. + * + * @since 11 + */ + public static String readString(Path path, Charset cs) throws IOException { + Objects.requireNonNull(path); + Objects.requireNonNull(cs); + + byte[] ba = readAllBytes(path); + try { + return JLA.newStringNoRepl(ba, cs); + } catch (IllegalArgumentException e) { + throw new IOException(e); + } + } + + /** * Read all lines from a file. This method ensures that the file is * closed when all bytes have been read or an I/O error, or other runtime * exception, is thrown. Bytes from the file are decoded into characters @@ -3456,6 +3534,110 @@ return write(path, lines, StandardCharsets.UTF_8, options); } + /** + * Write a {@linkplain java.lang.CharSequence CharSequence} to a file. + * Characters are encoded into bytes using the + * {@link StandardCharsets#UTF_8 UTF-8} {@link Charset charset}. + * + *

This method is equivalent to: + * {@code writeString(path, csq, StandardCharsets.UTF_8, options) } + * + * @param path + * the path to the file + * @param csq + * the CharSequence to be written + * @param options + * options specifying how the file is opened + * + * @return the path + * + * @throws IllegalArgumentException + * if {@code options} contains an invalid combination of options + * @throws IOException + * if an I/O error occurs writing to or creating the file, or the + * text cannot be encoded using the specified charset + * @throws UnsupportedOperationException + * if an unsupported option is specified + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkWrite(String) checkWrite} + * method is invoked to check write access to the file. The {@link + * SecurityManager#checkDelete(String) checkDelete} method is + * invoked to check delete access if the file is opened with the + * {@code DELETE_ON_CLOSE} option. + * + * @since 11 + */ + public static Path writeString(Path path, CharSequence csq, OpenOption... options) + throws IOException + { + return writeString(path, csq, StandardCharsets.UTF_8, options); + } + + /** + * Write a {@linkplain java.lang.CharSequence CharSequence} to a file. + * Characters are encoded into bytes using the specified + * {@linkplain java.nio.charset.Charset charset}. + * + *

All characters are written as they are, including the line separators in + * the char sequence. No extra characters are added. + * + *

The {@code options} parameter specifies how the file is created + * or opened. If no options are present then this method works as if the + * {@link StandardOpenOption#CREATE CREATE}, {@link + * StandardOpenOption#TRUNCATE_EXISTING TRUNCATE_EXISTING}, and {@link + * StandardOpenOption#WRITE WRITE} options are present. In other words, it + * opens the file for writing, creating the file if it doesn't exist, or + * initially truncating an existing {@link #isRegularFile regular-file} to + * a size of {@code 0}. + * + * + * @param path + * the path to the file + * @param csq + * the CharSequence to be written + * @param cs + * the charset to use for encoding + * @param options + * options specifying how the file is opened + * + * @return the path + * + * @throws IllegalArgumentException + * if {@code options} contains an invalid combination of options + * @throws IOException + * if an I/O error occurs writing to or creating the file, or the + * text cannot be encoded using the specified charset + * @throws UnsupportedOperationException + * if an unsupported option is specified + * @throws SecurityException + * In the case of the default provider, and a security manager is + * installed, the {@link SecurityManager#checkWrite(String) checkWrite} + * method is invoked to check write access to the file. The {@link + * SecurityManager#checkDelete(String) checkDelete} method is + * invoked to check delete access if the file is opened with the + * {@code DELETE_ON_CLOSE} option. + * + * @since 11 + */ + public static Path writeString(Path path, CharSequence csq, Charset cs, OpenOption... 
options) + throws IOException + { + // ensure the text is not null before opening file + Objects.requireNonNull(path); + Objects.requireNonNull(csq); + Objects.requireNonNull(cs); + + try { + byte[] bytes = JLA.getBytesNoRepl(String.valueOf(csq), cs); + write(path, bytes, options); + } catch (IllegalArgumentException e) { + throw new IOException(e); + } + + return path; + } + // -- Stream APIs -- /** --- old/src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java 2018-06-12 09:21:24.159234282 -0700 +++ new/src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java 2018-06-12 09:21:23.758191415 -0700 @@ -30,6 +30,7 @@ import java.lang.reflect.Executable; import java.lang.reflect.Method; import java.net.URI; +import java.nio.charset.Charset; import java.security.AccessControlContext; import java.security.ProtectionDomain; import java.util.Iterator; @@ -256,6 +257,36 @@ Stream layers(ClassLoader loader); /** + * Constructs a new {@code String} by decoding the specified subarray of + * bytes using the specified {@linkplain java.nio.charset.Charset charset}. + * + * The caller of this method shall relinquish and transfer the ownership of + * the byte array to the callee since the later will not make a copy. + * + * @param bytes the byte array source + * @param cs the Charset + * @return the newly created string + * @throws IllegalArgumentException for malformed or unmappable bytes + */ + String newStringNoRepl(byte[] bytes, Charset cs); + + /** + * Encode the given string into a sequence of bytes using the specified Charset. + * + * This method avoids copying the String's internal representation if the input + * is ASCII. + * + * This method throws IllegalArgumentException instead of replacing when + * malformed input or unmappable characters are encountered. 
+ * + * @param s the string to encode + * @param cs the charset + * @return the encoded bytes + * @throws IllegalArgumentException for malformed input or unmappable characters + */ + byte[] getBytesNoRepl(String s, Charset cs); + + /** * Returns a new string by decoding from the given utf8 bytes array. * * @param off the index of the first byte to decode --- /dev/null 2018-05-07 23:51:01.519000000 -0700 +++ new/test/jdk/java/nio/file/Files/ReadWriteString.java 2018-06-12 09:21:24.985322581 -0700 @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; +import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import static java.nio.file.StandardOpenOption.APPEND; +import java.util.Random; +import java.util.concurrent.Callable; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/* @test + * @bug 8201276 + * @build ReadWriteString PassThroughFileSystem + * @run testng ReadWriteString + * @summary Unit test for methods for Files readString and write methods. 
+ * @key randomness + */ +@Test(groups = "readwrite") +public class ReadWriteString { + + private static final OpenOption OPTION_CREATE = StandardOpenOption.CREATE; + // data for text files + private static final String EN_STRING = "The quick brown fox jumps over the lazy dog"; + private static final String JA_STRING = "\u65e5\u672c\u8a9e\u6587\u5b57\u5217"; + // malformed input: a high surrogate without the low surrogate + static char[] illChars = { + '\u00fa', '\ud800' + }; + + static byte[] data = getData(); + + static byte[] getData() { + try { + String str1 = "A string that contains "; + String str2 = " , an invalid character for UTF-8."; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(str1.getBytes()); + baos.write(0xFA); + baos.write(str2.getBytes()); + return baos.toByteArray(); + } catch (IOException ex) { + return null; //shouldn't happen + } + } + + // file used by most tests + private Path tmpfile; + + + /* + * DataProvider for malformed write test. 
Provides the following fields: + * file path, malformed input string, charset + */ + @DataProvider(name = "malformedWrite") + public Object[][] getMalformedWrite() throws IOException { + Path path = Files.createTempFile("malformedWrite", null); + return new Object[][]{ + {path, "\ud800", null}, //the default Charset is UTF_8 + {path, "\u00A0\u00A1", US_ASCII}, + {path, "\ud800", UTF_8}, + {path, JA_STRING, ISO_8859_1}, + }; + } + + /* + * DataProvider for illegal input test + * Writes the data in ISO8859 and reads with UTF_8, expects MalformedInputException + */ + @DataProvider(name = "illegalInput") + public Object[][] getIllegalInput() throws IOException { + Path path = Files.createTempFile("illegalInput", null); + return new Object[][]{ + {path, data, ISO_8859_1, null}, + {path, data, ISO_8859_1, UTF_8} + }; + } + + @BeforeClass + void setup() throws IOException { + tmpfile = Files.createTempFile("readWriteString", null); + } + + @AfterClass + void cleanup() throws IOException { + Files.deleteIfExists(tmpfile); + } + + /** + * Verifies that NPE is thrown when one of the parameters is null. 
+ */ + @Test + public void testNulls() { + Path path = Paths.get("."); + String s = "abc"; + + checkNullPointerException(() -> Files.readString((Path) null)); + checkNullPointerException(() -> Files.readString((Path) null, UTF_8)); + checkNullPointerException(() -> Files.readString(path, (Charset) null)); + + checkNullPointerException(() -> Files.writeString((Path) null, s, OPTION_CREATE)); + checkNullPointerException(() -> Files.writeString(path, (CharSequence) null, OPTION_CREATE)); + checkNullPointerException(() -> Files.writeString(path, s, (OpenOption[]) null)); + + checkNullPointerException(() -> Files.writeString((Path) null, s, UTF_8, OPTION_CREATE)); + checkNullPointerException(() -> Files.writeString(path, (CharSequence) null, UTF_8, OPTION_CREATE)); + checkNullPointerException(() -> Files.writeString(path, s, (Charset) null, OPTION_CREATE)); + checkNullPointerException(() -> Files.writeString(path, s, UTF_8, (OpenOption[]) null)); + } + + /** + * Verifies the readString and write String methods. Writes to files Strings + * of various sizes, with/without specifying the Charset, and then compares + * the result of reading the files. + */ + @Test + public void testReadWrite() throws IOException { + int size = 0; + while (size < 16 * 1024) { + testReadWrite(size, null, false); + testReadWrite(size, null, true); + testReadWrite(size, UTF_8, false); + testReadWrite(size, UTF_8, true); + size += 1024; + } + } + + /** + * Verifies that IOException is thrown (as specified) when giving a malformed + * string input. 
+ * + * @param path the path to write + * @param s the string + * @param cs the Charset + * @throws IOException if the input is malformed + */ + @Test(dataProvider = "malformedWrite", expectedExceptions = IOException.class) + public void testMalformedWrite(Path path, String s, Charset cs) throws IOException { + path.toFile().deleteOnExit(); + if (cs == null) { + Files.writeString(path, s, OPTION_CREATE); + } else { + Files.writeString(path, s, cs, OPTION_CREATE); + } + } + + /** + * Verifies that IOException is thrown when reading a file using the wrong + * Charset. + * + * @param path the path to write and read + * @param data the data used for the test + * @param csWrite the Charset to use for writing the test file + * @param csRead the Charset to use for reading the file + * @throws IOException when the Charset used for reading the file is incorrect + */ + @Test(dataProvider = "illegalInput", expectedExceptions = IOException.class) + public void testMalformedRead(Path path, byte[] data, Charset csWrite, Charset csRead) throws IOException { + path.toFile().deleteOnExit(); + String temp = new String(data, csWrite); + Files.writeString(path, temp, csWrite, OPTION_CREATE); + String s; + if (csRead == null) { + s = Files.readString(path); + } else { + s = Files.readString(path, csRead); + } + } + + private void checkNullPointerException(Callable c) { + try { + c.call(); + fail("NullPointerException expected"); + } catch (NullPointerException ignore) { + } catch (Exception e) { + fail(e + " not expected"); + } + } + + private void testReadWrite(int size, Charset cs, boolean append) throws IOException { + StringBuilder sb = new StringBuilder(size); + String expected; + String str = generateString(size); + Path result; + if (cs == null) { + result = Files.writeString(tmpfile, str); + } else { + result = Files.writeString(tmpfile, str, cs); + } + + //System.out.println(result.toUri().toASCIIString()); + assertTrue(result == tmpfile); + if (append) { + if (cs == null) { + 
Files.writeString(tmpfile, str, APPEND); + } else { + Files.writeString(tmpfile, str, cs, APPEND); + } + assertTrue(Files.size(tmpfile) == size * 2); + } + + + if (append) { + sb.append(str).append(str); + expected = sb.toString(); + } else { + expected = str; + } + + String read; + if (cs == null) { + read = Files.readString(result); + } else { + read = Files.readString(result, cs); + } + //System.out.println("chars read: " + read.length()); + //System.out.println(read); + //System.out.println("---end---"); + assertTrue(read.equals(expected), "String read not the same as written"); + } + + static final char[] CHARS = "abcdefghijklmnopqrstuvwxyz \r\n".toCharArray(); + StringBuilder sb = new StringBuilder(512); + Random random = new Random(); + + private String generateString(int size) { + sb.setLength(0); + for (int i = 0; i < size; i++) { + char c = CHARS[random.nextInt(CHARS.length)]; + sb.append(c); + } + + return sb.toString(); + } +}