1 /*
2 * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
572 return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
573 } else {
574 return result.with(StringLatin1.inflate(ba, off, len), UTF16);
575 }
576 }
577
578 @HotSpotIntrinsicCandidate
579 private static int implEncodeISOArray(byte[] sa, int sp,
580 byte[] da, int dp, int len) {
581 int i = 0;
582 for (; i < len; i++) {
583 char c = StringUTF16.getChar(sa, sp++);
584 if (c > '\u00FF')
585 break;
586 da[dp++] = (byte)c;
587 }
588 return i;
589 }
590
591 private static byte[] encode8859_1(byte coder, byte[] val) {
592 if (coder == LATIN1) {
593 return Arrays.copyOf(val, val.length);
594 }
595 int len = val.length >> 1;
596 byte[] dst = new byte[len];
597 int dp = 0;
598 int sp = 0;
599 int sl = len;
600 while (sp < sl) {
601 int ret = implEncodeISOArray(val, sp, dst, dp, len);
602 sp = sp + ret;
603 dp = dp + ret;
604 if (ret != len) {
605 char c = StringUTF16.getChar(val, sp++);
606 if (Character.isHighSurrogate(c) && sp < sl &&
607 Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
608 sp++;
609 }
610 dst[dp++] = '?';
611 len = sl - sp;
612 }
613 }
614 if (dp == dst.length) {
615 return dst;
616 }
617 return Arrays.copyOf(dst, dp);
618 }
619
620 //////////////////////////////// utf8 ////////////////////////////////////
621
622 private static boolean isNotContinuation(int b) {
623 return (b & 0xc0) != 0x80;
624 }
659 int b1 = src[sp++] & 0xff;
660 int b2 = src[sp++] & 0xff;
661 if (b1 > 0xf4 ||
662 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
663 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
664 isNotContinuation(b2))
665 return 1;
666 if (isNotContinuation(src[sp++]))
667 return 2;
668 return 3;
669 }
670 assert false;
671 return -1;
672 }
673
674 private static void throwMalformed(int off, int nb) {
675 throw new IllegalArgumentException("malformed input off : " + off +
676 ", length : " + nb);
677 }
678
679 private static char repl = '\ufffd';
680
681 private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
682 // ascii-bais, which has a relative impact to the non-ascii-only bytes
683 if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
684 return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
685 LATIN1);
686 return decodeUTF8_0(src, sp, len, doReplace);
687 }
688
689 private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
690 Result ret = resultCached.get();
691
692 int sl = sp + len;
693 int dp = 0;
694 byte[] dst = new byte[len];
695
696 if (COMPACT_STRINGS) {
697 while (sp < sl) {
698 int b1 = src[sp];
928 return Arrays.copyOf(dst, dp);
929 }
930
931 ////////////////////// for j.u.z.ZipCoder //////////////////////////
932
933 /*
934 * Throws iae, instead of replacing, if malformed or unmappble.
935 */
936 static String newStringUTF8NoRepl(byte[] src, int off, int len) {
937 if (COMPACT_STRINGS && !hasNegatives(src, off, len))
938 return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
939 Result ret = decodeUTF8_0(src, off, len, false);
940 return new String(ret.value, ret.coder);
941 }
942
943 /*
944 * Throws iae, instead of replacing, if unmappble.
945 */
946 static byte[] getBytesUTF8NoRepl(String s) {
947 return encodeUTF8(s.coder(), s.value(), false);
948 }
949 }
|
1 /*
2 * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
572 return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
573 } else {
574 return result.with(StringLatin1.inflate(ba, off, len), UTF16);
575 }
576 }
577
578 @HotSpotIntrinsicCandidate
579 private static int implEncodeISOArray(byte[] sa, int sp,
580 byte[] da, int dp, int len) {
581 int i = 0;
582 for (; i < len; i++) {
583 char c = StringUTF16.getChar(sa, sp++);
584 if (c > '\u00FF')
585 break;
586 da[dp++] = (byte)c;
587 }
588 return i;
589 }
590
591 private static byte[] encode8859_1(byte coder, byte[] val) {
592 return encode8859_1(coder, val, true);
593 }
594
595 private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
596 if (coder == LATIN1) {
597 return Arrays.copyOf(val, val.length);
598 }
599 int len = val.length >> 1;
600 byte[] dst = new byte[len];
601 int dp = 0;
602 int sp = 0;
603 int sl = len;
604 while (sp < sl) {
605 int ret = implEncodeISOArray(val, sp, dst, dp, len);
606 sp = sp + ret;
607 dp = dp + ret;
608 if (ret != len) {
609 if (!doReplace) {
610 throwMalformed(sp, 1);
611 }
612 char c = StringUTF16.getChar(val, sp++);
613 if (Character.isHighSurrogate(c) && sp < sl &&
614 Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
615 sp++;
616 }
617 dst[dp++] = '?';
618 len = sl - sp;
619 }
620 }
621 if (dp == dst.length) {
622 return dst;
623 }
624 return Arrays.copyOf(dst, dp);
625 }
626
627 //////////////////////////////// utf8 ////////////////////////////////////
628
629 private static boolean isNotContinuation(int b) {
630 return (b & 0xc0) != 0x80;
631 }
666 int b1 = src[sp++] & 0xff;
667 int b2 = src[sp++] & 0xff;
668 if (b1 > 0xf4 ||
669 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
670 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
671 isNotContinuation(b2))
672 return 1;
673 if (isNotContinuation(src[sp++]))
674 return 2;
675 return 3;
676 }
677 assert false;
678 return -1;
679 }
680
681 private static void throwMalformed(int off, int nb) {
682 throw new IllegalArgumentException("malformed input off : " + off +
683 ", length : " + nb);
684 }
685
686 private static void throwMalformed(byte[] val) {
687 int dp = 0;
688 while (dp < val.length && val[dp] >=0) { dp++; }
689 throwMalformed(dp, 1);
690 }
691
692 private static char repl = '\ufffd';
693
694 private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
695 // ascii-bais, which has a relative impact to the non-ascii-only bytes
696 if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
697 return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
698 LATIN1);
699 return decodeUTF8_0(src, sp, len, doReplace);
700 }
701
702 private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
703 Result ret = resultCached.get();
704
705 int sl = sp + len;
706 int dp = 0;
707 byte[] dst = new byte[len];
708
709 if (COMPACT_STRINGS) {
710 while (sp < sl) {
711 int b1 = src[sp];
941 return Arrays.copyOf(dst, dp);
942 }
943
944 ////////////////////// for j.u.z.ZipCoder //////////////////////////
945
946 /*
947 * Throws iae, instead of replacing, if malformed or unmappble.
948 */
949 static String newStringUTF8NoRepl(byte[] src, int off, int len) {
950 if (COMPACT_STRINGS && !hasNegatives(src, off, len))
951 return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
952 Result ret = decodeUTF8_0(src, off, len, false);
953 return new String(ret.value, ret.coder);
954 }
955
956 /*
957 * Throws iae, instead of replacing, if unmappble.
958 */
959 static byte[] getBytesUTF8NoRepl(String s) {
960 return encodeUTF8(s.coder(), s.value(), false);
961 }
962
963 ////////////////////// for j.n.f.Files //////////////////////////
964
965 private static boolean isASCII(byte[] src) {
966 return !hasNegatives(src, 0, src.length);
967 }
968
969 private static String newStringLatin1(byte[] src) {
970 if (COMPACT_STRINGS)
971 return new String(src, LATIN1);
972 return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
973 }
974
975 static String newStringNoRepl(byte[] src, Charset cs) {
976 if (cs == UTF_8) {
977 if (COMPACT_STRINGS && isASCII(src))
978 return new String(src, LATIN1);
979 Result ret = decodeUTF8_0(src, 0, src.length, false);
980 return new String(ret.value, ret.coder);
981 }
982 if (cs == ISO_8859_1) {
983 return newStringLatin1(src);
984 }
985 if (cs == US_ASCII) {
986 if (isASCII(src)) {
987 return newStringLatin1(src);
988 } else {
989 throwMalformed(src);
990 }
991 }
992
993 CharsetDecoder cd = cs.newDecoder();
994 // ascii fastpath
995 if ((cd instanceof ArrayDecoder) &&
996 ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
997 return newStringLatin1(src);
998 }
999 int len = src.length;
1000 if (len == 0) {
1001 return "";
1002 }
1003 int en = scale(len, cd.maxCharsPerByte());
1004 char[] ca = new char[en];
1005 if (cs.getClass().getClassLoader0() != null &&
1006 System.getSecurityManager() != null) {
1007 src = Arrays.copyOf(src, len);
1008 }
1009 ByteBuffer bb = ByteBuffer.wrap(src);
1010 CharBuffer cb = CharBuffer.wrap(ca);
1011 try {
1012 CoderResult cr = cd.decode(bb, cb, true);
1013 if (!cr.isUnderflow())
1014 cr.throwException();
1015 cr = cd.flush(cb);
1016 if (!cr.isUnderflow())
1017 cr.throwException();
1018 } catch (CharacterCodingException x) {
1019 throw new IllegalArgumentException(x); // todo
1020 }
1021 Result ret = resultCached.get().with(ca, 0, cb.position());
1022 return new String(ret.value, ret.coder);
1023 }
1024
1025 /*
1026 * Throws iae, instead of replacing, if unmappble.
1027 */
1028 static byte[] getBytesNoRepl(String s, Charset cs) {
1029 byte[] val = s.value();
1030 byte coder = s.coder();
1031 if (cs == UTF_8) {
1032 if (isASCII(val)) {
1033 return val;
1034 }
1035 return encodeUTF8(coder, val, false);
1036 }
1037 if (cs == ISO_8859_1) {
1038 if (coder == LATIN1) {
1039 return val;
1040 }
1041 return encode8859_1(coder, val, false);
1042 }
1043 if (cs == US_ASCII) {
1044 if (coder == LATIN1) {
1045 if (isASCII(val)) {
1046 return val;
1047 } else {
1048 throwMalformed(val);
1049 }
1050 }
1051 }
1052 CharsetEncoder ce = cs.newEncoder();
1053 // fastpath for ascii compatible
1054 if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
1055 ((ArrayEncoder)ce).isASCIICompatible() &&
1056 isASCII(val)))) {
1057 return val;
1058 }
1059 int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
1060 int en = scale(len, ce.maxBytesPerChar());
1061 byte[] ba = new byte[en];
1062 if (len == 0) {
1063 return ba;
1064 }
1065 if (ce instanceof ArrayEncoder) {
1066 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
1067 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
1068 if (blen != -1) {
1069 return safeTrim(ba, blen, true);
1070 }
1071 }
1072 boolean isTrusted = cs.getClass().getClassLoader0() == null ||
1073 System.getSecurityManager() == null;
1074 char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
1075 : StringUTF16.toChars(val);
1076 ByteBuffer bb = ByteBuffer.wrap(ba);
1077 CharBuffer cb = CharBuffer.wrap(ca, 0, len);
1078 try {
1079 CoderResult cr = ce.encode(cb, bb, true);
1080 if (!cr.isUnderflow())
1081 cr.throwException();
1082 cr = ce.flush(bb);
1083 if (!cr.isUnderflow())
1084 cr.throwException();
1085 } catch (CharacterCodingException x) {
1086 throw new Error(x);
1087 }
1088 return safeTrim(ba, bb.position(), isTrusted);
1089 }
1090 }
|