1 /*
   2  * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Oct 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 
  23 package com.sun.org.apache.xml.internal.serialize;
  24 
  25 
  26 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  27 import java.io.UnsupportedEncodingException;
  28 import java.util.Locale;
  29 import java.util.Map;
  30 import java.util.concurrent.ConcurrentHashMap;
  31 
  32 
  33 /**
  34  * Provides information about encodings. Depends on the Java runtime
  35  * to provides writers for the different encodings, but can be used
  36  * to override encoding names and provide the last printable character
  37  * for each encoding.
  38  *
  39  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  40  *
  41  * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
  42  * is replaced by that of Xalan. Main class
  43  * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
  44  * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
  45  */
  46 @Deprecated
  47 class Encodings
  48 {
  49 
  50 
  51     /**
  52      * The last printable character for unknown encodings.
  53      */
  54     static final int DEFAULT_LAST_PRINTABLE = 0x7F;
  55 
  56     // last printable character for Unicode-compatible encodings
  57     static final int LAST_PRINTABLE_UNICODE = 0xffff;
  58     // unicode-compliant encodings; can express plane 0
  59     static final String[] UNICODE_ENCODINGS = {
  60         "Unicode", "UnicodeBig", "UnicodeLittle", "GB2312", "UTF8", "UTF-16",
  61     };
  62     // default (Java) encoding if none supplied:
  63     static final String DEFAULT_ENCODING = "UTF8";
  64 
  65     // note that the size of this Map
  66     // is bounded by the number of encodings recognized by EncodingMap;
  67     // therefore it poses no static mutability risk.
  68     private static final Map<String, EncodingInfo> _encodings = new ConcurrentHashMap<>();
  69 
  70     /**
  71      * @param encoding a MIME charset name, or null.
  72      */
  73     static EncodingInfo getEncodingInfo(String encoding, boolean allowJavaNames) throws UnsupportedEncodingException {
  74         EncodingInfo eInfo = null;
  75         if (encoding == null) {
  76             if((eInfo = _encodings.get(DEFAULT_ENCODING)) != null)
  77                 return eInfo;
  78             eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(DEFAULT_ENCODING), DEFAULT_ENCODING, LAST_PRINTABLE_UNICODE);
  79             _encodings.put(DEFAULT_ENCODING, eInfo);
  80             return eInfo;
  81         }
  82         // need to convert it to upper case:
  83         encoding = encoding.toUpperCase(Locale.ENGLISH);
  84         String jName = EncodingMap.getIANA2JavaMapping(encoding);
  85         if(jName == null) {
  86             // see if the encoding passed in is a Java encoding name.
  87             if(allowJavaNames ) {
  88                 EncodingInfo.testJavaEncodingName(encoding);
  89                 if((eInfo = _encodings.get(encoding)) != null)
  90                     return eInfo;
  91                 // is it known to be unicode-compliant?
  92                 int i=0;
  93                 for(; i<UNICODE_ENCODINGS.length; i++) {
  94                     if(UNICODE_ENCODINGS[i].equalsIgnoreCase(encoding)) {
  95                         eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, LAST_PRINTABLE_UNICODE);
  96                         break;
  97                     }
  98                 }
  99                 if(i == UNICODE_ENCODINGS.length) {
 100                     eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, DEFAULT_LAST_PRINTABLE);
 101                 }
 102                 _encodings.put(encoding, eInfo);
 103                 return eInfo;
 104             } else {
 105                 throw new UnsupportedEncodingException(encoding);
 106             }
 107         }
 108         if ((eInfo = _encodings.get(jName)) != null)
 109             return eInfo;
 110         // have to create one...
 111         // is it known to be unicode-compliant?
 112         int i=0;
 113         for(; i<UNICODE_ENCODINGS.length; i++) {
 114             if(UNICODE_ENCODINGS[i].equalsIgnoreCase(jName)) {
 115                 eInfo = new EncodingInfo(encoding, jName, LAST_PRINTABLE_UNICODE);
 116                 break;
 117             }
 118         }
 119         if(i == UNICODE_ENCODINGS.length) {
 120             eInfo = new EncodingInfo(encoding, jName, DEFAULT_LAST_PRINTABLE);
 121         }
 122         _encodings.put(jName, eInfo);
 123         return eInfo;
 124     }
 125 
 126     static final String JIS_DANGER_CHARS
 127     = "\\\u007e\u007f\u00a2\u00a3\u00a5\u00ac"
 128     +"\u2014\u2015\u2016\u2026\u203e\u203e\u2225\u222f\u301c"
 129     +"\uff3c\uff5e\uffe0\uffe1\uffe2\uffe3";
 130 
 131 }