1 /* 2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 // Sep 14, 2000: 22 // Fixed comments to preserve whitespaces and add a line break 23 // when indenting. Reported by Gervase Markham <gerv@gerv.net> 24 // Sep 14, 2000: 25 // Fixed serializer to report IO exception directly, instead at 26 // the end of document processing. 27 // Reported by Patrick Higgins <phiggins@transzap.com> 28 // Sep 13, 2000: 29 // CR in character data will print as D; 30 // Aug 25, 2000: 31 // Fixed processing instruction printing inside element content 32 // to not escape content. Reported by Mikael Staldal 33 // <d96-mst@d.kth.se> 34 // Aug 25, 2000: 35 // Added ability to omit comments. 36 // Contributed by Anupam Bagchi <abagchi@jtcsv.com> 37 // Aug 26, 2000: 38 // Fixed bug in newline handling when preserving spaces. 39 // Contributed by Mike Dusseault <mdusseault@home.com> 40 // Aug 29, 2000: 41 // Fixed state.unescaped not being set to false when 42 // entering element state. 
43 // Reported by Lowell Vaughn <lvaughn@agillion.com> 44 45 46 package com.sun.org.apache.xml.internal.serialize; 47 48 49 import com.sun.org.apache.xerces.internal.dom.DOMErrorImpl; 50 import com.sun.org.apache.xerces.internal.dom.DOMLocatorImpl; 51 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter; 52 import com.sun.org.apache.xerces.internal.util.XMLChar; 53 import java.io.IOException; 54 import java.io.OutputStream; 55 import java.io.Writer; 56 import java.util.ArrayList; 57 import java.util.HashMap; 58 import java.util.List; 59 import java.util.Map; 60 import org.w3c.dom.DOMError; 61 import org.w3c.dom.DOMErrorHandler; 62 import org.w3c.dom.Document; 63 import org.w3c.dom.DocumentFragment; 64 import org.w3c.dom.DocumentType; 65 import org.w3c.dom.Element; 66 import org.w3c.dom.Node; 67 import org.w3c.dom.ls.LSException; 68 import org.w3c.dom.ls.LSSerializerFilter; 69 import org.w3c.dom.traversal.NodeFilter; 70 import org.xml.sax.ContentHandler; 71 import org.xml.sax.DTDHandler; 72 import org.xml.sax.DocumentHandler; 73 import org.xml.sax.Locator; 74 import org.xml.sax.SAXException; 75 import org.xml.sax.ext.DeclHandler; 76 import org.xml.sax.ext.LexicalHandler; 77 78 /** 79 * Base class for a serializer supporting both DOM and SAX pretty 80 * serializing of XML/HTML/XHTML documents. Derives classes perform 81 * the method-specific serializing, this class provides the common 82 * serializing mechanisms. 83 * <p> 84 * The serializer must be initialized with the proper writer and 85 * output format before it can be used by calling {@link #setOutputCharStream} 86 * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat} 87 * for the output format. 88 * <p> 89 * The serializer can be reused any number of times, but cannot 90 * be used concurrently by two threads. 91 * <p> 92 * If an output stream is used, the encoding is taken from the 93 * output format (defaults to <tt>UTF-8</tt>). 
If a writer is
 * used, make sure the writer uses the same encoding (if applicable)
 * as specified in the output format.
 * <p>
 * The serializer supports both DOM and SAX. DOM serializing is done
 * by calling {@link #serialize(Document)} and SAX serializing is done by firing
 * SAX events and using the serializer as a document handler.
 * This also applies to derived classes.
 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
 * <p>
 * For elements that are not specified as whitespace preserving,
 * the serializer will potentially break long text lines at space
 * boundaries, indent lines, and serialize elements on separate
 * lines. Line terminators will be regarded as spaces, and
 * spaces at the beginning of a line will be stripped.
 * <p>
 * When indenting, the serializer is capable of detecting what appears
 * to be element content, and serializing these elements indented on separate
 * lines. An element is serialized indented when it is the first or
 * last child of an element, or immediately following or preceding
 * another element.
 *
 *
 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
 * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
 * @author Elena Litani, IBM
 * @author Sunitha Reddy, Sun Microsystems
 * @see Serializer
 * @see org.w3c.dom.ls.LSSerializer
 *
 * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
 * is replaced by that of Xalan. Main class
 * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
 * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
131 * 132 * @LastModified: Nov 2017 133 */ 134 @Deprecated 135 public abstract class BaseMarkupSerializer 136 implements ContentHandler, DocumentHandler, LexicalHandler, 137 DTDHandler, DeclHandler, DOMSerializer, Serializer 138 { 139 140 // DOM L3 implementation 141 protected short features = 0xFFFFFFFF; 142 protected DOMErrorHandler fDOMErrorHandler; 143 protected final DOMErrorImpl fDOMError = new DOMErrorImpl(); 144 protected LSSerializerFilter fDOMFilter; 145 146 protected EncodingInfo _encodingInfo; 147 148 149 /** 150 * Holds array of all element states that have been entered. 151 * The array is automatically resized. When leaving an element, 152 * it's state is not removed but reused when later returning 153 * to the same nesting level. 154 */ 155 private ElementState[] _elementStates; 156 157 158 /** 159 * The index of the next state to place in the array, 160 * or one plus the index of the current state. When zero, 161 * we are in no state. 162 */ 163 private int _elementStateCount; 164 165 166 /** 167 * List holding comments and PIs that come before the root 168 * element (even after it), see {@link #serializePreRoot}. 169 */ 170 private List<String> _preRoot; 171 172 173 /** 174 * If the document has been started (header serialized), this 175 * flag is set to true so it's not started twice. 176 */ 177 protected boolean _started; 178 179 180 /** 181 * True if the serializer has been prepared. This flag is set 182 * to false when the serializer is reset prior to using it, 183 * and to true after it has been prepared for usage. 184 */ 185 private boolean _prepared; 186 187 188 /** 189 * Association between namespace URIs (keys) and prefixes (values). 190 * Accumulated here prior to starting an element and placing this 191 * list in the element state. 192 */ 193 protected Map<String, String> _prefixes; 194 195 196 /** 197 * The system identifier of the document type, if known. 
198 */ 199 protected String _docTypePublicId; 200 201 202 /** 203 * The system identifier of the document type, if known. 204 */ 205 protected String _docTypeSystemId; 206 207 208 /** 209 * The output format associated with this serializer. This will never 210 * be a null reference. If no format was passed to the constructor, 211 * the default one for this document type will be used. The format 212 * object is never changed by the serializer. 213 */ 214 protected OutputFormat _format; 215 216 217 /** 218 * The printer used for printing text parts. 219 */ 220 protected Printer _printer; 221 222 223 /** 224 * True if indenting printer. 225 */ 226 protected boolean _indenting; 227 228 /** Temporary buffer to store character data */ 229 protected final StringBuffer fStrBuffer = new StringBuffer(40); 230 231 /** 232 * The underlying writer. 233 */ 234 private Writer _writer; 235 236 237 /** 238 * The output stream. 239 */ 240 private OutputStream _output; 241 242 /** Current node that is being processed */ 243 protected Node fCurrentNode = null; 244 245 246 247 //--------------------------------// 248 // Constructor and initialization // 249 //--------------------------------// 250 251 252 /** 253 * Protected constructor can only be used by derived class. 
254 * Must initialize the serializer before serializing any document, 255 * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 256 * first 257 */ 258 protected BaseMarkupSerializer( OutputFormat format ) 259 { 260 int i; 261 262 _elementStates = new ElementState[ 10 ]; 263 for ( i = 0 ; i < _elementStates.length ; ++i ) 264 _elementStates[ i ] = new ElementState(); 265 _format = format; 266 } 267 268 269 public DocumentHandler asDocumentHandler() 270 throws IOException 271 { 272 prepare(); 273 return this; 274 } 275 276 277 public ContentHandler asContentHandler() 278 throws IOException 279 { 280 prepare(); 281 return this; 282 } 283 284 285 public DOMSerializer asDOMSerializer() 286 throws IOException 287 { 288 prepare(); 289 return this; 290 } 291 292 293 public void setOutputByteStream( OutputStream output ) 294 { 295 if ( output == null ) { 296 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 297 "ArgumentIsNull", new Object[]{"output"}); 298 throw new NullPointerException(msg); 299 } 300 _output = output; 301 _writer = null; 302 reset(); 303 } 304 305 306 public void setOutputCharStream( Writer writer ) 307 { 308 if ( writer == null ) { 309 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 310 "ArgumentIsNull", new Object[]{"writer"}); 311 throw new NullPointerException(msg); 312 } 313 _writer = writer; 314 _output = null; 315 reset(); 316 } 317 318 319 public void setOutputFormat( OutputFormat format ) 320 { 321 if ( format == null ) { 322 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 323 "ArgumentIsNull", new Object[]{"format"}); 324 throw new NullPointerException(msg); 325 } 326 _format = format; 327 reset(); 328 } 329 330 331 public boolean reset() 332 { 333 if ( _elementStateCount > 1 ) { 334 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 335 "ResetInMiddle", null); 336 throw new 
IllegalStateException(msg); 337 } 338 _prepared = false; 339 fCurrentNode = null; 340 fStrBuffer.setLength(0); 341 return true; 342 } 343 344 protected void cleanup() { 345 fCurrentNode = null; 346 } 347 348 protected void prepare() 349 throws IOException 350 { 351 if ( _prepared ) 352 return; 353 354 if ( _writer == null && _output == null ) { 355 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, 356 "NoWriterSupplied", null); 357 throw new IOException(msg); 358 } 359 // If the output stream has been set, use it to construct 360 // the writer. It is possible that the serializer has been 361 // reused with the same output stream and different encoding. 362 363 _encodingInfo = _format.getEncodingInfo(); 364 365 if ( _output != null ) { 366 _writer = _encodingInfo.getWriter(_output); 367 } 368 369 if ( _format.getIndenting() ) { 370 _indenting = true; 371 _printer = new IndentPrinter( _writer, _format ); 372 } else { 373 _indenting = false; 374 _printer = new Printer( _writer, _format ); 375 } 376 377 ElementState state; 378 379 _elementStateCount = 0; 380 state = _elementStates[ 0 ]; 381 state.namespaceURI = null; 382 state.localName = null; 383 state.rawName = null; 384 state.preserveSpace = _format.getPreserveSpace(); 385 state.empty = true; 386 state.afterElement = false; 387 state.afterComment = false; 388 state.doCData = state.inCData = false; 389 state.prefixes = null; 390 391 _docTypePublicId = _format.getDoctypePublic(); 392 _docTypeSystemId = _format.getDoctypeSystem(); 393 _started = false; 394 _prepared = true; 395 } 396 397 398 399 //----------------------------------// 400 // DOM document serializing methods // 401 //----------------------------------// 402 403 404 /** 405 * Serializes the DOM element using the previously specified 406 * writer and output format. Throws an exception only if 407 * an I/O exception occured while serializing. 
408 * 409 * @param elem The element to serialize 410 * @throws IOException An I/O exception occured while 411 * serializing 412 */ 413 public void serialize( Element elem ) 414 throws IOException 415 { 416 reset(); 417 prepare(); 418 serializeNode( elem ); 419 cleanup(); 420 _printer.flush(); 421 if ( _printer.getException() != null ) 422 throw _printer.getException(); 423 } 424 425 /** 426 * Serializes a node using the previously specified 427 * writer and output format. Throws an exception only if 428 * an I/O exception occured while serializing. 429 * 430 * @param node Node to serialize 431 * @throws IOException An I/O exception occured while serializing 432 */ 433 public void serialize( Node node ) throws IOException { 434 reset(); 435 prepare(); 436 serializeNode( node ); 437 //Print any PIs and Comments which appeared in 'node' 438 serializePreRoot(); 439 _printer.flush(); 440 if ( _printer.getException() != null ) 441 throw _printer.getException(); 442 } 443 444 /** 445 * Serializes the DOM document fragmnt using the previously specified 446 * writer and output format. Throws an exception only if 447 * an I/O exception occured while serializing. 448 * 449 * @param frag The document fragment to serialize 450 * @throws IOException An I/O exception occured while 451 * serializing 452 */ 453 public void serialize( DocumentFragment frag ) 454 throws IOException 455 { 456 reset(); 457 prepare(); 458 serializeNode( frag ); 459 cleanup(); 460 _printer.flush(); 461 if ( _printer.getException() != null ) 462 throw _printer.getException(); 463 } 464 465 466 /** 467 * Serializes the DOM document using the previously specified 468 * writer and output format. Throws an exception only if 469 * an I/O exception occured while serializing. 
470 * 471 * @param doc The document to serialize 472 * @throws IOException An I/O exception occured while 473 * serializing 474 */ 475 public void serialize( Document doc ) 476 throws IOException 477 { 478 reset(); 479 prepare(); 480 serializeNode( doc ); 481 serializePreRoot(); 482 cleanup(); 483 _printer.flush(); 484 if ( _printer.getException() != null ) 485 throw _printer.getException(); 486 } 487 488 489 //------------------------------------------// 490 // SAX document handler serializing methods // 491 //------------------------------------------// 492 493 494 public void startDocument() 495 throws SAXException 496 { 497 try { 498 prepare(); 499 } catch ( IOException except ) { 500 throw new SAXException( except.toString() ); 501 } 502 // Nothing to do here. All the magic happens in startDocument(String) 503 } 504 505 506 public void characters( char[] chars, int start, int length ) 507 throws SAXException 508 { 509 ElementState state; 510 511 try { 512 state = content(); 513 514 // Check if text should be print as CDATA section or unescaped 515 // based on elements listed in the output format (the element 516 // state) or whether we are inside a CDATA section or entity. 517 518 if ( state.inCData || state.doCData ) { 519 int saveIndent; 520 521 // Print a CDATA section. The text is not escaped, but ']]>' 522 // appearing in the code must be identified and dealt with. 523 // The contents of a text node is considered space preserving. 524 if ( ! 
state.inCData ) { 525 _printer.printText( "<![CDATA[" ); 526 state.inCData = true; 527 } 528 saveIndent = _printer.getNextIndent(); 529 _printer.setNextIndent( 0 ); 530 char ch; 531 final int end = start + length; 532 for ( int index = start ; index < end; ++index ) { 533 ch = chars[index]; 534 if ( ch == ']' && index + 2 < end && 535 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) { 536 _printer.printText("]]]]><![CDATA[>"); 537 index +=2; 538 continue; 539 } 540 if (!XMLChar.isValid(ch)) { 541 // check if it is surrogate 542 if (++index < end) { 543 surrogates(ch, chars[index],true); 544 } 545 else { 546 fatalError("The character '"+ch+"' is an invalid XML character"); 547 } 548 continue; 549 } 550 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 551 ch == '\n' || ch == '\r' || ch == '\t' ) { 552 _printer.printText(ch); 553 } 554 else { 555 // The character is not printable -- split CDATA section 556 _printer.printText("]]>&#x"); 557 _printer.printText(Integer.toHexString(ch)); 558 _printer.printText(";<![CDATA["); 559 } 560 } 561 _printer.setNextIndent( saveIndent ); 562 563 } else { 564 565 int saveIndent; 566 567 if ( state.preserveSpace ) { 568 // If preserving space then hold of indentation so no 569 // excessive spaces are printed at line breaks, escape 570 // the text content without replacing spaces and print 571 // the text breaking only at line breaks. 
572 saveIndent = _printer.getNextIndent(); 573 _printer.setNextIndent( 0 ); 574 printText( chars, start, length, true, state.unescaped ); 575 _printer.setNextIndent( saveIndent ); 576 } else { 577 printText( chars, start, length, false, state.unescaped ); 578 } 579 } 580 } catch ( IOException except ) { 581 throw new SAXException( except ); 582 } 583 } 584 585 586 public void ignorableWhitespace( char[] chars, int start, int length ) 587 throws SAXException 588 { 589 int i; 590 591 try { 592 content(); 593 594 // Print ignorable whitespaces only when indenting, after 595 // all they are indentation. Cancel the indentation to 596 // not indent twice. 597 if ( _indenting ) { 598 _printer.setThisIndent( 0 ); 599 for ( i = start ; length-- > 0 ; ++i ) 600 _printer.printText( chars[ i ] ); 601 } 602 } catch ( IOException except ) { 603 throw new SAXException( except ); 604 } 605 } 606 607 608 public final void processingInstruction( String target, String code ) 609 throws SAXException 610 { 611 try { 612 processingInstructionIO( target, code ); 613 } catch ( IOException except ) { 614 throw new SAXException( except ); 615 } 616 } 617 618 public void processingInstructionIO( String target, String code ) 619 throws IOException 620 { 621 int index; 622 ElementState state; 623 624 state = content(); 625 626 // Create the processing instruction textual representation. 627 // Make sure we don't have '?>' inside either target or code. 628 index = target.indexOf( "?>" ); 629 if ( index >= 0 ) 630 fStrBuffer.append( "<?" ).append( target.substring( 0, index ) ); 631 else 632 fStrBuffer.append( "<?" ).append( target ); 633 if ( code != null ) { 634 fStrBuffer.append( ' ' ); 635 index = code.indexOf( "?>" ); 636 if ( index >= 0 ) 637 fStrBuffer.append( code.substring( 0, index ) ); 638 else 639 fStrBuffer.append( code ); 640 } 641 fStrBuffer.append( "?>" ); 642 643 // If before the root element (or after it), do not print 644 // the PI directly but place it in the pre-root vector. 
645 if ( isDocumentState() ) { 646 if ( _preRoot == null ) 647 _preRoot = new ArrayList<>(); 648 _preRoot.add( fStrBuffer.toString() ); 649 } else { 650 _printer.indent(); 651 printText( fStrBuffer.toString(), true, true ); 652 _printer.unindent(); 653 if ( _indenting ) 654 state.afterElement = true; 655 } 656 657 fStrBuffer.setLength(0); 658 } 659 660 661 public void comment( char[] chars, int start, int length ) 662 throws SAXException 663 { 664 try { 665 comment( new String( chars, start, length ) ); 666 } catch ( IOException except ) { 667 throw new SAXException( except ); 668 } 669 } 670 671 672 public void comment( String text ) 673 throws IOException 674 { 675 int index; 676 ElementState state; 677 678 if ( _format.getOmitComments() ) 679 return; 680 681 state = content(); 682 // Create the processing comment textual representation. 683 // Make sure we don't have '-->' inside the comment. 684 index = text.indexOf( "-->" ); 685 if ( index >= 0 ) 686 fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" ); 687 else 688 fStrBuffer.append( "<!--" ).append( text ).append( "-->" ); 689 690 // If before the root element (or after it), do not print 691 // the comment directly but place it in the pre-root vector. 692 if ( isDocumentState() ) { 693 if ( _preRoot == null ) 694 _preRoot = new ArrayList<>(); 695 _preRoot.add( fStrBuffer.toString() ); 696 } else { 697 // Indent this element on a new line if the first 698 // content of the parent element or immediately 699 // following an element. 700 if ( _indenting && ! 
state.preserveSpace) 701 _printer.breakLine(); 702 _printer.indent(); 703 printText( fStrBuffer.toString(), true, true ); 704 _printer.unindent(); 705 if ( _indenting ) 706 state.afterElement = true; 707 } 708 709 fStrBuffer.setLength(0); 710 state.afterComment = true; 711 state.afterElement = false; 712 } 713 714 715 public void startCDATA() 716 { 717 ElementState state; 718 719 state = getElementState(); 720 state.doCData = true; 721 } 722 723 724 public void endCDATA() 725 { 726 ElementState state; 727 728 state = getElementState(); 729 state.doCData = false; 730 } 731 732 733 public void startNonEscaping() 734 { 735 ElementState state; 736 737 state = getElementState(); 738 state.unescaped = true; 739 } 740 741 742 public void endNonEscaping() 743 { 744 ElementState state; 745 746 state = getElementState(); 747 state.unescaped = false; 748 } 749 750 751 public void startPreserving() 752 { 753 ElementState state; 754 755 state = getElementState(); 756 state.preserveSpace = true; 757 } 758 759 760 public void endPreserving() 761 { 762 ElementState state; 763 764 state = getElementState(); 765 state.preserveSpace = false; 766 } 767 768 769 /** 770 * Called at the end of the document to wrap it up. 771 * Will flush the output stream and throw an exception 772 * if any I/O error occured while serializing. 773 * 774 * @throws SAXException An I/O exception occured during 775 * serializing 776 */ 777 public void endDocument() 778 throws SAXException 779 { 780 try { 781 // Print all the elements accumulated outside of 782 // the root element. 783 serializePreRoot(); 784 // Flush the output, this is necessary for fStrBuffered output. 785 _printer.flush(); 786 } catch ( IOException except ) { 787 throw new SAXException( except ); 788 } 789 } 790 791 792 public void startEntity( String name ) 793 { 794 // ??? 795 } 796 797 798 public void endEntity( String name ) 799 { 800 // ??? 
801 } 802 803 804 public void setDocumentLocator( Locator locator ) 805 { 806 // Nothing to do 807 } 808 809 810 //-----------------------------------------// 811 // SAX content handler serializing methods // 812 //-----------------------------------------// 813 814 815 public void skippedEntity ( String name ) 816 throws SAXException 817 { 818 try { 819 endCDATA(); 820 content(); 821 _printer.printText( '&' ); 822 _printer.printText( name ); 823 _printer.printText( ';' ); 824 } catch ( IOException except ) { 825 throw new SAXException( except ); 826 } 827 } 828 829 830 public void startPrefixMapping( String prefix, String uri ) 831 throws SAXException 832 { 833 if ( _prefixes == null ) 834 _prefixes = new HashMap<>(); 835 _prefixes.put( uri, prefix == null ? "" : prefix ); 836 } 837 838 839 public void endPrefixMapping( String prefix ) 840 throws SAXException 841 { 842 } 843 844 845 //------------------------------------------// 846 // SAX DTD/Decl handler serializing methods // 847 //------------------------------------------// 848 849 850 public final void startDTD( String name, String publicId, String systemId ) 851 throws SAXException 852 { 853 try { 854 _printer.enterDTD(); 855 _docTypePublicId = publicId; 856 _docTypeSystemId = systemId; 857 858 } catch ( IOException except ) { 859 throw new SAXException( except ); 860 } 861 } 862 863 864 public void endDTD() 865 { 866 // Nothing to do here, all the magic occurs in startDocument(String). 
867 } 868 869 870 public void elementDecl( String name, String model ) 871 throws SAXException 872 { 873 try { 874 _printer.enterDTD(); 875 _printer.printText( "<!ELEMENT " ); 876 _printer.printText( name ); 877 _printer.printText( ' ' ); 878 _printer.printText( model ); 879 _printer.printText( '>' ); 880 if ( _indenting ) 881 _printer.breakLine(); 882 } catch ( IOException except ) { 883 throw new SAXException( except ); 884 } 885 } 886 887 888 public void attributeDecl( String eName, String aName, String type, 889 String valueDefault, String value ) 890 throws SAXException 891 { 892 try { 893 _printer.enterDTD(); 894 _printer.printText( "<!ATTLIST " ); 895 _printer.printText( eName ); 896 _printer.printText( ' ' ); 897 _printer.printText( aName ); 898 _printer.printText( ' ' ); 899 _printer.printText( type ); 900 if ( valueDefault != null ) { 901 _printer.printText( ' ' ); 902 _printer.printText( valueDefault ); 903 } 904 if ( value != null ) { 905 _printer.printText( " \"" ); 906 printEscaped( value ); 907 _printer.printText( '"' ); 908 } 909 _printer.printText( '>' ); 910 if ( _indenting ) 911 _printer.breakLine(); 912 } catch ( IOException except ) { 913 throw new SAXException( except ); 914 } 915 } 916 917 918 public void internalEntityDecl( String name, String value ) 919 throws SAXException 920 { 921 try { 922 _printer.enterDTD(); 923 _printer.printText( "<!ENTITY " ); 924 _printer.printText( name ); 925 _printer.printText( " \"" ); 926 printEscaped( value ); 927 _printer.printText( "\">" ); 928 if ( _indenting ) 929 _printer.breakLine(); 930 } catch ( IOException except ) { 931 throw new SAXException( except ); 932 } 933 } 934 935 936 public void externalEntityDecl( String name, String publicId, String systemId ) 937 throws SAXException 938 { 939 try { 940 _printer.enterDTD(); 941 unparsedEntityDecl( name, publicId, systemId, null ); 942 } catch ( IOException except ) { 943 throw new SAXException( except ); 944 } 945 } 946 947 948 public void 
unparsedEntityDecl( String name, String publicId, 949 String systemId, String notationName ) 950 throws SAXException 951 { 952 try { 953 _printer.enterDTD(); 954 if ( publicId == null ) { 955 _printer.printText( "<!ENTITY " ); 956 _printer.printText( name ); 957 _printer.printText( " SYSTEM " ); 958 printDoctypeURL( systemId ); 959 } else { 960 _printer.printText( "<!ENTITY " ); 961 _printer.printText( name ); 962 _printer.printText( " PUBLIC " ); 963 printDoctypeURL( publicId ); 964 _printer.printText( ' ' ); 965 printDoctypeURL( systemId ); 966 } 967 if ( notationName != null ) { 968 _printer.printText( " NDATA " ); 969 _printer.printText( notationName ); 970 } 971 _printer.printText( '>' ); 972 if ( _indenting ) 973 _printer.breakLine(); 974 } catch ( IOException except ) { 975 throw new SAXException( except ); 976 } 977 } 978 979 980 public void notationDecl( String name, String publicId, String systemId ) 981 throws SAXException 982 { 983 try { 984 _printer.enterDTD(); 985 if ( publicId != null ) { 986 _printer.printText( "<!NOTATION " ); 987 _printer.printText( name ); 988 _printer.printText( " PUBLIC " ); 989 printDoctypeURL( publicId ); 990 if ( systemId != null ) { 991 _printer.printText( ' ' ); 992 printDoctypeURL( systemId ); 993 } 994 } else { 995 _printer.printText( "<!NOTATION " ); 996 _printer.printText( name ); 997 _printer.printText( " SYSTEM " ); 998 printDoctypeURL( systemId ); 999 } 1000 _printer.printText( '>' ); 1001 if ( _indenting ) 1002 _printer.breakLine(); 1003 } catch ( IOException except ) { 1004 throw new SAXException( except ); 1005 } 1006 } 1007 1008 1009 //------------------------------------------// 1010 // Generic node serializing methods methods // 1011 //------------------------------------------// 1012 1013 1014 /** 1015 * Serialize the DOM node. This method is shared across XML, HTML and XHTML 1016 * serializers and the differences are masked out in a separate {@link 1017 * #serializeElement}. 
1018 * 1019 * @param node The node to serialize 1020 * @see #serializeElement 1021 * @throws IOException An I/O exception occured while 1022 * serializing 1023 */ 1024 @SuppressWarnings("fallthrough") // by design at case Node.DOCUMENT_FRAGMENT_NODE 1025 protected void serializeNode( Node node ) 1026 throws IOException 1027 { 1028 fCurrentNode = node; 1029 1030 // Based on the node type call the suitable SAX handler. 1031 // Only comments entities and documents which are not 1032 // handled by SAX are serialized directly. 1033 switch ( node.getNodeType() ) { 1034 case Node.TEXT_NODE : { 1035 String text; 1036 1037 text = node.getNodeValue(); 1038 if ( text != null ) { 1039 if (fDOMFilter !=null && 1040 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) { 1041 short code = fDOMFilter.acceptNode(node); 1042 switch (code) { 1043 case NodeFilter.FILTER_REJECT: 1044 case NodeFilter.FILTER_SKIP: { 1045 break; 1046 } 1047 default: { 1048 characters(text); 1049 } 1050 } 1051 } 1052 else if ( !_indenting || getElementState().preserveSpace 1053 || (text.replace('\n',' ').trim().length() != 0)) 1054 characters( text ); 1055 1056 } 1057 break; 1058 } 1059 1060 case Node.CDATA_SECTION_NODE : { 1061 String text = node.getNodeValue(); 1062 if ((features & DOMSerializerImpl.CDATA) != 0) { 1063 if (text != null) { 1064 if (fDOMFilter != null 1065 && (fDOMFilter.getWhatToShow() 1066 & NodeFilter.SHOW_CDATA_SECTION) 1067 != 0) { 1068 short code = fDOMFilter.acceptNode(node); 1069 switch (code) { 1070 case NodeFilter.FILTER_REJECT : 1071 case NodeFilter.FILTER_SKIP : 1072 { 1073 // skip the CDATA node 1074 return; 1075 } 1076 default : 1077 { 1078 //fall through.. 1079 } 1080 } 1081 } 1082 startCDATA(); 1083 characters(text); 1084 endCDATA(); 1085 } 1086 } else { 1087 // transform into a text node 1088 characters(text); 1089 } 1090 break; 1091 } 1092 case Node.COMMENT_NODE : { 1093 String text; 1094 1095 if ( ! 
_format.getOmitComments() ) { 1096 text = node.getNodeValue(); 1097 if ( text != null ) { 1098 1099 if (fDOMFilter !=null && 1100 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) { 1101 short code = fDOMFilter.acceptNode(node); 1102 switch (code) { 1103 case NodeFilter.FILTER_REJECT: 1104 case NodeFilter.FILTER_SKIP: { 1105 // skip the comment node 1106 return; 1107 } 1108 default: { 1109 // fall through 1110 } 1111 } 1112 } 1113 comment( text ); 1114 } 1115 } 1116 break; 1117 } 1118 1119 case Node.ENTITY_REFERENCE_NODE : { 1120 Node child; 1121 1122 endCDATA(); 1123 content(); 1124 1125 if (((features & DOMSerializerImpl.ENTITIES) != 0) 1126 || (node.getFirstChild() == null)) { 1127 if (fDOMFilter !=null && 1128 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) { 1129 short code = fDOMFilter.acceptNode(node); 1130 switch (code) { 1131 case NodeFilter.FILTER_REJECT:{ 1132 return; // remove the node 1133 } 1134 case NodeFilter.FILTER_SKIP: { 1135 child = node.getFirstChild(); 1136 while ( child != null ) { 1137 serializeNode( child ); 1138 child = child.getNextSibling(); 1139 } 1140 return; 1141 } 1142 1143 default: { 1144 // fall through 1145 } 1146 } 1147 } 1148 checkUnboundNamespacePrefixedNode(node); 1149 1150 _printer.printText("&"); 1151 _printer.printText(node.getNodeName()); 1152 _printer.printText(";"); 1153 } 1154 else { 1155 child = node.getFirstChild(); 1156 while ( child != null ) { 1157 serializeNode( child ); 1158 child = child.getNextSibling(); 1159 } 1160 } 1161 1162 break; 1163 } 1164 1165 case Node.PROCESSING_INSTRUCTION_NODE : { 1166 1167 if (fDOMFilter !=null && 1168 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) { 1169 short code = fDOMFilter.acceptNode(node); 1170 switch (code) { 1171 case NodeFilter.FILTER_REJECT: 1172 case NodeFilter.FILTER_SKIP: { 1173 return; // skip this node 1174 } 1175 default: { // fall through 1176 } 1177 } 1178 } 1179 processingInstructionIO( 
node.getNodeName(), node.getNodeValue() ); 1180 break; 1181 } 1182 case Node.ELEMENT_NODE : { 1183 1184 if (fDOMFilter !=null && 1185 (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) { 1186 short code = fDOMFilter.acceptNode(node); 1187 switch (code) { 1188 case NodeFilter.FILTER_REJECT: { 1189 return; 1190 } 1191 case NodeFilter.FILTER_SKIP: { 1192 Node child = node.getFirstChild(); 1193 while ( child != null ) { 1194 serializeNode( child ); 1195 child = child.getNextSibling(); 1196 } 1197 return; // skip this node 1198 } 1199 1200 default: { // fall through 1201 } 1202 } 1203 } 1204 serializeElement( (Element) node ); 1205 break; 1206 } 1207 case Node.DOCUMENT_NODE : { 1208 DocumentType docType; 1209 1210 serializeDocument(); 1211 1212 // If there is a document type, use the SAX events to 1213 // serialize it. 1214 docType = ( (Document) node ).getDoctype(); 1215 if (docType != null) { 1216 // DOM Level 2 (or higher) 1217 try { 1218 String internal; 1219 1220 _printer.enterDTD(); 1221 _docTypePublicId = docType.getPublicId(); 1222 _docTypeSystemId = docType.getSystemId(); 1223 internal = docType.getInternalSubset(); 1224 if ( internal != null && internal.length() > 0 ) 1225 _printer.printText( internal ); 1226 endDTD(); 1227 } catch (Exception e) { 1228 // ignore 1229 _printer.enterDTD(); 1230 _docTypePublicId = null; 1231 _docTypeSystemId = null; 1232 endDTD(); 1233 } 1234 serializeDTD(docType.getName()); 1235 1236 } 1237 _started = true; 1238 1239 // !! Fall through 1240 } 1241 case Node.DOCUMENT_FRAGMENT_NODE : { 1242 Node child; 1243 1244 // By definition this will happen if the node is a document, 1245 // document fragment, etc. Just serialize its contents. It will 1246 // work well for other nodes that we do not know how to serialize. 
1247 child = node.getFirstChild(); 1248 while ( child != null ) { 1249 serializeNode( child ); 1250 child = child.getNextSibling(); 1251 } 1252 break; 1253 } 1254 1255 default: 1256 break; 1257 } 1258 } 1259 1260 1261 /* Serializes XML Declaration, according to 'xml-declaration' property. 1262 */ 1263 protected void serializeDocument()throws IOException { 1264 int i; 1265 1266 String dtd = _printer.leaveDTD(); 1267 if (! _started) { 1268 1269 if (! _format.getOmitXMLDeclaration()) { 1270 StringBuffer buffer; 1271 1272 // Serialize the document declaration appreaing at the head 1273 // of very XML document (unless asked not to). 1274 buffer = new StringBuffer( "<?xml version=\"" ); 1275 if (_format.getVersion() != null) 1276 buffer.append( _format.getVersion() ); 1277 else 1278 buffer.append( "1.0" ); 1279 buffer.append( '"' ); 1280 String format_encoding = _format.getEncoding(); 1281 if (format_encoding != null) { 1282 buffer.append( " encoding=\"" ); 1283 buffer.append( format_encoding ); 1284 buffer.append( '"' ); 1285 } 1286 if (_format.getStandalone() && _docTypeSystemId == null && 1287 _docTypePublicId == null) 1288 buffer.append( " standalone=\"yes\"" ); 1289 buffer.append( "?>" ); 1290 _printer.printText( buffer ); 1291 _printer.breakLine(); 1292 } 1293 } 1294 1295 // Always serialize these, even if not te first root element. 1296 serializePreRoot(); 1297 1298 } 1299 1300 /* Serializes DTD, if present. 1301 */ 1302 protected void serializeDTD(String name) throws IOException{ 1303 1304 String dtd = _printer.leaveDTD(); 1305 if (! _format.getOmitDocumentType()) { 1306 if (_docTypeSystemId != null) { 1307 // System identifier must be specified to print DOCTYPE. 1308 // If public identifier is specified print 'PUBLIC 1309 // <public> <system>', if not, print 'SYSTEM <system>'. 
1310 _printer.printText( "<!DOCTYPE " ); 1311 _printer.printText( name ); 1312 if (_docTypePublicId != null) { 1313 _printer.printText( " PUBLIC " ); 1314 printDoctypeURL( _docTypePublicId ); 1315 if (_indenting) { 1316 _printer.breakLine(); 1317 for (int i = 0 ; i < 18 + name.length() ; ++i) 1318 _printer.printText( " " ); 1319 } else 1320 _printer.printText( " " ); 1321 printDoctypeURL( _docTypeSystemId ); 1322 } else { 1323 _printer.printText( " SYSTEM " ); 1324 printDoctypeURL( _docTypeSystemId ); 1325 } 1326 1327 // If we accumulated any DTD contents while printing. 1328 // this would be the place to print it. 1329 if (dtd != null && dtd.length() > 0) { 1330 _printer.printText( " [" ); 1331 printText( dtd, true, true ); 1332 _printer.printText( ']' ); 1333 } 1334 1335 _printer.printText( ">" ); 1336 _printer.breakLine(); 1337 } else if (dtd != null && dtd.length() > 0) { 1338 _printer.printText( "<!DOCTYPE " ); 1339 _printer.printText( name ); 1340 _printer.printText( " [" ); 1341 printText( dtd, true, true ); 1342 _printer.printText( "]>" ); 1343 _printer.breakLine(); 1344 } 1345 } 1346 } 1347 1348 1349 /** 1350 * Must be called by a method about to print any type of content. 1351 * If the element was just opened, the opening tag is closed and 1352 * will be matched to a closing tag. Returns the current element 1353 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false. 1354 * 1355 * @return The current element state 1356 * @throws IOException An I/O exception occurred while 1357 * serializing 1358 */ 1359 protected ElementState content() 1360 throws IOException 1361 { 1362 ElementState state; 1363 1364 state = getElementState(); 1365 if ( ! isDocumentState() ) { 1366 // Need to close CData section first 1367 if ( state.inCData && ! 
state.doCData ) { 1368 _printer.printText( "]]>" ); 1369 state.inCData = false; 1370 } 1371 // If this is the first content in the element, 1372 // change the state to not-empty and close the 1373 // opening element tag. 1374 if ( state.empty ) { 1375 _printer.printText( '>' ); 1376 state.empty = false; 1377 } 1378 // Except for one content type, all of them 1379 // are not last element. That one content 1380 // type will take care of itself. 1381 state.afterElement = false; 1382 // Except for one content type, all of them 1383 // are not last comment. That one content 1384 // type will take care of itself. 1385 state.afterComment = false; 1386 } 1387 return state; 1388 } 1389 1390 1391 /** 1392 * Called to print the text contents in the prevailing element format. 1393 * Since this method is capable of printing text as CDATA, it is used 1394 * for that purpose as well. White space handling is determined by the 1395 * current element state. In addition, the output format can dictate 1396 * whether the text is printed as CDATA or unescaped. 1397 * 1398 * @param text The text to print 1399 * @throws IOException An I/O exception occured while 1400 * serializing 1401 */ 1402 protected void characters( String text ) 1403 throws IOException 1404 { 1405 ElementState state; 1406 1407 state = content(); 1408 // Check if text should be print as CDATA section or unescaped 1409 // based on elements listed in the output format (the element 1410 // state) or whether we are inside a CDATA section or entity. 1411 1412 if ( state.inCData || state.doCData ) { 1413 // Print a CDATA section. The text is not escaped, but ']]>' 1414 // appearing in the code must be identified and dealt with. 1415 // The contents of a text node is considered space preserving. 1416 if ( ! 
state.inCData ) { 1417 _printer.printText("<![CDATA["); 1418 state.inCData = true; 1419 } 1420 int saveIndent = _printer.getNextIndent(); 1421 _printer.setNextIndent( 0 ); 1422 printCDATAText( text); 1423 _printer.setNextIndent( saveIndent ); 1424 1425 } else { 1426 1427 int saveIndent; 1428 1429 if ( state.preserveSpace ) { 1430 // If preserving space then hold of indentation so no 1431 // excessive spaces are printed at line breaks, escape 1432 // the text content without replacing spaces and print 1433 // the text breaking only at line breaks. 1434 saveIndent = _printer.getNextIndent(); 1435 _printer.setNextIndent( 0 ); 1436 printText( text, true, state.unescaped ); 1437 _printer.setNextIndent( saveIndent ); 1438 } else { 1439 printText( text, false, state.unescaped ); 1440 } 1441 } 1442 } 1443 1444 1445 /** 1446 * Returns the suitable entity reference for this character value, 1447 * or null if no such entity exists. Calling this method with <tt>'&'</tt> 1448 * will return <tt>"&amp;"</tt>. 1449 * 1450 * @param ch Character value 1451 * @return Character entity name, or null 1452 */ 1453 protected abstract String getEntityRef( int ch ); 1454 1455 1456 /** 1457 * Called to serializee the DOM element. The element is serialized based on 1458 * the serializer's method (XML, HTML, XHTML). 1459 * 1460 * @param elem The element to serialize 1461 * @throws IOException An I/O exception occured while 1462 * serializing 1463 */ 1464 protected abstract void serializeElement( Element elem ) 1465 throws IOException; 1466 1467 1468 /** 1469 * Comments and PIs cannot be serialized before the root element, 1470 * because the root element serializes the document type, which 1471 * generally comes first. Instead such PIs and comments are 1472 * accumulated inside a vector and serialized by calling this 1473 * method. Will be called when the root element is serialized 1474 * and when the document finished serializing. 
1475 * 1476 * @throws IOException An I/O exception occured while 1477 * serializing 1478 */ 1479 protected void serializePreRoot() 1480 throws IOException 1481 { 1482 int i; 1483 1484 if ( _preRoot != null ) { 1485 for ( i = 0 ; i < _preRoot.size() ; ++i ) { 1486 printText(_preRoot.get( i ), true, true ); 1487 if ( _indenting ) 1488 _printer.breakLine(); 1489 } 1490 _preRoot.clear(); 1491 } 1492 } 1493 1494 1495 //---------------------------------------------// 1496 // Text pretty printing and formatting methods // 1497 //---------------------------------------------// 1498 1499 protected void printCDATAText( String text ) throws IOException { 1500 int length = text.length(); 1501 char ch; 1502 1503 for ( int index = 0 ; index < length; ++index ) { 1504 ch = text.charAt( index ); 1505 if (ch == ']' 1506 && index + 2 < length 1507 && text.charAt(index + 1) == ']' 1508 && text.charAt(index + 2) == '>') { // check for ']]>' 1509 if (fDOMErrorHandler != null) { 1510 // REVISIT: this means that if DOM Error handler is not registered we don't report any 1511 // fatal errors and might serialize not wellformed document 1512 if ((features & DOMSerializerImpl.SPLITCDATA) == 0) { 1513 String msg = DOMMessageFormatter.formatMessage( 1514 DOMMessageFormatter.SERIALIZER_DOMAIN, 1515 "EndingCDATA", 1516 null); 1517 if ((features & DOMSerializerImpl.WELLFORMED) != 0) { 1518 // issue fatal error 1519 modifyDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, "wf-invalid-character", fCurrentNode); 1520 fDOMErrorHandler.handleError(fDOMError); 1521 throw new LSException(LSException.SERIALIZE_ERR, msg); 1522 } 1523 // issue error 1524 modifyDOMError(msg, DOMError.SEVERITY_ERROR, "cdata-section-not-splitted", fCurrentNode); 1525 if (!fDOMErrorHandler.handleError(fDOMError)) { 1526 throw new LSException(LSException.SERIALIZE_ERR, msg); 1527 } 1528 } else { 1529 // issue warning 1530 String msg = 1531 DOMMessageFormatter.formatMessage( 1532 DOMMessageFormatter.SERIALIZER_DOMAIN, 1533 
"SplittingCDATA", 1534 null); 1535 modifyDOMError( 1536 msg, 1537 DOMError.SEVERITY_WARNING, 1538 null, fCurrentNode); 1539 fDOMErrorHandler.handleError(fDOMError); 1540 } 1541 } 1542 // split CDATA section 1543 _printer.printText("]]]]><![CDATA[>"); 1544 index += 2; 1545 continue; 1546 } 1547 1548 if (!XMLChar.isValid(ch)) { 1549 // check if it is surrogate 1550 if (++index <length) { 1551 surrogates(ch, text.charAt(index),true); 1552 } 1553 else { 1554 fatalError("The character '"+ch+"' is an invalid XML character"); 1555 } 1556 continue; 1557 } 1558 if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0x7F ) || 1559 ch == '\n' || ch == '\r' || ch == '\t' ) { 1560 _printer.printText(ch); 1561 } 1562 else { 1563 1564 // The character is not printable -- split CDATA section 1565 _printer.printText("]]>&#x"); 1566 _printer.printText(Integer.toHexString(ch)); 1567 _printer.printText(";<![CDATA["); 1568 } 1569 } 1570 } 1571 1572 1573 protected void surrogates(int high, int low, boolean inContent) throws IOException{ 1574 if (XMLChar.isHighSurrogate(high)) { 1575 if (!XMLChar.isLowSurrogate(low)) { 1576 //Invalid XML 1577 fatalError("The character '"+(char)low+"' is an invalid XML character"); 1578 } 1579 else { 1580 int supplemental = XMLChar.supplemental((char)high, (char)low); 1581 if (!XMLChar.isValid(supplemental)) { 1582 //Invalid XML 1583 fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); 1584 } 1585 else { 1586 if (inContent && content().inCData) { 1587 _printer.printText("]]>&#x"); 1588 _printer.printText(Integer.toHexString(supplemental)); 1589 _printer.printText(";<![CDATA["); 1590 } 1591 else { 1592 printHex(supplemental); 1593 } 1594 } 1595 } 1596 } else { 1597 fatalError("The character '"+(char)high+"' is an invalid XML character"); 1598 } 1599 1600 } 1601 1602 /** 1603 * Called to print additional text with whitespace handling. 
1604 * If spaces are preserved, the text is printed as if by calling 1605 * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine} 1606 * for each new line. If spaces are not preserved, the text is 1607 * broken at space boundaries if longer than the line width; 1608 * Multiple spaces are printed as such, but spaces at beginning 1609 * of line are removed. 1610 * 1611 * @param chars The text to print 1612 * @param start The start offset 1613 * @param length The number of characters 1614 * @param preserveSpace Space preserving flag 1615 * @param unescaped Print unescaped 1616 */ 1617 protected void printText( char[] chars, int start, int length, 1618 boolean preserveSpace, boolean unescaped ) 1619 throws IOException 1620 { 1621 1622 if ( preserveSpace ) { 1623 // Preserving spaces: the text must print exactly as it is, 1624 // without breaking when spaces appear in the text and without 1625 // consolidating spaces. If a line terminator is used, a line 1626 // break will occur. 1627 while ( length-- > 0 ) { 1628 char ch = chars[ start ]; 1629 ++start; 1630 if ( ch == '\n' || ch == '\r' || unescaped ) { 1631 _printer.printText( ch ); 1632 } 1633 else { 1634 printEscaped( ch ); 1635 } 1636 } 1637 } else { 1638 // Not preserving spaces: print one part at a time, and 1639 // use spaces between parts to break them into different 1640 // lines. Spaces at beginning of line will be stripped 1641 // by printing mechanism. Line terminator is treated 1642 // no different than other text part. 
1643 while ( length-- > 0 ) { 1644 char ch = chars[ start ]; 1645 ++start; 1646 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1647 _printer.printSpace(); 1648 } 1649 else if ( unescaped ) { 1650 _printer.printText( ch ); 1651 } 1652 else { 1653 printEscaped( ch ); 1654 } 1655 } 1656 } 1657 } 1658 1659 1660 protected void printText( String text, boolean preserveSpace, boolean unescaped ) 1661 throws IOException 1662 { 1663 int index; 1664 char ch; 1665 1666 if ( preserveSpace ) { 1667 // Preserving spaces: the text must print exactly as it is, 1668 // without breaking when spaces appear in the text and without 1669 // consolidating spaces. If a line terminator is used, a line 1670 // break will occur. 1671 for ( index = 0 ; index < text.length() ; ++index ) { 1672 ch = text.charAt( index ); 1673 if ( ch == '\n' || ch == '\r' || unescaped ) 1674 _printer.printText( ch ); 1675 else 1676 printEscaped( ch ); 1677 } 1678 } else { 1679 // Not preserving spaces: print one part at a time, and 1680 // use spaces between parts to break them into different 1681 // lines. Spaces at beginning of line will be stripped 1682 // by printing mechanism. Line terminator is treated 1683 // no different than other text part. 1684 for ( index = 0 ; index < text.length() ; ++index ) { 1685 ch = text.charAt( index ); 1686 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) { 1687 _printer.printSpace(); 1688 } 1689 else if ( unescaped ) { 1690 _printer.printText( ch ); 1691 } 1692 else { 1693 printEscaped( ch ); 1694 } 1695 } 1696 } 1697 } 1698 1699 1700 /** 1701 * Print a document type public or system identifier URL. 1702 * Encapsulates the URL in double quotes, escapes non-printing 1703 * characters and print it equivalent to {@link #printText}. 
1704 * 1705 * @param url The document type url to print 1706 */ 1707 protected void printDoctypeURL( String url ) 1708 throws IOException 1709 { 1710 int i; 1711 1712 _printer.printText( '"' ); 1713 for( i = 0 ; i < url.length() ; ++i ) { 1714 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) { 1715 _printer.printText( '%' ); 1716 _printer.printText( Integer.toHexString( url.charAt( i ) ) ); 1717 } else 1718 _printer.printText( url.charAt( i ) ); 1719 } 1720 _printer.printText( '"' ); 1721 } 1722 1723 1724 protected void printEscaped( int ch ) 1725 throws IOException 1726 { 1727 String charRef; 1728 // If there is a suitable entity reference for this 1729 // character, print it. The list of available entity 1730 // references is almost but not identical between 1731 // XML and HTML. 1732 charRef = getEntityRef( ch ); 1733 if ( charRef != null ) { 1734 _printer.printText( '&' ); 1735 _printer.printText( charRef ); 1736 _printer.printText( ';' ); 1737 } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0x7F ) || 1738 ch == '\n' || ch == '\r' || ch == '\t' ) { 1739 // Non printables are below ASCII space but not tab or line 1740 // terminator, ASCII delete, or above a certain Unicode threshold. 1741 if (ch < 0x10000) { 1742 _printer.printText((char)ch ); 1743 } else { 1744 _printer.printText((char)(((ch-0x10000)>>10)+0xd800)); 1745 _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00)); 1746 } 1747 } else { 1748 printHex(ch); 1749 } 1750 } 1751 1752 /** 1753 * Escapes chars 1754 */ 1755 final void printHex( int ch) throws IOException { 1756 _printer.printText( "&#x" ); 1757 _printer.printText(Integer.toHexString(ch)); 1758 _printer.printText( ';' ); 1759 1760 } 1761 1762 1763 /** 1764 * Escapes a string so it may be printed as text content or attribute 1765 * value. Non printable characters are escaped using character references. 
1766 * Where the format specifies a deault entity reference, that reference 1767 * is used (e.g. <tt>&lt;</tt>). 1768 * 1769 * @param source The string to escape 1770 */ 1771 protected void printEscaped( String source ) 1772 throws IOException 1773 { 1774 for ( int i = 0 ; i < source.length() ; ++i ) { 1775 int ch = source.charAt(i); 1776 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) { 1777 int lowch = source.charAt(i+1); 1778 if ((lowch & 0xfc00) == 0xdc00) { 1779 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00; 1780 i++; 1781 } 1782 } 1783 printEscaped(ch); 1784 } 1785 } 1786 1787 1788 //--------------------------------// 1789 // Element state handling methods // 1790 //--------------------------------// 1791 1792 1793 /** 1794 * Return the state of the current element. 1795 * 1796 * @return Current element state 1797 */ 1798 protected ElementState getElementState() 1799 { 1800 return _elementStates[ _elementStateCount ]; 1801 } 1802 1803 1804 /** 1805 * Enter a new element state for the specified element. 1806 * Tag name and space preserving is specified, element 1807 * state is initially empty. 1808 * 1809 * @return Current element state, or null 1810 */ 1811 protected ElementState enterElementState( String namespaceURI, String localName, 1812 String rawName, boolean preserveSpace ) 1813 { 1814 ElementState state; 1815 1816 if ( _elementStateCount + 1 == _elementStates.length ) { 1817 ElementState[] newStates; 1818 1819 // Need to create a larger array of states. This does not happen 1820 // often, unless the document is really deep. 
1821 newStates = new ElementState[ _elementStates.length + 10 ]; 1822 for ( int i = 0 ; i < _elementStates.length ; ++i ) 1823 newStates[ i ] = _elementStates[ i ]; 1824 for ( int i = _elementStates.length ; i < newStates.length ; ++i ) 1825 newStates[ i ] = new ElementState(); 1826 _elementStates = newStates; 1827 } 1828 1829 ++_elementStateCount; 1830 state = _elementStates[ _elementStateCount ]; 1831 state.namespaceURI = namespaceURI; 1832 state.localName = localName; 1833 state.rawName = rawName; 1834 state.preserveSpace = preserveSpace; 1835 state.empty = true; 1836 state.afterElement = false; 1837 state.afterComment = false; 1838 state.doCData = state.inCData = false; 1839 state.unescaped = false; 1840 state.prefixes = _prefixes; 1841 1842 _prefixes = null; 1843 return state; 1844 } 1845 1846 1847 /** 1848 * Leave the current element state and return to the 1849 * state of the parent element. If this was the root 1850 * element, return to the state of the document. 1851 * 1852 * @return Previous element state 1853 */ 1854 protected ElementState leaveElementState() 1855 { 1856 if ( _elementStateCount > 0 ) { 1857 /*Corrected by David Blondeau (blondeau@intalio.com)*/ 1858 _prefixes = null; 1859 //_prefixes = _elementStates[ _elementStateCount ].prefixes; 1860 -- _elementStateCount; 1861 return _elementStates[ _elementStateCount ]; 1862 } 1863 String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null); 1864 throw new IllegalStateException(msg); 1865 } 1866 1867 1868 /** 1869 * Returns true if in the state of the document. 1870 * Returns true before entering any element and after 1871 * leaving the root element. 1872 * 1873 * @return True if in the state of the document 1874 */ 1875 protected boolean isDocumentState() { 1876 return _elementStateCount == 0; 1877 } 1878 1879 /** Clears document state. 
**/ 1880 final void clearDocumentState() { 1881 _elementStateCount = 0; 1882 } 1883 1884 /** 1885 * Returns the namespace prefix for the specified URI. 1886 * If the URI has been mapped to a prefix, returns the 1887 * prefix, otherwise returns null. 1888 * 1889 * @param namespaceURI The namespace URI 1890 * @return The namespace prefix if known, or null 1891 */ 1892 protected String getPrefix( String namespaceURI ) 1893 { 1894 String prefix; 1895 1896 if ( _prefixes != null ) { 1897 prefix = _prefixes.get( namespaceURI ); 1898 if ( prefix != null ) 1899 return prefix; 1900 } 1901 if ( _elementStateCount == 0 ) { 1902 return null; 1903 } 1904 for ( int i = _elementStateCount ; i > 0 ; --i ) { 1905 if ( _elementStates[ i ].prefixes != null ) { 1906 prefix = _elementStates[ i ].prefixes.get( namespaceURI ); 1907 if ( prefix != null ) 1908 return prefix; 1909 } 1910 } 1911 return null; 1912 } 1913 1914 /** 1915 * The method modifies global DOM error object 1916 * 1917 * @param message 1918 * @param severity 1919 * @param type 1920 * @return a DOMError 1921 */ 1922 protected DOMError modifyDOMError(String message, short severity, String type, Node node){ 1923 fDOMError.reset(); 1924 fDOMError.fMessage = message; 1925 fDOMError.fType = type; 1926 fDOMError.fSeverity = severity; 1927 fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null); 1928 return fDOMError; 1929 1930 } 1931 1932 1933 protected void fatalError(String message) throws IOException{ 1934 if (fDOMErrorHandler != null) { 1935 modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, null, fCurrentNode); 1936 fDOMErrorHandler.handleError(fDOMError); 1937 } 1938 else { 1939 throw new IOException(message); 1940 } 1941 } 1942 1943 /** 1944 * DOM level 3: 1945 * Check a node to determine if it contains unbound namespace prefixes. 
     *
     * @param node The node to check for unbound namespace prefixes
     * @throws IOException Declared for overriding implementations; this
     *   implementation does not throw
     */
    protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{
        // Empty in this class: no check is performed here. The method is
        // protected and non-final, so a subclass can supply the real check.
        // NOTE(review): presumably overridden by the XML serializer --
        // confirm against the subclasses.
    }
}