1 /* 2 * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.utils; 22 23 import com.sun.org.apache.xml.internal.res.XMLErrorResources; 24 import com.sun.org.apache.xml.internal.res.XMLMessages; 25 import java.util.List; 26 import java.util.Stack; 27 import org.w3c.dom.CDATASection; 28 import org.w3c.dom.Document; 29 import org.w3c.dom.DocumentFragment; 30 import org.w3c.dom.Element; 31 import org.w3c.dom.Node; 32 import org.w3c.dom.Text; 33 import org.xml.sax.Attributes; 34 import org.xml.sax.ContentHandler; 35 import org.xml.sax.Locator; 36 import org.xml.sax.ext.LexicalHandler; 37 /** 38 * This class takes SAX events (in addition to some extra events 39 * that SAX doesn't handle yet) and adds the result to a document 40 * or document fragment. 41 * @xsl.usage general 42 * @LastModified: Oct 2017 43 */ 44 public class DOMBuilder 45 implements ContentHandler, LexicalHandler 46 { 47 48 /** Root document */ 49 public Document m_doc; 50 51 /** Current node */ 52 protected Node m_currentNode = null; 53 54 /** The root node */ 55 protected Node m_root = null; 56 57 /** The next sibling node */ 58 protected Node m_nextSibling = null; 59 60 /** First node of document fragment or null if not a DocumentFragment */ 61 public DocumentFragment m_docFrag = null; 62 63 /** Stack of element nodes */ 64 protected Stack<Node> m_elemStack = new Stack<>(); 65 66 /** 67 * DOMBuilder instance constructor... it will add the DOM nodes 68 * to the document fragment. 69 * 70 * @param doc Root document 71 * @param node Current node 72 */ 73 public DOMBuilder(Document doc, Node node) 74 { 75 m_doc = doc; 76 m_currentNode = m_root = node; 77 78 if (node instanceof Element) 79 m_elemStack.push(node); 80 } 81 82 /** 83 * DOMBuilder instance constructor... it will add the DOM nodes 84 * to the document fragment. 85 * 86 * @param doc Root document 87 * @param docFrag Document fragment 88 */ 89 public DOMBuilder(Document doc, DocumentFragment docFrag) 90 { 91 m_doc = doc; 92 m_docFrag = docFrag; 93 } 94 95 /** 96 * DOMBuilder instance constructor... it will add the DOM nodes 97 * to the document. 98 * 99 * @param doc Root document 100 */ 101 public DOMBuilder(Document doc) 102 { 103 m_doc = doc; 104 } 105 106 /** 107 * Get the root document or DocumentFragment of the DOM being created. 108 * 109 * @return The root document or document fragment if not null 110 */ 111 public Node getRootDocument() 112 { 113 return (null != m_docFrag) ? (Node) m_docFrag : (Node) m_doc; 114 } 115 116 /** 117 * Get the root node of the DOM tree. 118 */ 119 public Node getRootNode() 120 { 121 return m_root; 122 } 123 124 /** 125 * Get the node currently being processed. 126 * 127 * @return the current node being processed 128 */ 129 public Node getCurrentNode() 130 { 131 return m_currentNode; 132 } 133 134 /** 135 * Set the next sibling node, which is where the result nodes 136 * should be inserted before. 137 * 138 * @param nextSibling the next sibling node. 139 */ 140 public void setNextSibling(Node nextSibling) 141 { 142 m_nextSibling = nextSibling; 143 } 144 145 /** 146 * Return the next sibling node. 147 * 148 * @return the next sibling node. 149 */ 150 public Node getNextSibling() 151 { 152 return m_nextSibling; 153 } 154 155 /** 156 * Return null since there is no Writer for this class. 157 * 158 * @return null 159 */ 160 public java.io.Writer getWriter() 161 { 162 return null; 163 } 164 165 /** 166 * Append a node to the current container. 167 * 168 * @param newNode New node to append 169 */ 170 protected void append(Node newNode) throws org.xml.sax.SAXException 171 { 172 173 Node currentNode = m_currentNode; 174 175 if (null != currentNode) 176 { 177 if (currentNode == m_root && m_nextSibling != null) 178 currentNode.insertBefore(newNode, m_nextSibling); 179 else 180 currentNode.appendChild(newNode); 181 182 // System.out.println(newNode.getNodeName()); 183 } 184 else if (null != m_docFrag) 185 { 186 if (m_nextSibling != null) 187 m_docFrag.insertBefore(newNode, m_nextSibling); 188 else 189 m_docFrag.appendChild(newNode); 190 } 191 else 192 { 193 boolean ok = true; 194 short type = newNode.getNodeType(); 195 196 if (type == Node.TEXT_NODE) 197 { 198 String data = newNode.getNodeValue(); 199 200 if ((null != data) && (data.trim().length() > 0)) 201 { 202 throw new org.xml.sax.SAXException( 203 XMLMessages.createXMLMessage( 204 XMLErrorResources.ER_CANT_OUTPUT_TEXT_BEFORE_DOC, null)); //"Warning: can't output text before document element! Ignoring..."); 205 } 206 207 ok = false; 208 } 209 else if (type == Node.ELEMENT_NODE) 210 { 211 if (m_doc.getDocumentElement() != null) 212 { 213 ok = false; 214 215 throw new org.xml.sax.SAXException( 216 XMLMessages.createXMLMessage( 217 XMLErrorResources.ER_CANT_HAVE_MORE_THAN_ONE_ROOT, null)); //"Can't have more than one root on a DOM!"); 218 } 219 } 220 221 if (ok) 222 { 223 if (m_nextSibling != null) 224 m_doc.insertBefore(newNode, m_nextSibling); 225 else 226 m_doc.appendChild(newNode); 227 } 228 } 229 } 230 231 /** 232 * Receive an object for locating the origin of SAX document events. 233 * 234 * <p>SAX parsers are strongly encouraged (though not absolutely 235 * required) to supply a locator: if it does so, it must supply 236 * the locator to the application by invoking this method before 237 * invoking any of the other methods in the ContentHandler 238 * interface.</p> 239 * 240 * <p>The locator allows the application to determine the end 241 * position of any document-related event, even if the parser is 242 * not reporting an error. Typically, the application will 243 * use this information for reporting its own errors (such as 244 * character content that does not match an application's 245 * business rules). The information returned by the locator 246 * is probably not sufficient for use with a search engine.</p> 247 * 248 * <p>Note that the locator will return correct information only 249 * during the invocation of the events in this interface. The 250 * application should not attempt to use it at any other time.</p> 251 * 252 * @param locator An object that can return the location of 253 * any SAX document event. 254 * @see org.xml.sax.Locator 255 */ 256 public void setDocumentLocator(Locator locator) 257 { 258 259 // No action for the moment. 260 } 261 262 /** 263 * Receive notification of the beginning of a document. 264 * 265 * <p>The SAX parser will invoke this method only once, before any 266 * other methods in this interface or in DTDHandler (except for 267 * setDocumentLocator).</p> 268 */ 269 public void startDocument() throws org.xml.sax.SAXException 270 { 271 272 // No action for the moment. 273 } 274 275 /** 276 * Receive notification of the end of a document. 277 * 278 * <p>The SAX parser will invoke this method only once, and it will 279 * be the last method invoked during the parse. The parser shall 280 * not invoke this method until it has either abandoned parsing 281 * (because of an unrecoverable error) or reached the end of 282 * input.</p> 283 */ 284 public void endDocument() throws org.xml.sax.SAXException 285 { 286 287 // No action for the moment. 288 } 289 290 /** 291 * Receive notification of the beginning of an element. 292 * 293 * <p>The Parser will invoke this method at the beginning of every 294 * element in the XML document; there will be a corresponding 295 * endElement() event for every startElement() event (even when the 296 * element is empty). All of the element's content will be 297 * reported, in order, before the corresponding endElement() 298 * event.</p> 299 * 300 * <p>If the element name has a namespace prefix, the prefix will 301 * still be attached. Note that the attribute list provided will 302 * contain only attributes with explicit values (specified or 303 * defaulted): #IMPLIED attributes will be omitted.</p> 304 * 305 * 306 * @param ns The namespace of the node 307 * @param localName The local part of the qualified name 308 * @param name The element name. 309 * @param atts The attributes attached to the element, if any. 310 * @see #endElement 311 * @see org.xml.sax.Attributes 312 */ 313 public void startElement( 314 String ns, String localName, String name, Attributes atts) 315 throws org.xml.sax.SAXException 316 { 317 318 Element elem; 319 320 // Note that the namespace-aware call must be used to correctly 321 // construct a Level 2 DOM, even for non-namespaced nodes. 322 if ((null == ns) || (ns.length() == 0)) 323 elem = m_doc.createElementNS(null,name); 324 else 325 elem = m_doc.createElementNS(ns, name); 326 327 append(elem); 328 329 try 330 { 331 int nAtts = atts.getLength(); 332 333 if (0 != nAtts) 334 { 335 for (int i = 0; i < nAtts; i++) 336 { 337 338 //System.out.println("type " + atts.getType(i) + " name " + atts.getLocalName(i) ); 339 // First handle a possible ID attribute 340 if (atts.getType(i).equalsIgnoreCase("ID")) 341 setIDAttribute(atts.getValue(i), elem); 342 343 String attrNS = atts.getURI(i); 344 345 if("".equals(attrNS)) 346 attrNS = null; // DOM represents no-namespace as null 347 348 // System.out.println("attrNS: "+attrNS+", localName: "+atts.getQName(i) 349 // +", qname: "+atts.getQName(i)+", value: "+atts.getValue(i)); 350 // Crimson won't let us set an xmlns: attribute on the DOM. 351 String attrQName = atts.getQName(i); 352 353 // In SAX, xmlns[:] attributes have an empty namespace, while in DOM they 354 // should have the xmlns namespace 355 if (attrQName.startsWith("xmlns:") || attrQName.equals("xmlns")) { 356 attrNS = "http://www.w3.org/2000/xmlns/"; 357 } 358 359 // ALWAYS use the DOM Level 2 call! 360 elem.setAttributeNS(attrNS,attrQName, atts.getValue(i)); 361 } 362 } 363 364 // append(elem); 365 366 m_elemStack.push(elem); 367 368 m_currentNode = elem; 369 370 // append(elem); 371 } 372 catch(java.lang.Exception de) 373 { 374 // de.printStackTrace(); 375 throw new org.xml.sax.SAXException(de); 376 } 377 378 } 379 380 /** 381 382 383 384 * Receive notification of the end of an element. 385 * 386 * <p>The SAX parser will invoke this method at the end of every 387 * element in the XML document; there will be a corresponding 388 * startElement() event for every endElement() event (even when the 389 * element is empty).</p> 390 * 391 * <p>If the element name has a namespace prefix, the prefix will 392 * still be attached to the name.</p> 393 * 394 * 395 * @param ns the namespace of the element 396 * @param localName The local part of the qualified name of the element 397 * @param name The element name 398 */ 399 public void endElement(String ns, String localName, String name) 400 throws org.xml.sax.SAXException 401 { 402 m_elemStack.pop(); 403 m_currentNode = m_elemStack.isEmpty() ? null : m_elemStack.peek(); 404 } 405 406 /** 407 * Set an ID string to node association in the ID table. 408 * 409 * @param id The ID string. 410 * @param elem The associated ID. 411 */ 412 public void setIDAttribute(String id, Element elem) 413 { 414 415 // Do nothing. This method is meant to be overiden. 416 } 417 418 /** 419 * Receive notification of character data. 420 * 421 * <p>The Parser will call this method to report each chunk of 422 * character data. SAX parsers may return all contiguous character 423 * data in a single chunk, or they may split it into several 424 * chunks; however, all of the characters in any single event 425 * must come from the same external entity, so that the Locator 426 * provides useful information.</p> 427 * 428 * <p>The application must not attempt to read from the array 429 * outside of the specified range.</p> 430 * 431 * <p>Note that some parsers will report whitespace using the 432 * ignorableWhitespace() method rather than this one (validating 433 * parsers must do so).</p> 434 * 435 * @param ch The characters from the XML document. 436 * @param start The start position in the array. 437 * @param length The number of characters to read from the array. 438 * @see #ignorableWhitespace 439 * @see org.xml.sax.Locator 440 */ 441 public void characters(char ch[], int start, int length) throws org.xml.sax.SAXException 442 { 443 if(isOutsideDocElem() 444 && com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) 445 return; // avoid DOM006 Hierarchy request error 446 447 if (m_inCData) 448 { 449 cdata(ch, start, length); 450 451 return; 452 } 453 454 String s = new String(ch, start, length); 455 Node childNode; 456 childNode = m_currentNode != null ? m_currentNode.getLastChild(): null; 457 if( childNode != null && childNode.getNodeType() == Node.TEXT_NODE ){ 458 ((Text)childNode).appendData(s); 459 } 460 else{ 461 Text text = m_doc.createTextNode(s); 462 append(text); 463 } 464 } 465 466 /** 467 * If available, when the disable-output-escaping attribute is used, 468 * output raw text without escaping. A PI will be inserted in front 469 * of the node with the name "lotusxsl-next-is-raw" and a value of 470 * "formatter-to-dom". 471 * 472 * @param ch Array containing the characters 473 * @param start Index to start of characters in the array 474 * @param length Number of characters in the array 475 */ 476 public void charactersRaw(char ch[], int start, int length) 477 throws org.xml.sax.SAXException 478 { 479 if(isOutsideDocElem() 480 && com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) 481 return; // avoid DOM006 Hierarchy request error 482 483 484 String s = new String(ch, start, length); 485 486 append(m_doc.createProcessingInstruction("xslt-next-is-raw", 487 "formatter-to-dom")); 488 append(m_doc.createTextNode(s)); 489 } 490 491 /** 492 * Report the beginning of an entity. 493 * 494 * The start and end of the document entity are not reported. 495 * The start and end of the external DTD subset are reported 496 * using the pseudo-name "[dtd]". All other events must be 497 * properly nested within start/end entity events. 498 * 499 * @param name The name of the entity. If it is a parameter 500 * entity, the name will begin with '%'. 501 * @see #endEntity 502 * @see org.xml.sax.ext.DeclHandler#internalEntityDecl 503 * @see org.xml.sax.ext.DeclHandler#externalEntityDecl 504 */ 505 public void startEntity(String name) throws org.xml.sax.SAXException 506 { 507 508 // Almost certainly the wrong behavior... 509 // entityReference(name); 510 } 511 512 /** 513 * Report the end of an entity. 514 * 515 * @param name The name of the entity that is ending. 516 * @see #startEntity 517 */ 518 public void endEntity(String name) throws org.xml.sax.SAXException{} 519 520 /** 521 * Receive notivication of a entityReference. 522 * 523 * @param name name of the entity reference 524 */ 525 public void entityReference(String name) throws org.xml.sax.SAXException 526 { 527 append(m_doc.createEntityReference(name)); 528 } 529 530 /** 531 * Receive notification of ignorable whitespace in element content. 532 * 533 * <p>Validating Parsers must use this method to report each chunk 534 * of ignorable whitespace (see the W3C XML 1.0 recommendation, 535 * section 2.10): non-validating parsers may also use this method 536 * if they are capable of parsing and using content models.</p> 537 * 538 * <p>SAX parsers may return all contiguous whitespace in a single 539 * chunk, or they may split it into several chunks; however, all of 540 * the characters in any single event must come from the same 541 * external entity, so that the Locator provides useful 542 * information.</p> 543 * 544 * <p>The application must not attempt to read from the array 545 * outside of the specified range.</p> 546 * 547 * @param ch The characters from the XML document. 548 * @param start The start position in the array. 549 * @param length The number of characters to read from the array. 550 * @see #characters 551 */ 552 public void ignorableWhitespace(char ch[], int start, int length) 553 throws org.xml.sax.SAXException 554 { 555 if(isOutsideDocElem()) 556 return; // avoid DOM006 Hierarchy request error 557 558 String s = new String(ch, start, length); 559 560 append(m_doc.createTextNode(s)); 561 } 562 563 /** 564 * Tell if the current node is outside the document element. 565 * 566 * @return true if the current node is outside the document element. 567 */ 568 private boolean isOutsideDocElem() 569 { 570 return (null == m_docFrag) && m_elemStack.size() == 0 && (null == m_currentNode || m_currentNode.getNodeType() == Node.DOCUMENT_NODE); 571 } 572 573 /** 574 * Receive notification of a processing instruction. 575 * 576 * <p>The Parser will invoke this method once for each processing 577 * instruction found: note that processing instructions may occur 578 * before or after the main document element.</p> 579 * 580 * <p>A SAX parser should never report an XML declaration (XML 1.0, 581 * section 2.8) or a text declaration (XML 1.0, section 4.3.1) 582 * using this method.</p> 583 * 584 * @param target The processing instruction target. 585 * @param data The processing instruction data, or null if 586 * none was supplied. 587 */ 588 public void processingInstruction(String target, String data) 589 throws org.xml.sax.SAXException 590 { 591 append(m_doc.createProcessingInstruction(target, data)); 592 } 593 594 /** 595 * Report an XML comment anywhere in the document. 596 * 597 * This callback will be used for comments inside or outside the 598 * document element, including comments in the external DTD 599 * subset (if read). 600 * 601 * @param ch An array holding the characters in the comment. 602 * @param start The starting position in the array. 603 * @param length The number of characters to use from the array. 604 */ 605 public void comment(char ch[], int start, int length) throws org.xml.sax.SAXException 606 { 607 append(m_doc.createComment(new String(ch, start, length))); 608 } 609 610 /** Flag indicating that we are processing a CData section */ 611 protected boolean m_inCData = false; 612 613 /** 614 * Report the start of a CDATA section. 615 * 616 * @see #endCDATA 617 */ 618 public void startCDATA() throws org.xml.sax.SAXException 619 { 620 m_inCData = true; 621 append(m_doc.createCDATASection("")); 622 } 623 624 /** 625 * Report the end of a CDATA section. 626 * 627 * @see #startCDATA 628 */ 629 public void endCDATA() throws org.xml.sax.SAXException 630 { 631 m_inCData = false; 632 } 633 634 /** 635 * Receive notification of cdata. 636 * 637 * <p>The Parser will call this method to report each chunk of 638 * character data. SAX parsers may return all contiguous character 639 * data in a single chunk, or they may split it into several 640 * chunks; however, all of the characters in any single event 641 * must come from the same external entity, so that the Locator 642 * provides useful information.</p> 643 * 644 * <p>The application must not attempt to read from the array 645 * outside of the specified range.</p> 646 * 647 * <p>Note that some parsers will report whitespace using the 648 * ignorableWhitespace() method rather than this one (validating 649 * parsers must do so).</p> 650 * 651 * @param ch The characters from the XML document. 652 * @param start The start position in the array. 653 * @param length The number of characters to read from the array. 654 * @see #ignorableWhitespace 655 * @see org.xml.sax.Locator 656 */ 657 public void cdata(char ch[], int start, int length) throws org.xml.sax.SAXException 658 { 659 if(isOutsideDocElem() 660 && com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) 661 return; // avoid DOM006 Hierarchy request error 662 663 String s = new String(ch, start, length); 664 665 CDATASection section =(CDATASection) m_currentNode.getLastChild(); 666 section.appendData(s); 667 } 668 669 /** 670 * Report the start of DTD declarations, if any. 671 * 672 * Any declarations are assumed to be in the internal subset 673 * unless otherwise indicated. 674 * 675 * @param name The document type name. 676 * @param publicId The declared public identifier for the 677 * external DTD subset, or null if none was declared. 678 * @param systemId The declared system identifier for the 679 * external DTD subset, or null if none was declared. 680 * @see #endDTD 681 * @see #startEntity 682 */ 683 public void startDTD(String name, String publicId, String systemId) 684 throws org.xml.sax.SAXException 685 { 686 687 // Do nothing for now. 688 } 689 690 /** 691 * Report the end of DTD declarations. 692 * 693 * @see #startDTD 694 */ 695 public void endDTD() throws org.xml.sax.SAXException 696 { 697 698 // Do nothing for now. 699 } 700 701 /** 702 * Begin the scope of a prefix-URI Namespace mapping. 703 * 704 * <p>The information from this event is not necessary for 705 * normal Namespace processing: the SAX XML reader will 706 * automatically replace prefixes for element and attribute 707 * names when the http://xml.org/sax/features/namespaces 708 * feature is true (the default).</p> 709 * 710 * <p>There are cases, however, when applications need to 711 * use prefixes in character data or in attribute values, 712 * where they cannot safely be expanded automatically; the 713 * start/endPrefixMapping event supplies the information 714 * to the application to expand prefixes in those contexts 715 * itself, if necessary.</p> 716 * 717 * <p>Note that start/endPrefixMapping events are not 718 * guaranteed to be properly nested relative to each-other: 719 * all startPrefixMapping events will occur before the 720 * corresponding startElement event, and all endPrefixMapping 721 * events will occur after the corresponding endElement event, 722 * but their order is not guaranteed.</p> 723 * 724 * @param prefix The Namespace prefix being declared. 725 * @param uri The Namespace URI the prefix is mapped to. 726 * @see #endPrefixMapping 727 * @see #startElement 728 */ 729 public void startPrefixMapping(String prefix, String uri) 730 throws org.xml.sax.SAXException 731 { 732 733 /* 734 // Not sure if this is needed or wanted 735 // Also, it fails in the stree. 736 if((null != m_currentNode) 737 && (m_currentNode.getNodeType() == Node.ELEMENT_NODE)) 738 { 739 String qname; 740 if(((null != prefix) && (prefix.length() == 0)) 741 || (null == prefix)) 742 qname = "xmlns"; 743 else 744 qname = "xmlns:"+prefix; 745 746 Element elem = (Element)m_currentNode; 747 String val = elem.getAttribute(qname); // Obsolete, should be DOM2...? 748 if(val == null) 749 { 750 elem.setAttributeNS("http://www.w3.org/XML/1998/namespace", 751 qname, uri); 752 } 753 } 754 */ 755 } 756 757 /** 758 * End the scope of a prefix-URI mapping. 759 * 760 * <p>See startPrefixMapping for details. This event will 761 * always occur after the corresponding endElement event, 762 * but the order of endPrefixMapping events is not otherwise 763 * guaranteed.</p> 764 * 765 * @param prefix The prefix that was being mapping. 766 * @see #startPrefixMapping 767 * @see #endElement 768 */ 769 public void endPrefixMapping(String prefix) throws org.xml.sax.SAXException{} 770 771 /** 772 * Receive notification of a skipped entity. 773 * 774 * <p>The Parser will invoke this method once for each entity 775 * skipped. Non-validating processors may skip entities if they 776 * have not seen the declarations (because, for example, the 777 * entity was declared in an external DTD subset). All processors 778 * may skip external entities, depending on the values of the 779 * http://xml.org/sax/features/external-general-entities and the 780 * http://xml.org/sax/features/external-parameter-entities 781 * properties.</p> 782 * 783 * @param name The name of the skipped entity. If it is a 784 * parameter entity, the name will begin with '%'. 785 */ 786 public void skippedEntity(String name) throws org.xml.sax.SAXException{} 787 }