1 /* 2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. 3 * @LastModified: Sep 2017 4 */ 5 6 /* 7 * Licensed to the Apache Software Foundation (ASF) under one or more 8 * contributor license agreements. See the NOTICE file distributed with 9 * this work for additional information regarding copyright ownership. 10 * The ASF licenses this file to You under the Apache License, Version 2.0 11 * (the "License"); you may not use this file except in compliance with 12 * the License. You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23 package com.sun.org.apache.xerces.internal.impl; 24 25 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 26 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 27 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 28 import com.sun.org.apache.xerces.internal.util.XMLChar; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 31 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.QName; 36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 37 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import 
com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 46 import com.sun.xml.internal.stream.XMLBufferListener; 47 import com.sun.xml.internal.stream.XMLEntityStorage; 48 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 49 import java.io.EOFException; 50 import java.io.IOException; 51 import javax.xml.XMLConstants; 52 import javax.xml.stream.XMLInputFactory; 53 import javax.xml.stream.XMLStreamConstants; 54 import javax.xml.stream.events.XMLEvent; 55 import jdk.xml.internal.JdkXmlUtils; 56 import jdk.xml.internal.SecuritySupport; 57 58 /** 59 * 60 * This class is responsible for scanning the structure and content 61 * of document fragments. 62 * 63 * This class has been modified as per the new design which is more suited to 64 * efficiently build pull parser. Lot of improvements have been done and 65 * the code has been added to support stax functionality/features. 66 * 67 * @author Neeraj Bajaj SUN Microsystems 68 * @author K.Venugopal SUN Microsystems 69 * @author Glenn Marcy, IBM 70 * @author Andy Clark, IBM 71 * @author Arnaud Le Hors, IBM 72 * @author Eric Ye, IBM 73 * @author Sunitha Reddy, SUN Microsystems 74 * 75 */ 76 public class XMLDocumentFragmentScannerImpl 77 extends XMLScanner 78 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 79 80 // 81 // Constants 82 // 83 84 protected int fElementAttributeLimit, fXMLNameLimit; 85 86 /** External subset resolver. 
**/ 87 protected ExternalSubsetResolver fExternalSubsetResolver; 88 89 // scanner states 90 91 //XXX this should be divided into more states. 92 /** Scanner state: start of markup. */ 93 protected static final int SCANNER_STATE_START_OF_MARKUP = 21; 94 95 /** Scanner state: content. */ 96 protected static final int SCANNER_STATE_CONTENT = 22; 97 98 /** Scanner state: processing instruction. */ 99 protected static final int SCANNER_STATE_PI = 23; 100 101 /** Scanner state: DOCTYPE. */ 102 protected static final int SCANNER_STATE_DOCTYPE = 24; 103 104 /** Scanner state: XML Declaration */ 105 protected static final int SCANNER_STATE_XML_DECL = 25; 106 107 /** Scanner state: root element. */ 108 protected static final int SCANNER_STATE_ROOT_ELEMENT = 26; 109 110 /** Scanner state: comment. */ 111 protected static final int SCANNER_STATE_COMMENT = 27; 112 113 /** Scanner state: reference. */ 114 protected static final int SCANNER_STATE_REFERENCE = 28; 115 116 // <book type="hard"> reading attribute name 'type' 117 protected static final int SCANNER_STATE_ATTRIBUTE = 29; 118 119 // <book type="hard"> //reading attribute value. 120 protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30; 121 122 /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/ 123 //protected static final int SCANNER_STATE_TRAILING_MISC = 32; 124 125 /** Scanner state: end of input. */ 126 protected static final int SCANNER_STATE_END_OF_INPUT = 33; 127 128 /** Scanner state: terminated. */ 129 protected static final int SCANNER_STATE_TERMINATED = 34; 130 131 /** Scanner state: CDATA section. */ 132 protected static final int SCANNER_STATE_CDATA = 35; 133 134 /** Scanner state: Text declaration. */ 135 protected static final int SCANNER_STATE_TEXT_DECL = 36; 136 137 /** Scanner state: Text declaration. 
*/ 138 protected static final int SCANNER_STATE_CHARACTER_DATA = 37; 139 140 //<book type="hard">foo</book> 141 protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38; 142 143 //<book type="hard">foo</book> reading </book> 144 protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39; 145 146 protected static final int SCANNER_STATE_CHAR_REFERENCE = 40; 147 protected static final int SCANNER_STATE_BUILT_IN_REFS = 41; 148 149 // feature identifiers 150 151 152 /** Feature identifier: notify built-in refereces. */ 153 protected static final String NOTIFY_BUILTIN_REFS = 154 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; 155 156 /** Property identifier: entity resolver. */ 157 protected static final String ENTITY_RESOLVER = 158 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 159 160 /** Feature identifier: standard uri conformant */ 161 protected static final String STANDARD_URI_CONFORMANT = 162 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 163 164 /** Property identifier: Security property manager. */ 165 private static final String XML_SECURITY_PROPERTY_MANAGER = 166 Constants.XML_SECURITY_PROPERTY_MANAGER; 167 168 /** access external dtd: file protocol 169 * For DOM/SAX, the secure feature is set to true by default 170 */ 171 final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 172 173 // recognized features and properties 174 175 /** Recognized features. */ 176 private static final String[] RECOGNIZED_FEATURES = { 177 NAMESPACES, 178 VALIDATION, 179 NOTIFY_BUILTIN_REFS, 180 NOTIFY_CHAR_REFS, 181 Constants.STAX_REPORT_CDATA_EVENT, 182 XMLConstants.USE_CATALOG 183 }; 184 185 /** Feature defaults. */ 186 private static final Boolean[] FEATURE_DEFAULTS = { 187 Boolean.TRUE, 188 null, 189 Boolean.FALSE, 190 Boolean.FALSE, 191 Boolean.TRUE, 192 JdkXmlUtils.USE_CATALOG_DEFAULT 193 }; 194 195 /** Recognized properties. 
*/ 196 private static final String[] RECOGNIZED_PROPERTIES = { 197 SYMBOL_TABLE, 198 ERROR_REPORTER, 199 ENTITY_MANAGER, 200 XML_SECURITY_PROPERTY_MANAGER, 201 JdkXmlUtils.CATALOG_DEFER, 202 JdkXmlUtils.CATALOG_FILES, 203 JdkXmlUtils.CATALOG_PREFER, 204 JdkXmlUtils.CATALOG_RESOLVE, 205 JdkXmlUtils.CDATA_CHUNK_SIZE 206 }; 207 208 /** Property defaults. */ 209 private static final Object[] PROPERTY_DEFAULTS = { 210 null, 211 null, 212 null, 213 null, 214 null, 215 null, 216 null, 217 null, 218 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT 219 }; 220 221 222 private static final char [] CDATA = {'[','C','D','A','T','A','['}; 223 static final char [] XMLDECL = {'<','?','x','m','l'}; 224 // private static final char [] endTag = {'<','/'}; 225 // debugging 226 227 /** Debug scanner state. */ 228 private static final boolean DEBUG_SCANNER_STATE = false; 229 230 /** Debug driver. */ 231 private static final boolean DEBUG_DISPATCHER = false; 232 233 /** Debug content driver scanning. */ 234 protected static final boolean DEBUG_START_END_ELEMENT = false; 235 236 /** Debug driver next */ 237 protected static final boolean DEBUG = false; 238 239 // 240 // Data 241 // 242 243 // protected data 244 245 /** Document handler. */ 246 protected XMLDocumentHandler fDocumentHandler; 247 protected int fScannerLastState ; 248 249 /** Entity Storage */ 250 protected XMLEntityStorage fEntityStore; 251 252 /** Entity stack. */ 253 protected int[] fEntityStack = new int[4]; 254 255 /** Markup depth. */ 256 protected int fMarkupDepth; 257 258 //is the element empty 259 protected boolean fEmptyElement ; 260 261 //track if we are reading attributes, this is usefule while 262 //there is a callback 263 protected boolean fReadingAttributes = false; 264 265 /** Scanner state. */ 266 protected int fScannerState; 267 268 /** SubScanner state: inside scanContent method. 
*/ 269 protected boolean fInScanContent = false; 270 protected boolean fLastSectionWasCData = false; 271 protected boolean fCDataStart = false; 272 protected boolean fInCData = false; 273 protected boolean fCDataEnd = false; 274 protected boolean fLastSectionWasEntityReference = false; 275 protected boolean fLastSectionWasCharacterData = false; 276 277 /** has external dtd */ 278 protected boolean fHasExternalDTD; 279 280 /** Standalone. */ 281 protected boolean fStandaloneSet; 282 protected boolean fStandalone; 283 protected String fVersion; 284 285 // element information 286 287 /** Current element. */ 288 protected QName fCurrentElement; 289 290 /** Element stack. */ 291 protected ElementStack fElementStack = new ElementStack(); 292 protected ElementStack2 fElementStack2 = new ElementStack2(); 293 294 // other info 295 296 /** Document system identifier. 297 * REVISIT: So what's this used for? - NG 298 * protected String fDocumentSystemId; 299 ******/ 300 301 protected String fPITarget ; 302 303 //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values 304 protected XMLString fPIData = new XMLString(); 305 306 // features 307 308 309 /** Notify built-in references. */ 310 protected boolean fNotifyBuiltInRefs = false; 311 312 //STAX related properties 313 //defaultValues. 314 protected boolean fSupportDTD = true; 315 protected boolean fReplaceEntityReferences = true; 316 protected boolean fSupportExternalEntities = false; 317 protected boolean fReportCdataEvent = false ; 318 protected boolean fIsCoalesce = false ; 319 protected String fDeclaredEncoding = null; 320 /** Xerces Feature: Disallow doctype declaration. 
*/ 321 protected boolean fDisallowDoctype = false; 322 323 /** 324 * CDATA chunk size limit 325 */ 326 private int fChunkSize; 327 328 /** 329 * comma-delimited list of protocols that are allowed for the purpose 330 * of accessing external dtd or entity references 331 */ 332 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 333 334 /** 335 * standard uri conformant (strict uri). 336 * http://apache.org/xml/features/standard-uri-conformant 337 */ 338 protected boolean fStrictURI; 339 340 // drivers 341 342 /** Active driver. */ 343 protected Driver fDriver; 344 345 /** Content driver. */ 346 protected Driver fContentDriver = createContentDriver(); 347 348 // temporary variables 349 350 /** Element QName. */ 351 protected QName fElementQName = new QName(); 352 353 /** Attribute QName. */ 354 protected QName fAttributeQName = new QName(); 355 356 /** 357 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class 358 * implements Iterator interface so we can directly give Attributes in the form of 359 * iterator. 360 */ 361 protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl(); 362 363 364 /** String. */ 365 protected XMLString fTempString = new XMLString(); 366 367 /** String. */ 368 protected XMLString fTempString2 = new XMLString(); 369 370 /** Array of 3 strings. */ 371 private final String[] fStrings = new String[3]; 372 373 /** Making the buffer accessible to derived class -- String buffer. */ 374 protected XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 375 376 /** Making the buffer accessible to derived class -- String buffer. */ 377 protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); 378 379 /** stores character data. */ 380 /** Making the buffer accessible to derived class -- stores PI data */ 381 protected XMLStringBuffer fContentBuffer = new XMLStringBuffer(); 382 383 /** Single character array. 
*/ 384 private final char[] fSingleChar = new char[1]; 385 private String fCurrentEntityName = null; 386 387 // New members 388 protected boolean fScanToEnd = false; 389 390 protected DTDGrammarUtil dtdGrammarUtil= null; 391 392 protected boolean fAddDefaultAttr = false; 393 394 protected boolean foundBuiltInRefs = false; 395 396 /** Built-in reference character event */ 397 protected boolean builtInRefCharacterHandled = false; 398 399 //skip element algorithm 400 static final short MAX_DEPTH_LIMIT = 5 ; 401 static final short ELEMENT_ARRAY_LENGTH = 200 ; 402 static final short MAX_POINTER_AT_A_DEPTH = 4 ; 403 static final boolean DEBUG_SKIP_ALGORITHM = false; 404 //create a elemnet array of length equal to ELEMENT_ARRAY_LENGTH 405 String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ; 406 //pointer location where last element was skipped 407 short fLastPointerLocation = 0 ; 408 short fElementPointer = 0 ; 409 //2D array to store pointer info 410 short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ; 411 protected String fElementRawname ; 412 protected boolean fShouldSkip = false; 413 protected boolean fAdd = false ; 414 protected boolean fSkip = false; 415 416 /** Reusable Augmentations. */ 417 private Augmentations fTempAugmentations = null; 418 // 419 // Constructors 420 // 421 422 /** Default constructor. */ 423 public XMLDocumentFragmentScannerImpl() { 424 } // <init>() 425 426 // 427 // XMLDocumentScanner methods 428 // 429 430 /** 431 * Sets the input source. 432 * 433 * @param inputSource The input source. 434 * 435 * @throws IOException Thrown on i/o error. 436 */ 437 public void setInputSource(XMLInputSource inputSource) throws IOException { 438 fEntityManager.setEntityHandler(this); 439 fEntityManager.startEntity(false, "$fragment$", inputSource, false, true); 440 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 441 } // setInputSource(XMLInputSource) 442 443 /** 444 * Scans a document. 
445 * 446 * @param complete True if the scanner should scan the document 447 * completely, pushing all events to the registered 448 * document handler. A value of false indicates that 449 * that the scanner should only scan the next portion 450 * of the document and return. A scanner instance is 451 * permitted to completely scan a document if it does 452 * not support this "pull" scanning model. 453 * 454 * @return True if there is more to scan, false otherwise. 455 */ 456 public boolean scanDocument(boolean complete) 457 throws IOException, XNIException { 458 459 // keep dispatching "events" 460 fEntityManager.setEntityHandler(this); 461 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 462 463 int event = next(); 464 do { 465 switch (event) { 466 case XMLStreamConstants.START_DOCUMENT : 467 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 468 break; 469 case XMLStreamConstants.START_ELEMENT : 470 //System.out.println(" in scann element"); 471 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 472 break; 473 case XMLStreamConstants.CHARACTERS : 474 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 475 fDocumentHandler.characters(getCharacterData(),null); 476 break; 477 case XMLStreamConstants.SPACE: 478 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
479 //System.out.println("in the space"); 480 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 481 break; 482 case XMLStreamConstants.ENTITY_REFERENCE : 483 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 484 //entity reference callback are given in startEntity 485 break; 486 case XMLStreamConstants.PROCESSING_INSTRUCTION : 487 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 488 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 489 break; 490 case XMLStreamConstants.COMMENT : 491 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 492 fDocumentHandler.comment(getCharacterData(),null); 493 break; 494 case XMLStreamConstants.DTD : 495 //all DTD related callbacks are handled in DTDScanner. 496 //1. Stax doesn't define DTD states as it does for XML Document. 497 //therefore we don't need to take care of anything here. So Just break; 498 break; 499 case XMLStreamConstants.CDATA: 500 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 501 if (fCDataStart) { 502 fDocumentHandler.startCDATA(null); 503 fCDataStart = false; 504 fInCData = true; 505 } 506 507 fDocumentHandler.characters(getCharacterData(),null); 508 if (fCDataEnd) { 509 fDocumentHandler.endCDATA(null); 510 fCDataEnd = false; 511 } 512 break; 513 case XMLStreamConstants.NOTATION_DECLARATION : 514 break; 515 case XMLStreamConstants.ENTITY_DECLARATION : 516 break; 517 case XMLStreamConstants.NAMESPACE : 518 break; 519 case XMLStreamConstants.ATTRIBUTE : 520 break; 521 case XMLStreamConstants.END_ELEMENT : 522 //do not give callback here. 523 //this callback is given in scanEndElement function. 
524 //fDocumentHandler.endElement(getElementQName(),null); 525 break; 526 default : 527 // Errors should have already been handled by the Scanner 528 return false; 529 530 } 531 //System.out.println("here in before calling next"); 532 event = next(); 533 //System.out.println("here in after calling next"); 534 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 535 536 if(event == XMLStreamConstants.END_DOCUMENT) { 537 fDocumentHandler.endDocument(null); 538 return false; 539 } 540 541 return true; 542 543 } // scanDocument(boolean):boolean 544 545 546 547 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 548 if(fScannerLastState == XMLEvent.END_ELEMENT){ 549 fElementQName.setValues(fElementStack.getLastPoppedElement()); 550 } 551 return fElementQName ; 552 } 553 554 /** return the next state on the input 555 * @return int 556 */ 557 558 public int next() throws IOException, XNIException { 559 return fDriver.next(); 560 } 561 562 // 563 // XMLComponent methods 564 // 565 566 /** 567 * Resets the component. The component can query the component manager 568 * about any features and properties that affect the operation of the 569 * component. 570 * 571 * @param componentManager The component manager. 572 * 573 * @throws SAXException Thrown by component on initialization error. 574 * For example, if a feature or property is 575 * required for the operation of the component, the 576 * component manager may throw a 577 * SAXNotRecognizedException or a 578 * SAXNotSupportedException. 
579 */ 580 581 public void reset(XMLComponentManager componentManager) 582 throws XMLConfigurationException { 583 584 super.reset(componentManager); 585 586 // other settings 587 // fDocumentSystemId = null; 588 589 // sax features 590 //fAttributes.setNamespaces(fNamespaces); 591 592 // xerces features 593 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 594 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 595 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 596 597 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 598 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 599 (ExternalSubsetResolver) resolver : null; 600 601 //attribute 602 fReadingAttributes = false; 603 //xxx: external entities are supported in Xerces 604 // it would be good to define feature for this case 605 fSupportExternalEntities = true; 606 fReplaceEntityReferences = true; 607 fIsCoalesce = false; 608 609 // setup Driver 610 setScannerState(SCANNER_STATE_CONTENT); 611 setDriver(fContentDriver); 612 613 // JAXP 1.5 features and properties 614 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 615 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 616 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 617 618 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 619 fChunkSize = JdkXmlUtils.getValue(componentManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 620 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 621 622 resetCommon(); 623 //fEntityManager.test(); 624 } // reset(XMLComponentManager) 625 626 627 public void reset(PropertyManager propertyManager){ 628 629 super.reset(propertyManager); 630 631 // other settings 632 // fDocumentSystemId = null; 633 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)); 634 
fNotifyBuiltInRefs = false ; 635 636 //fElementStack2.clear(); 637 //fReplaceEntityReferences = true; 638 //fSupportExternalEntities = true; 639 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 640 fReplaceEntityReferences = bo; 641 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 642 fSupportExternalEntities = bo; 643 Boolean cdata = (Boolean)propertyManager.getProperty( 644 Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 645 if(cdata != null) 646 fReportCdataEvent = cdata ; 647 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 648 if(coalesce != null) 649 fIsCoalesce = coalesce; 650 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 651 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 652 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 653 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 654 // setup Driver 655 //we dont need to do this -- nb. 
656 //setScannerState(SCANNER_STATE_CONTENT); 657 //setDriver(fContentDriver); 658 //fEntityManager.test(); 659 660 // JAXP 1.5 features and properties 661 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 662 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 663 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 664 665 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 666 fChunkSize = JdkXmlUtils.getValue(propertyManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 667 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 668 resetCommon(); 669 } // reset(XMLComponentManager) 670 671 void resetCommon() { 672 // initialize vars 673 fMarkupDepth = 0; 674 fCurrentElement = null; 675 fElementStack.clear(); 676 fHasExternalDTD = false; 677 fStandaloneSet = false; 678 fStandalone = false; 679 fInScanContent = false; 680 //skipping algorithm 681 fShouldSkip = false; 682 fAdd = false; 683 fSkip = false; 684 685 fEntityStore = fEntityManager.getEntityStore(); 686 dtdGrammarUtil = null; 687 688 if (fSecurityManager != null) { 689 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 690 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 691 } else { 692 fElementAttributeLimit = 0; 693 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 694 } 695 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 696 } 697 698 /** 699 * Returns a list of feature identifiers that are recognized by 700 * this component. This method may return null if no features 701 * are recognized by this component. 702 */ 703 public String[] getRecognizedFeatures() { 704 return RECOGNIZED_FEATURES.clone(); 705 } // getRecognizedFeatures():String[] 706 707 /** 708 * Sets the state of a feature. This method is called by the component 709 * manager any time after reset when a feature changes state. 
710 * <p> 711 * <strong>Note:</strong> Components should silently ignore features 712 * that do not affect the operation of the component. 713 * 714 * @param featureId The feature identifier. 715 * @param state The state of the feature. 716 * 717 * @throws SAXNotRecognizedException The component should not throw 718 * this exception. 719 * @throws SAXNotSupportedException The component should not throw 720 * this exception. 721 */ 722 public void setFeature(String featureId, boolean state) 723 throws XMLConfigurationException { 724 725 super.setFeature(featureId, state); 726 727 // Xerces properties 728 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 729 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 730 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 731 fNotifyBuiltInRefs = state; 732 } 733 } 734 735 } // setFeature(String,boolean) 736 737 /** 738 * Returns a list of property identifiers that are recognized by 739 * this component. This method may return null if no properties 740 * are recognized by this component. 741 */ 742 public String[] getRecognizedProperties() { 743 return RECOGNIZED_PROPERTIES.clone(); 744 } // getRecognizedProperties():String[] 745 746 /** 747 * Sets the value of a property. This method is called by the component 748 * manager any time after reset when a property changes value. 749 * <p> 750 * <strong>Note:</strong> Components should silently ignore properties 751 * that do not affect the operation of the component. 752 * 753 * @param propertyId The property identifier. 754 * @param value The value of the property. 755 * 756 * @throws SAXNotRecognizedException The component should not throw 757 * this exception. 758 * @throws SAXNotSupportedException The component should not throw 759 * this exception. 
760 */ 761 public void setProperty(String propertyId, Object value) 762 throws XMLConfigurationException { 763 764 super.setProperty(propertyId, value); 765 766 // Xerces properties 767 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 768 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 769 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 770 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 771 fEntityManager = (XMLEntityManager)value; 772 return; 773 } 774 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 775 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 776 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 777 (ExternalSubsetResolver) value : null; 778 return; 779 } 780 } 781 782 783 // Xerces properties 784 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 785 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 786 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 787 fEntityManager = (XMLEntityManager)value; 788 } 789 return; 790 } 791 792 //JAXP 1.5 properties 793 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 794 { 795 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 796 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 797 } 798 799 } // setProperty(String,Object) 800 801 /** 802 * Returns the default state for a feature, or null if this 803 * component does not want to report a default value for this 804 * feature. 805 * 806 * @param featureId The feature identifier. 
807 * 808 * @since Xerces 2.2.0 809 */ 810 public Boolean getFeatureDefault(String featureId) { 811 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 812 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 813 return FEATURE_DEFAULTS[i]; 814 } 815 } 816 return null; 817 } // getFeatureDefault(String):Boolean 818 819 /** 820 * Returns the default state for a property, or null if this 821 * component does not want to report a default value for this 822 * property. 823 * 824 * @param propertyId The property identifier. 825 * 826 * @since Xerces 2.2.0 827 */ 828 public Object getPropertyDefault(String propertyId) { 829 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 830 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 831 return PROPERTY_DEFAULTS[i]; 832 } 833 } 834 return null; 835 } // getPropertyDefault(String):Object 836 837 // 838 // XMLDocumentSource methods 839 // 840 841 /** 842 * setDocumentHandler 843 * 844 * @param documentHandler 845 */ 846 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 847 fDocumentHandler = documentHandler; 848 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 849 } // setDocumentHandler(XMLDocumentHandler) 850 851 852 /** Returns the document handler */ 853 public XMLDocumentHandler getDocumentHandler(){ 854 return fDocumentHandler; 855 } 856 857 // 858 // XMLEntityHandler methods 859 // 860 861 /** 862 * This method notifies of the start of an entity. The DTD has the 863 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 864 * general entities are just specified by their name. 865 * 866 * @param name The name of the entity. 867 * @param identifier The resource identifier. 868 * @param encoding The auto-detected IANA encoding name of the entity 869 * stream. This value will be null in those situations 870 * where the entity encoding is not auto-detected (e.g. 871 * internal entities or a document entity that is 872 * parsed from a java.io.Reader). 
873 * @param augs Additional information that may include infoset augmentations 874 * 875 * @throws XNIException Thrown by handler to signal an error. 876 */ 877 public void startEntity(String name, 878 XMLResourceIdentifier identifier, 879 String encoding, Augmentations augs) throws XNIException { 880 881 // keep track of this entity before fEntityDepth is increased 882 if (fEntityDepth == fEntityStack.length) { 883 int[] entityarray = new int[fEntityStack.length * 2]; 884 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 885 fEntityStack = entityarray; 886 } 887 fEntityStack[fEntityDepth] = fMarkupDepth; 888 889 super.startEntity(name, identifier, encoding, augs); 890 891 // WFC: entity declared in external subset in standalone doc 892 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 893 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 894 new Object[]{name}); 895 } 896 897 /** we are not calling the handlers yet.. */ 898 // call handler 899 if (fDocumentHandler != null && !fScanningAttribute) { 900 if (!name.equals("[xml]")) { 901 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 902 } 903 } 904 905 } // startEntity(String,XMLResourceIdentifier,String) 906 907 /** 908 * This method notifies the end of an entity. The DTD has the pseudo-name 909 * of "[dtd]" parameter entity names start with '%'; and general entities 910 * are just specified by their name. 911 * 912 * @param name The name of the entity. 913 * @param augs Additional information that may include infoset augmentations 914 * 915 * @throws XNIException Thrown by handler to signal an error. 
916 */ 917 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 918 919 /** 920 * // flush possible pending output buffer - see scanContent 921 * if (fInScanContent && fStringBuffer.length != 0 922 * && fDocumentHandler != null) { 923 * fDocumentHandler.characters(fStringBuffer, null); 924 * fStringBuffer.length = 0; // make sure we know it's been flushed 925 * } 926 */ 927 super.endEntity(name, augs); 928 929 // make sure markup is properly balanced 930 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 931 reportFatalError("MarkupEntityMismatch", null); 932 } 933 934 /**/ 935 // call handler 936 if (fDocumentHandler != null && !fScanningAttribute) { 937 if (!name.equals("[xml]")) { 938 fDocumentHandler.endGeneralEntity(name, augs); 939 } 940 } 941 942 943 } // endEntity(String) 944 945 // 946 // Protected methods 947 // 948 949 // Driver factory methods 950 951 /** Creates a content Driver. */ 952 protected Driver createContentDriver() { 953 return new FragmentContentDriver(); 954 } // createContentDriver():Driver 955 956 // scanning methods 957 958 /** 959 * Scans an XML or text declaration. 960 * <p> 961 * <pre> 962 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 963 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 964 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 965 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 966 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 967 * | ('"' ('yes' | 'no') '"')) 968 * 969 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 970 * </pre> 971 * 972 * @param scanningTextDecl True if a text declaration is to 973 * be scanned instead of an XML 974 * declaration. 
975 */ 976 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 977 throws IOException, XNIException { 978 979 // scan decl 980 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 981 fMarkupDepth--; 982 983 // pseudo-attribute values 984 String version = fStrings[0]; 985 String encoding = fStrings[1]; 986 String standalone = fStrings[2]; 987 fDeclaredEncoding = encoding; 988 // set standalone 989 fStandaloneSet = standalone != null; 990 fStandalone = fStandaloneSet && standalone.equals("yes"); 991 ///xxx see where its used.. this is not used anywhere. 992 //it may be useful for entity to store this information 993 //but this information is only related with Document Entity. 994 fEntityManager.setStandalone(fStandalone); 995 996 997 // call handler 998 if (fDocumentHandler != null) { 999 if (scanningTextDecl) { 1000 fDocumentHandler.textDecl(version, encoding, null); 1001 } else { 1002 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 1003 } 1004 } 1005 1006 if(version != null){ 1007 fEntityScanner.setVersion(version); 1008 fEntityScanner.setXMLVersion(version); 1009 } 1010 // set encoding on reader, only if encoding was not specified by the application explicitly 1011 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 1012 fEntityScanner.setEncoding(encoding); 1013 } 1014 1015 } // scanXMLDeclOrTextDecl(boolean) 1016 1017 public String getPITarget(){ 1018 return fPITarget ; 1019 } 1020 1021 public XMLStringBuffer getPIData(){ 1022 return fContentBuffer ; 1023 } 1024 1025 //XXX: why not this function behave as per the state of the parser? 1026 public XMLString getCharacterData(){ 1027 if(fUsebuffer){ 1028 return fContentBuffer ; 1029 }else{ 1030 return fTempString; 1031 } 1032 1033 } 1034 1035 1036 /** 1037 * Scans a processing data. This is needed to handle the situation 1038 * where a document starts with a processing instruction whose 1039 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1040 * 1041 * @param target The PI target 1042 * @param data The XMLStringBuffer to fill in with the data 1043 */ 1044 protected void scanPIData(String target, XMLStringBuffer data) 1045 throws IOException, XNIException { 1046 1047 super.scanPIData(target, data); 1048 1049 //set the PI target and values 1050 fPITarget = target ; 1051 1052 fMarkupDepth--; 1053 1054 } // scanPIData(String) 1055 1056 /** 1057 * Scans a comment. 1058 * <p> 1059 * <pre> 1060 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1061 * </pre> 1062 * <p> 1063 * <strong>Note:</strong> Called after scanning past '<!--' 1064 */ 1065 protected void scanComment() throws IOException, XNIException { 1066 fContentBuffer.clear(); 1067 scanComment(fContentBuffer); 1068 //getTextCharacters can also be called for reading comments 1069 fUsebuffer = true; 1070 fMarkupDepth--; 1071 1072 } // scanComment() 1073 1074 //xxx value returned by this function may not remain valid if another event is scanned. 1075 public String getComment(){ 1076 return fContentBuffer.toString(); 1077 } 1078 1079 void addElement(String rawname){ 1080 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1081 //storing element raw name in a linear list of array 1082 fElementArray[fElementPointer] = rawname ; 1083 //storing elemnetPointer for particular element depth 1084 1085 if(DEBUG_SKIP_ALGORITHM){ 1086 StringBuffer sb = new StringBuffer() ; 1087 sb.append(" Storing element information ") ; 1088 sb.append(" fElementPointer = " + fElementPointer) ; 1089 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1090 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1091 System.out.println(sb.toString()) ; 1092 } 1093 1094 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1095 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1096 short column = storePointerForADepth(fElementPointer); 1097 if(column > 0){ 1098 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1099 //identity comparison shouldn't take much time and we can rely on this 1100 //since its guaranteed to have same object id for same string. 1101 if(rawname == fElementArray[pointer]){ 1102 fShouldSkip = true ; 1103 fLastPointerLocation = pointer ; 1104 //reset the things and return. 1105 resetPointer((short)fElementStack.fDepth , column) ; 1106 fElementArray[fElementPointer] = null ; 1107 return ; 1108 }else{ 1109 fShouldSkip = false ; 1110 } 1111 } 1112 } 1113 fElementPointer++ ; 1114 } 1115 } 1116 1117 1118 void resetPointer(short depth, short column){ 1119 fPointerInfo[depth] [column] = (short)0; 1120 } 1121 1122 //returns column information at which pointer was stored. 1123 short storePointerForADepth(short elementPointer){ 1124 short depth = (short) fElementStack.fDepth ; 1125 1126 //Stores element pointer locations at particular depth , only 4 pointer locations 1127 //are stored at particular depth for now. 1128 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1129 1130 if(canStore(depth, i)){ 1131 fPointerInfo[depth][i] = elementPointer ; 1132 if(DEBUG_SKIP_ALGORITHM){ 1133 StringBuffer sb = new StringBuffer() ; 1134 sb.append(" Pointer information ") ; 1135 sb.append(" fElementPointer = " + fElementPointer) ; 1136 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1137 sb.append(" column = " + i ) ; 1138 System.out.println(sb.toString()) ; 1139 } 1140 return i; 1141 } 1142 //else 1143 //pointer was not stored because we reached the limit 1144 } 1145 return -1 ; 1146 } 1147 1148 boolean canStore(short depth, short column){ 1149 //colum = 0 , means first element at particular depth 1150 //column = 1, means second element at particular depth 1151 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1152 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1153 } 1154 1155 1156 short getElementPointer(short depth, short column){ 1157 //colum = 0 , means first element at particular depth 1158 //column = 1, means second element at particular depth 1159 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1160 return fPointerInfo[depth][column] ; 1161 } 1162 1163 //this function assumes that string passed is not null and skips 1164 //the following string from the buffer this makes sure 1165 boolean skipFromTheBuffer(String rawname) throws IOException{ 1166 if(fEntityScanner.skipString(rawname)){ 1167 char c = (char)fEntityScanner.peekChar() ; 1168 //If the start element was completely skipped we should encounter either ' '(space), 1169 //or '/' (in case of empty element) or '>' 1170 if( c == ' ' || c == '/' || c == '>'){ 1171 fElementRawname = rawname ; 1172 return true ; 1173 } else{ 1174 return false; 1175 } 1176 } else 1177 return false ; 1178 } 1179 1180 boolean skipQElement(String rawname) throws IOException{ 1181 1182 final int c = fEntityScanner.getChar(rawname.length()); 1183 //if this character is still valid element name -- this means string can't match 1184 if(XMLChar.isName(c)){ 1185 return false; 1186 }else{ 1187 return fEntityScanner.skipString(rawname); 1188 } 1189 } 1190 1191 protected boolean skipElement() throws IOException { 1192 1193 if(!fShouldSkip) return false ; 1194 1195 if(fLastPointerLocation != 0){ 1196 //Look at the next element stored in the array list.. we might just get a match. 1197 String rawname = fElementArray[fLastPointerLocation + 1] ; 1198 if(rawname != null && skipFromTheBuffer(rawname)){ 1199 fLastPointerLocation++ ; 1200 if(DEBUG_SKIP_ALGORITHM){ 1201 System.out.println("Element " + fElementRawname + 1202 " was SKIPPED at pointer location = " + fLastPointerLocation); 1203 } 1204 return true ; 1205 } else{ 1206 //reset it back to zero... we haven't got the correct subset yet. 
1207 fLastPointerLocation = 0 ; 1208 1209 } 1210 } 1211 //xxx: we can put some logic here as from what column it should start looking 1212 //for now we always start at 0 1213 //fallback to tolerant algorithm, it would look for differnt element stored at different 1214 //depth and get us the pointer location. 1215 return fShouldSkip && skipElement((short)0); 1216 1217 } 1218 1219 //start of the column at which it should try searching 1220 boolean skipElement(short column) throws IOException { 1221 short depth = (short)fElementStack.fDepth ; 1222 1223 if(depth > MAX_DEPTH_LIMIT){ 1224 return fShouldSkip = false ; 1225 } 1226 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1227 short pointer = getElementPointer(depth , i ) ; 1228 1229 if(pointer == 0){ 1230 return fShouldSkip = false ; 1231 } 1232 1233 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1234 if(DEBUG_SKIP_ALGORITHM){ 1235 System.out.println(); 1236 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + 1237 fElementStack.fDepth + " column = " + column ); 1238 System.out.println(); 1239 } 1240 fLastPointerLocation = pointer ; 1241 return fShouldSkip = true ; 1242 } 1243 } 1244 return fShouldSkip = false ; 1245 } 1246 1247 /** 1248 * Scans a start element. This method will handle the binding of 1249 * namespace information and notifying the handler of the start 1250 * of the element. 1251 * <p> 1252 * <pre> 1253 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1254 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1255 * </pre> 1256 * <p> 1257 * <strong>Note:</strong> This method assumes that the leading 1258 * '<' character has been consumed. 1259 * <p> 1260 * <strong>Note:</strong> This method uses the fElementQName and 1261 * fAttributes variables. The contents of these variables will be 1262 * destroyed. The caller should copy important information out of 1263 * these variables before calling this method. 
     * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT
     *
     * @return True if element is empty. (i.e. It matches
     *          production [44].
     */
    // fElementQName will have the details of element just read..
    // fAttributes will have the details of all the attributes.
    protected boolean scanStartElement()
    throws IOException, XNIException {

        if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()");
        // Phase 1: skipping is enabled and no more elements are being added -
        // try to match the stored element name against the input instead of scanning it.
        if(fSkip && !fAdd){
            //get the stored element -- if everything goes right this should match the
            //token in the buffer

            QName name = fElementStack.getNext();

            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Trying to skip String = " + name.rawname);
            }

            //Be conservative -- if skipping fails -- stop.
            fSkip = fEntityScanner.skipString(name.rawname);

            if(fSkip){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("Element SUCESSFULLY skipped = " + name.rawname);
                }
                fElementStack.push();
                fElementQName = name;
            }else{
                //if skipping fails reposition the stack or fallback to normal way of processing
                fElementStack.reposition();
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("Element was NOT skipped, REPOSITIONING stack" );
                }
            }
        }

        // Phase 2: normal scanning. Reached when elements are still being added,
        // when the stored name did not match, or when fSkip was never set.
        if(!fSkip || fAdd){
            //get the next element from the stack
            fElementQName = fElementStack.nextElement();
            // name: a QName when namespace processing is on, otherwise a plain
            // name used as both local part and raw name
            if (fNamespaces) {
                fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART);
            } else {
                String name = fEntityScanner.scanName(NameType.ELEMENTSTART);
                fElementQName.setValues(null, name, name, null);
            }

            if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString());
            if(DEBUG_SKIP_ALGORITHM){
                if(fAdd){
                    System.out.println("Elements are being ADDED -- elemet added is = " +
                            fElementQName.rawname + " at count = " + fElementStack.fCount);
                }
            }

        }

        //when the elements are being added , we need to check if we are set for skipping the elements
        if(fAdd){
            //this sets the value of fAdd variable
            fElementStack.matchElement(fElementQName);
        }


        //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
        fCurrentElement = fElementQName;

        String rawname = fElementQName.rawname;

        // seekCloseOfStartTag() sets this to true when it consumes "/>"
        fEmptyElement = false;

        fAttributes.removeAllAttributes();

        // enforce the element depth limit before looking at the rest of the tag
        checkDepth(rawname);
        // Phase 3: attributes. Keep scanning attributes until the tag is closed.
        if(!seekCloseOfStartTag()){
            fReadingAttributes = true;
            fAttributeCacheUsedCount =0;
            fStringBufferIndex =0;
            fAddDefaultAttr = true;
            do {
                scanAttribute(fAttributes);
                // the attribute-count limit is checked after every attribute so the
                // error is reported as soon as the limit is exceeded
                if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) &&
                        fAttributes.getLength() > fElementAttributeLimit){
                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                            "ElementAttributeLimit",
                            new Object[]{rawname, fElementAttributeLimit },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR );
                }

            } while (!seekCloseOfStartTag());
            fReadingAttributes=false;
        }

        // Phase 4: notification
        if (fEmptyElement) {
            //decrease the markup depth..
            fMarkupDepth--;

            // check that this element was opened in the same entity
            if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
                reportFatalError("ElementEntityMismatch",
                        new Object[]{fCurrentElement.rawname});
            }
            // call handler
            if (fDocumentHandler != null) {
                fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
            }

            //We should not be popping out the context here in endELement becaause the namespace context is still
            //valid when parser is at the endElement state.
            //if (fNamespaces) {
            //  fNamespaceContext.popContext();
            //}

            //pop the element off the stack..
            fElementStack.popElement();

        } else {

            if(dtdGrammarUtil != null)
                dtdGrammarUtil.startElement(fElementQName, fAttributes);
            if(fDocumentHandler != null){
                //complete element and attributes are traversed in this function so we can send a callback
                //here.
                //<strong>we shouldn't be sending callback in scanDocument()</strong>
                fDocumentHandler.startElement(fElementQName, fAttributes, null);
            }
        }


        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +
                "<<< scanStartElement(): "+fEmptyElement);
        return fEmptyElement;

    } // scanStartElement():boolean

    /**
     * Looks for the close of start tag, i.e. if it finds '>' or '/>'
     * Characters are consumed.
     * <p>
     * When "/>" is consumed {@code fEmptyElement} is set to true. When the
     * tag is not closed, a fatal "ElementUnterminated" error is reported
     * unless white space was skipped and the next character can start a name
     * (directly or as a high surrogate).
     *
     * @return true if the start tag was closed; false if the tag is still
     *         open, in which case the caller goes on to scan an attribute
     */
    protected boolean seekCloseOfStartTag() throws IOException, XNIException {
        // spaces
        boolean sawSpace = fEntityScanner.skipSpaces();

        // end tag?
        final int c = fEntityScanner.peekChar();
        if (c == '>') {
            fEntityScanner.scanChar(null);
            return true;
        } else if (c == '/') {
            fEntityScanner.scanChar(null);
            // '/' must be immediately followed by '>'
            if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
                reportFatalError("ElementUnterminated",
                        new Object[]{fElementQName.rawname});
            }
            fEmptyElement = true;
            return true;
        } else if (!isValidNameStartChar(c) || !sawSpace) {
            // Second chance. Check if this character is a high
            // surrogate of a valid name start character.
            if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
                reportFatalError("ElementUnterminated",
                        new Object[]{fElementQName.rawname});
            }
        }

        return false;
    }

    /** Returns true if the attribute list of the current element is not empty. */
    public boolean hasAttributes(){
        return fAttributes.getLength() > 0;
    }

    /**
     * return the attribute iterator implementation
     * <p>
     * On the first call after a start element was scanned with attributes
     * ({@code fAddDefaultAttr} set) and when a DTD grammar utility is
     * available, the DTD default attributes are added to {@code fAttributes}
     * before it is returned.
     */
    public XMLAttributesIteratorImpl getAttributeIterator(){
        if(dtdGrammarUtil != null && fAddDefaultAttr){
            dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes);
            // only add the defaults once per element
            fAddDefaultAttr = false;
        }
        return fAttributes;
    }

    /** return if standalone is set */
    public boolean standaloneSet(){
        return fStandaloneSet;
    }
    /** return if the document is standalone */
    public boolean isStandAlone(){
        return fStandalone ;
    }
    /**
     * Scans an attribute name value pair.
     * <p>
     * <pre>
     * [41] Attribute ::= Name Eq AttValue
     * </pre>
     * <p>
     * <strong>Note:</strong> This method assumes that the next
     * character on the stream is the first character of the attribute
     * name.
     * <p>
     * <strong>Note:</strong> This method uses the fAttributeQName and
     * fQName variables. The contents of these variables will be
     * destroyed.
     *
     * @param attributes The attributes list for the scanned attribute.
     */

    protected void scanAttribute(XMLAttributes attributes)
    throws IOException, XNIException {
        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()");

        // name: a QName when namespace processing is on, otherwise a plain
        // name used as both local part and raw name
        if (fNamespaces) {
            fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME);
        } else {
            String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME);
            fAttributeQName.setValues(null, name, name, null);
        }

        // equals, optionally surrounded by white space
        fEntityScanner.skipSpaces();
        if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) {
            reportFatalError("EqRequiredInAttribute",
                    new Object[] {fCurrentElement.rawname, fAttributeQName.rawname});
        }
        fEntityScanner.skipSpaces();

        int attIndex = 0 ;
        //REVISIT: one more case needs to be included: external PE and standalone is no
        boolean isVC =  fHasExternalDTD && !fStandalone;
        //fTempString would store attribute value
        ///fTempString2 would store attribute non-normalized value

        //this function doesn't use 'attIndex'. We are adding the attribute later
        //after we have figured out that current attribute is not namespace declaration
        //since scanAttributeValue doesn't use attIndex parameter therefore we
        //can safely add the attribute later..
        XMLString tmpStr = getString();

        scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes,
                attIndex, isVC, fCurrentElement.rawname, false);

        // content: remember the length before adding so a duplicate can be detected
        int oldLen = attributes.getLength();
        //if the attribute name already exists, addAttribute does not grow the list
        attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);

        // WFC: Unique Att Spec
        //attributes count will be same if the current attribute name already exists for this element name.
        //this means there are two duplicate attributes.
        if (oldLen == attributes.getLength()) {
            reportFatalError("AttributeNotUnique",
                    new Object[]{fCurrentElement.rawname,
                            fAttributeQName.rawname});
        }

        //tmpStr contains the attribute value
        //we are passing null as the String value and the XMLString as the value holder
        attributes.setValue(attIndex, null, tmpStr);

        ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM
        //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
        attributes.setSpecified(attIndex, true);

        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()");

    } // scanAttribute(XMLAttributes)

    /**
     * Scans element content and appends it to the given buffer.
     *
     * @param content buffer the scanned characters are appended to
     *
     * @return the character returned by the entity scanner's
     *         {@code scanContent}, or -1 when that character was '\r' or ']'
     *         and has already been consumed and appended here.
     */
    //CHANGED:
    //EARLIER: scanContent()
    //NOW: scanContent(XMLStringBuffer)
    //It makes things easy if this functions takes XMLStringBuffer as parameter..
    //this function appends the data to the buffer.
    protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {
        //set the fTempString length to 0 before passing it on to scanContent
        //scanContent sets the correct co-ordinates as per the content read
        fTempString.length = 0;
        int c = fEntityScanner.scanContent(fTempString);
        content.append(fTempString);
        fTempString.length = 0;
        if (c == '\r') {
            // happens when there is the character reference
            // consume the '\r' and append it as-is
            fEntityScanner.scanChar(null);
            content.append((char)c);
            c = -1;
        } else if (c == ']') {
            //fStringBuffer.clear();
            //xxx: We know the next chracter.. we should just skip it and add ']' directlry
            content.append((char)fEntityScanner.scanChar(null));
            // remember where we are in case we get an endEntity before we
            // could flush the buffer out - this happens when we're parsing an
            // entity which ends with a ]
            fInScanContent = true;
            //
            // We work on a single character basis to handle cases such as:
            // ']]]>' which we might otherwise miss.
            //
            if (fEntityScanner.skipChar(']', null)) {
                content.append(']');
                while (fEntityScanner.skipChar(']', null)) {
                    content.append(']');
                }
                // "]]>" is not allowed in character data
                if (fEntityScanner.skipChar('>', null)) {
                    reportFatalError("CDEndInContent", null);
                }
            }
            fInScanContent = false;
            c = -1;
        }
        // intentionally empty: the characters() callback is disabled here
        if (fDocumentHandler != null && content.length > 0) {
            //fDocumentHandler.characters(content, null);
        }
        return c;

    } // scanContent():int


    /**
     * Scans a CDATA section.
     * <p>
     * <strong>Note:</strong> This method uses the fTempString and
     * fStringBuffer variables.
     * <p>
     * The end of the section is signalled through fields rather than the
     * return value: when the entity scanner's {@code scanData} returns false,
     * {@code fInCData} is cleared, {@code fCDataEnd} is set and the markup
     * depth is decreased.
     *
     * @param contentBuffer buffer that {@code scanData} fills with the
     *                 section content
     * @param complete True if the CDATA section is to be scanned
     *                 completely. NOTE(review): this parameter is not read
     *                 by the method body.
     *
     * @return always true in this implementation.
     */
    //CHANGED:
    protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)
    throws IOException, XNIException {

        // intentionally empty: the startCDATA() callback is disabled here
        if (fDocumentHandler != null) {
            //fDocumentHandler.startCDATA(null);
        }

        while (true) {
            //scanData will fill the contentBuffer
            // presumably scanData returns false once the "]]>" delimiter has been found - confirm
            if (!fEntityScanner.scanData("]]>", contentBuffer, fChunkSize)) {
                fInCData = false;
                fCDataEnd = true;
                fMarkupDepth--;
                break ;
            } else {
                int c = fEntityScanner.peekChar();
                if (c != -1 && isInvalidLiteral(c)) {
                    if (XMLChar.isHighSurrogate(c)) {
                        //contentBuffer.clear();
                        //scan surrogates if any....
                        scanSurrogates(contentBuffer);
                    } else {
                        // report the offending character (hex) and step over it
                        reportFatalError("InvalidCharInCDSect",
                                new Object[]{Integer.toString(c,16)});
                        fEntityScanner.scanChar(null);
                    }
                } else {
                    //CData partially returned due to the size limit
                    break;
                }
                //by this time we have also read surrogate contents if any...
                // intentionally empty: the characters() callback is disabled here
                if (fDocumentHandler != null) {
                    //fDocumentHandler.characters(contentBuffer, null);
                }
            }
        }

        return true;

    } // scanCDATASection(XMLStringBuffer, boolean):boolean

    /**
     * Scans an end element.
     * <p>
     * <pre>
     * [42] ETag ::= '</' Name S? '>'
     * </pre>
     * <p>
     * <strong>Note:</strong> This method uses the fElementQName variable.
     * The contents of this variable will be destroyed. The caller should
     * copy the needed information out of this variable before calling
     * this method.
     *
     * @return The markup depth after the end tag has been consumed.
     */
    protected int scanEndElement() throws IOException, XNIException {
        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()");

        // pop context
        QName endElementName = fElementStack.popElement();

        String rawname = endElementName.rawname;
        if(DEBUG)System.out.println("endElementName = " + endElementName.toString());
        // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
        //In scanners most of the time is consumed on checks done for XML characters, we can
        // optimize on it and avoid the checks done for endElement,
        //we will also avoid symbol table lookup - neeraj.bajaj@sun.com

        // this should work both for namespace processing true or false...

        //REVISIT: if the string is not the same as expected.. we need to do better error handling..
        //We can skip this for now... In any case if the string doesn't match -- document is not well formed.

        if (!fEntityScanner.skipString(endElementName.rawname)) {
             reportFatalError("ETagRequired", new Object[]{rawname});
        }

        // end
        fEntityScanner.skipSpaces();
        if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
            reportFatalError("ETagUnterminated",
                    new Object[]{rawname});
        }
        fMarkupDepth--;

        //we have increased the depth for two markup "<" characters
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError("ElementEntityMismatch",
                    new Object[]{rawname});
        }

        //We should not be popping out the context here in endELement becaause the namespace context is still
        //valid when parser is at the endElement state.

        //if (fNamespaces) {
        //  fNamespaceContext.popContext();
        //}

        // call handler
        if (fDocumentHandler != null ) {
            //end element is scanned in this function so we can send a callback
            //here.
            //<strong>we shouldn't be sending callback in scanDocument()</strong>

            fDocumentHandler.endElement(endElementName, null);
        }
        if(dtdGrammarUtil != null)
            dtdGrammarUtil.endElement(endElementName);

        return fMarkupDepth;

    } // scanEndElement():int

    /**
     * Scans a character reference.
1725 * <p> 1726 * <pre> 1727 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1728 * </pre> 1729 */ 1730 protected void scanCharReference() 1731 throws IOException, XNIException { 1732 1733 fStringBuffer2.clear(); 1734 int ch = scanCharReferenceValue(fStringBuffer2, null); 1735 fMarkupDepth--; 1736 if (ch != -1) { 1737 // call handler 1738 1739 if (fDocumentHandler != null) { 1740 if (fNotifyCharRefs) { 1741 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1742 } 1743 Augmentations augs = null; 1744 if (fValidation && ch <= 0x20) { 1745 if (fTempAugmentations != null) { 1746 fTempAugmentations.removeAllItems(); 1747 } 1748 else { 1749 fTempAugmentations = new AugmentationsImpl(); 1750 } 1751 augs = fTempAugmentations; 1752 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1753 } 1754 //xxx: How do we deal with this - how to return charReferenceValues 1755 //now this is being commented because this is taken care in scanDocument() 1756 //fDocumentHandler.characters(fStringBuffer2, null); 1757 if (fNotifyCharRefs) { 1758 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1759 } 1760 } 1761 } 1762 1763 } // scanCharReference() 1764 1765 1766 /** 1767 * Scans an entity reference. 1768 * 1769 * @return returns true if the new entity is started. If it was built-in entity 1770 * 'false' is returned. 1771 * @throws IOException Thrown if i/o error occurs. 1772 * @throws XNIException Thrown if handler throws exception upon 1773 * notification. 
     */
    protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException {
        String name = fEntityScanner.scanName(NameType.REFERENCE);
        if (name == null) {
            reportFatalError("NameRequiredInReference", null);
            return;
        }
        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
            reportFatalError("SemicolonRequiredInReference", new Object []{name});
        }
        if (fEntityStore.isUnparsedEntity(name)) {
            reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
        }
        fMarkupDepth--;
        // cached so getEntityName() can report the reference being processed
        fCurrentEntityName = name;

        // handle built-in entities
        // identity comparison against the predefined symbols; presumably scanned
        // names are interned in the symbol table - confirm
        if (name == fAmpSymbol) {
            handleCharacter('&', fAmpSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fLtSymbol) {
            handleCharacter('<', fLtSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fGtSymbol) {
            handleCharacter('>', fGtSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fQuotSymbol) {
            handleCharacter('"', fQuotSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        } else if (name == fAposSymbol) {
            handleCharacter('\'', fAposSymbol, content);
            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
            return ;
        }

        // Do not expand the reference (report it as a reference instead) when:
        //1. the entity is external and support to external entities is not required
        //2. or the entity is internal and entities should not be replaced
        //3. or a built-in entity reference has been found (foundBuiltInRefs).
        boolean isEE = fEntityStore.isExternalEntity(name);
        if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){
            fScannerState = SCANNER_STATE_REFERENCE;
            return ;
        }
        // start general entity
        if (!fEntityStore.isDeclaredEntity(name)) {
            //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception
            if (!fSupportDTD && fReplaceEntityReferences) {
                reportFatalError("EntityNotDeclared", new Object[]{name});
                return;
            }
            //REVISIT: one more case needs to be included: external PE and standalone is no
            // with an external DTD in a non-standalone document an undeclared entity is
            // only a (validity) error, reported when validating; otherwise it is fatal
            if ( fHasExternalDTD && !fStandalone) {
                if (fValidation)
                    fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
                            new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
            } else
                reportFatalError("EntityNotDeclared", new Object[]{name});
        }
        //we are starting the entity even if the entity was not declared
        //if that was the case it its taken care in XMLEntityManager.startEntity()
        //we immediately call the endEntity. Application gets to know if there was
        //any entity that was not declared.
        fEntityManager.startEntity(true, name, false);
        //set the scaner state to content.. parser will automatically revive itself at any point of time.
        //setScannerState(SCANNER_STATE_CONTENT);
        //return true ;
    } // scanEntityReference()

    // utility methods

    /**
     * Check if the depth exceeds the maxElementDepth limit
     * <p>
     * The current element depth is recorded in the limit analyzer; when the
     * security manager reports the limit as exceeded, the analyzer state is
     * debug-printed and a fatal "MaxElementDepthLimit" error is reported.
     * <p>
     * NOTE(review): unlike the attribute-limit check in scanStartElement(),
     * {@code fSecurityManager} is dereferenced here without a null check -
     * confirm it can never be null at this point.
     *
     * @param elementName name of the current element
     */
    void checkDepth(String elementName) {
        fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth);
        if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) {
            fSecurityManager.debugPrint(fLimitAnalyzer);
            reportFatalError("MaxElementDepthLimit", new Object[]{elementName,
                fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT),
                fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT),
                "maxElementDepth"});
        }
    }

    /**
     * Calls document handler with a single character resulting from
     * built-in entity resolution.
     *
     * @param c       the character the built-in entity stands for
     * @param entity  built-in name
     * @param content buffer the character is appended to
     *
     * we really dont need to call this function -- this function is only required when
     * we integrate with rest of Xerces2. SO maintaining the current behavior and still
     * calling this function to handle built-in entity reference.
1874 * 1875 */ 1876 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1877 foundBuiltInRefs = true; 1878 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1879 content.append(c); 1880 if (fDocumentHandler != null) { 1881 fSingleChar[0] = c; 1882 if (fNotifyBuiltInRefs) { 1883 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1884 } 1885 fTempString.setValues(fSingleChar, 0, 1); 1886 if(!fIsCoalesce){ 1887 fDocumentHandler.characters(fTempString, null); 1888 builtInRefCharacterHandled = true; 1889 } 1890 1891 if (fNotifyBuiltInRefs) { 1892 fDocumentHandler.endGeneralEntity(entity, null); 1893 } 1894 } 1895 } // handleCharacter(char) 1896 1897 // helper methods 1898 1899 /** 1900 * Sets the scanner state. 1901 * 1902 * @param state The new scanner state. 1903 */ 1904 protected final void setScannerState(int state) { 1905 1906 fScannerState = state; 1907 if (DEBUG_SCANNER_STATE) { 1908 System.out.print("### setScannerState: "); 1909 //System.out.print(fScannerState); 1910 System.out.print(getScannerStateName(state)); 1911 System.out.println(); 1912 } 1913 1914 } // setScannerState(int) 1915 1916 1917 /** 1918 * Sets the Driver. 1919 * 1920 * @param Driver The new Driver. 1921 */ 1922 protected final void setDriver(Driver driver) { 1923 fDriver = driver; 1924 if (DEBUG_DISPATCHER) { 1925 System.out.print("%%% setDriver: "); 1926 System.out.print(getDriverName(driver)); 1927 System.out.println(); 1928 } 1929 } 1930 1931 // 1932 // Private methods 1933 // 1934 1935 /** Returns the scanner state name. 
*/ 1936 protected String getScannerStateName(int state) { 1937 1938 switch (state) { 1939 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1940 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1941 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1942 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1943 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1944 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1945 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1946 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1947 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1948 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1949 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1950 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1951 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1952 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1953 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1954 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1955 } 1956 1957 return "??? ("+state+')'; 1958 1959 } // getScannerStateName(int):String 1960 public String getEntityName(){ 1961 //return the cached name 1962 return fCurrentEntityName; 1963 } 1964 1965 /** Returns the driver name. 
*/ 1966 public String getDriverName(Driver driver) { 1967 1968 if (DEBUG_DISPATCHER) { 1969 if (driver != null) { 1970 String name = driver.getClass().getName(); 1971 int index = name.lastIndexOf('.'); 1972 if (index != -1) { 1973 name = name.substring(index + 1); 1974 index = name.lastIndexOf('$'); 1975 if (index != -1) { 1976 name = name.substring(index + 1); 1977 } 1978 } 1979 return name; 1980 } 1981 } 1982 return "null"; 1983 1984 } // getDriverName():String 1985 1986 /** 1987 * Check the protocol used in the systemId against allowed protocols 1988 * 1989 * @param systemId the Id of the URI 1990 * @param allowedProtocols a list of allowed protocols separated by comma 1991 * @return the name of the protocol if rejected, null otherwise 1992 */ 1993 String checkAccess(String systemId, String allowedProtocols) throws IOException { 1994 String baseSystemId = fEntityScanner.getBaseSystemId(); 1995 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 1996 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 1997 } 1998 1999 // 2000 // Classes 2001 // 2002 2003 /** 2004 * @author Neeraj Bajaj, Sun Microsystems. 2005 */ 2006 protected static final class Element { 2007 2008 // 2009 // Data 2010 // 2011 2012 /** Symbol. */ 2013 public QName qname; 2014 2015 //raw name stored as characters 2016 public char[] fRawname; 2017 2018 /** The next Element entry. */ 2019 public Element next; 2020 2021 // 2022 // Constructors 2023 // 2024 2025 /** 2026 * Constructs a new Element from the given QName and next Element 2027 * reference. 2028 */ 2029 public Element(QName qname, Element next) { 2030 this.qname.setValues(qname); 2031 this.fRawname = qname.rawname.toCharArray(); 2032 this.next = next; 2033 } 2034 2035 } // class Element 2036 2037 /** 2038 * Element stack. 2039 * 2040 * @author Neeraj Bajaj, Sun Microsystems. 
2041 */ 2042 protected class ElementStack2 { 2043 2044 // 2045 // Data 2046 // 2047 2048 /** The stack data. */ 2049 protected QName [] fQName = new QName[20]; 2050 2051 //Element depth 2052 protected int fDepth; 2053 //total number of elements 2054 protected int fCount; 2055 //current position 2056 protected int fPosition; 2057 //Mark refers to the position 2058 protected int fMark; 2059 2060 protected int fLastDepth ; 2061 2062 // 2063 // Constructors 2064 // 2065 2066 /** Default constructor. */ 2067 public ElementStack2() { 2068 for (int i = 0; i < fQName.length; i++) { 2069 fQName[i] = new QName(); 2070 } 2071 fMark = fPosition = 1; 2072 } // <init>() 2073 2074 public void resize(){ 2075 /** 2076 * int length = fElements.length; 2077 * Element [] temp = new Element[length * 2]; 2078 * System.arraycopy(fElements, 0, temp, 0, length); 2079 * fElements = temp; 2080 */ 2081 //resize QNames 2082 int oldLength = fQName.length; 2083 QName [] tmp = new QName[oldLength * 2]; 2084 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2085 fQName = tmp; 2086 2087 for (int i = oldLength; i < fQName.length; i++) { 2088 fQName[i] = new QName(); 2089 } 2090 2091 } 2092 2093 2094 // 2095 // Public methods 2096 // 2097 2098 /** Check if the element scanned during the start element 2099 *matches the stored element. 2100 * 2101 *@return true if the match suceeds. 
2102 */ 2103 public boolean matchElement(QName element) { 2104 //last depth is the depth when last elemnt was pushed 2105 //if last depth is greater than current depth 2106 if(DEBUG_SKIP_ALGORITHM){ 2107 System.out.println("fLastDepth = " + fLastDepth); 2108 System.out.println("fDepth = " + fDepth); 2109 } 2110 boolean match = false; 2111 if(fLastDepth > fDepth && fDepth <= 2){ 2112 if(DEBUG_SKIP_ALGORITHM){ 2113 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2114 } 2115 if(element.rawname == fQName[fDepth].rawname){ 2116 fAdd = false; 2117 //mark this position 2118 //decrease the depth by 1 as arrays are 0 based 2119 fMark = fDepth - 1; 2120 //we found the match and from next element skipping will start, add 1 2121 fPosition = fMark + 1 ; 2122 match = true; 2123 //Once we get match decrease the count -- this was increased by nextElement() 2124 --fCount; 2125 if(DEBUG_SKIP_ALGORITHM){ 2126 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2127 System.out.println("fMark = " + fMark); 2128 System.out.println("fPosition = " + fPosition); 2129 System.out.println("fDepth = " + fDepth); 2130 System.out.println("fCount = " + fCount); 2131 } 2132 }else{ 2133 fAdd = true; 2134 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2135 } 2136 } 2137 //store the last depth 2138 fLastDepth = fDepth++; 2139 return match; 2140 } // pushElement(QName):QName 2141 2142 /** 2143 * This function doesn't increase depth. The function in this function is 2144 *broken down into two functions for efficiency. <@see>matchElement</see>. 2145 * This function just returns the pointer to the object and its values are set. 
2146 * 2147 *@return QName reference to the next element in the list 2148 */ 2149 public QName nextElement() { 2150 2151 //if number of elements becomes equal to the length of array -- stop the skipping 2152 if (fCount == fQName.length) { 2153 fShouldSkip = false; 2154 fAdd = false; 2155 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2156 //xxx: this is not correct, we are returning the last element 2157 //this wont make any difference since flag has been set to 'false' 2158 return fQName[--fCount]; 2159 } 2160 if(DEBUG_SKIP_ALGORITHM){ 2161 System.out.println("fCount = " + fCount); 2162 } 2163 return fQName[fCount++]; 2164 2165 } 2166 2167 /** Note that this function is considerably different than nextElement() 2168 * This function just returns the previously stored elements 2169 */ 2170 public QName getNext(){ 2171 //when position reaches number of elements in the list.. 2172 //set the position back to mark, making it a circular linked list. 2173 if(fPosition == fCount){ 2174 fPosition = fMark; 2175 } 2176 return fQName[fPosition++]; 2177 } 2178 2179 /** returns the current depth 2180 */ 2181 public int popElement(){ 2182 return fDepth--; 2183 } 2184 2185 2186 /** Clears the stack without throwing away existing QName objects. */ 2187 public void clear() { 2188 fLastDepth = 0; 2189 fDepth = 0; 2190 fCount = 0 ; 2191 fPosition = fMark = 1; 2192 } // clear() 2193 2194 } // class ElementStack 2195 2196 /** 2197 * Element stack. This stack operates without synchronization, error 2198 * checking, and it re-uses objects instead of throwing popped items 2199 * away. 2200 * 2201 * @author Andy Clark, IBM 2202 */ 2203 protected class ElementStack { 2204 2205 // 2206 // Data 2207 // 2208 2209 /** The stack data. 
*/ 2210 protected QName[] fElements; 2211 protected int [] fInt = new int[20]; 2212 2213 2214 //Element depth 2215 protected int fDepth; 2216 //total number of elements 2217 protected int fCount; 2218 //current position 2219 protected int fPosition; 2220 //Mark refers to the position 2221 protected int fMark; 2222 2223 protected int fLastDepth ; 2224 2225 // 2226 // Constructors 2227 // 2228 2229 /** Default constructor. */ 2230 public ElementStack() { 2231 fElements = new QName[20]; 2232 for (int i = 0; i < fElements.length; i++) { 2233 fElements[i] = new QName(); 2234 } 2235 } // <init>() 2236 2237 // 2238 // Public methods 2239 // 2240 2241 /** 2242 * Pushes an element on the stack. 2243 * <p> 2244 * <strong>Note:</strong> The QName values are copied into the 2245 * stack. In other words, the caller does <em>not</em> orphan 2246 * the element to the stack. Also, the QName object returned 2247 * is <em>not</em> orphaned to the caller. It should be 2248 * considered read-only. 2249 * 2250 * @param element The element to push onto the stack. 2251 * 2252 * @return Returns the actual QName object that stores the 2253 */ 2254 //XXX: THIS FUNCTION IS NOT USED 2255 public QName pushElement(QName element) { 2256 if (fDepth == fElements.length) { 2257 QName[] array = new QName[fElements.length * 2]; 2258 System.arraycopy(fElements, 0, array, 0, fDepth); 2259 fElements = array; 2260 for (int i = fDepth; i < fElements.length; i++) { 2261 fElements[i] = new QName(); 2262 } 2263 } 2264 fElements[fDepth].setValues(element); 2265 return fElements[fDepth++]; 2266 } // pushElement(QName):QName 2267 2268 2269 /** Note that this function is considerably different than nextElement() 2270 * This function just returns the previously stored elements 2271 */ 2272 public QName getNext(){ 2273 //when position reaches number of elements in the list.. 2274 //set the position back to mark, making it a circular linked list. 
2275 if(fPosition == fCount){ 2276 fPosition = fMark; 2277 } 2278 //store the position of last opened tag at particular depth 2279 //fInt[++fDepth] = fPosition; 2280 if(DEBUG_SKIP_ALGORITHM){ 2281 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2282 } 2283 //return fElements[fPosition++]; 2284 return fElements[fPosition]; 2285 } 2286 2287 /** This function should be called only when element was skipped sucessfully. 2288 * 1. Increase the depth - because element was sucessfully skipped. 2289 *2. Store the position of the element token in array "last opened tag" at depth. 2290 *3. increase the position counter so as to point to the next element in the array 2291 */ 2292 public void push(){ 2293 2294 fInt[++fDepth] = fPosition++; 2295 } 2296 2297 /** Check if the element scanned during the start element 2298 *matches the stored element. 2299 * 2300 *@return true if the match suceeds. 2301 */ 2302 public boolean matchElement(QName element) { 2303 //last depth is the depth when last elemnt was pushed 2304 //if last depth is greater than current depth 2305 //if(DEBUG_SKIP_ALGORITHM){ 2306 // System.out.println("Check if the element " + element.rawname + " matches"); 2307 // System.out.println("fLastDepth = " + fLastDepth); 2308 // System.out.println("fDepth = " + fDepth); 2309 //} 2310 boolean match = false; 2311 if(fLastDepth > fDepth && fDepth <= 3){ 2312 if(DEBUG_SKIP_ALGORITHM){ 2313 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2314 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2315 } 2316 if(element.rawname == fElements[fDepth - 1].rawname){ 2317 fAdd = false; 2318 //mark this position 2319 //decrease the depth by 1 as arrays are 0 based 2320 fMark = fDepth - 1; 2321 //we found the match 2322 fPosition = fMark; 2323 match = true; 2324 //Once we get match decrease 
the count -- this was increased by nextElement() 2325 --fCount; 2326 if(DEBUG_SKIP_ALGORITHM){ 2327 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2328 System.out.println("fMark = " + fMark); 2329 System.out.println("fPosition = " + fPosition); 2330 System.out.println("fDepth = " + fDepth); 2331 System.out.println("fCount = " + fCount); 2332 System.out.println("---------MATCH SUCEEDED-----------------"); 2333 System.out.println(""); 2334 } 2335 }else{ 2336 fAdd = true; 2337 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2338 } 2339 } 2340 //store the position for the current depth 2341 //when we are adding the elements, when skipping 2342 //starts even then this should be tracked ie. when 2343 //calling getNext() 2344 if(match){ 2345 //from next element skipping will start, add 1 2346 fInt[fDepth] = fPosition++; 2347 } else{ 2348 if(DEBUG_SKIP_ALGORITHM){ 2349 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2350 } 2351 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 
2352 fInt[fDepth] = fCount - 1; 2353 } 2354 2355 //if number of elements becomes equal to the length of array -- stop the skipping 2356 //xxx: should we do "fCount == fInt.length" 2357 if (fCount == fElements.length) { 2358 fSkip = false; 2359 fAdd = false; 2360 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2361 reposition(); 2362 if(DEBUG_SKIP_ALGORITHM){ 2363 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2364 System.out.println("REPOSITIONING THE STACK"); 2365 System.out.println("-----------SKIPPING STOPPED----------"); 2366 System.out.println(""); 2367 } 2368 return false; 2369 } 2370 if(DEBUG_SKIP_ALGORITHM){ 2371 if(match){ 2372 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2373 }else{ 2374 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2375 } 2376 } 2377 //store the last depth 2378 fLastDepth = fDepth; 2379 return match; 2380 } // matchElement(QName):QName 2381 2382 2383 /** 2384 * Returns the next element on the stack. 2385 * 2386 * @return Returns the actual QName object. Callee should 2387 * use this object to store the details of next element encountered. 2388 */ 2389 public QName nextElement() { 2390 if(fSkip){ 2391 fDepth++; 2392 //boundary checks are done in matchElement() 2393 return fElements[fCount++]; 2394 } else if (fDepth == fElements.length) { 2395 QName[] array = new QName[fElements.length * 2]; 2396 System.arraycopy(fElements, 0, array, 0, fDepth); 2397 fElements = array; 2398 for (int i = fDepth; i < fElements.length; i++) { 2399 fElements[i] = new QName(); 2400 } 2401 } 2402 2403 return fElements[fDepth++]; 2404 2405 } // pushElement(QName):QName 2406 2407 2408 /** 2409 * Pops an element off of the stack by setting the values of 2410 * the specified QName. 2411 * <p> 2412 * <strong>Note:</strong> The object returned is <em>not</em> 2413 * orphaned to the caller. 
Therefore, the caller should consider 2414 * the object to be read-only. 2415 */ 2416 public QName popElement() { 2417 //return the same object that was pushed -- this would avoid 2418 //setting the values for every end element. 2419 //STRONG: this object is read only -- this object reference shouldn't be stored. 2420 if(fSkip || fAdd ){ 2421 if(DEBUG_SKIP_ALGORITHM){ 2422 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2423 System.out.println(""); 2424 } 2425 return fElements[fInt[fDepth--]]; 2426 } else{ 2427 if(DEBUG_SKIP_ALGORITHM){ 2428 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2429 } 2430 return fElements[--fDepth] ; 2431 } 2432 //element.setValues(fElements[--fDepth]); 2433 } // popElement(QName) 2434 2435 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2436 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2437 *as per the depth. 2438 */ 2439 public void reposition(){ 2440 for( int i = 2 ; i <= fDepth ; i++){ 2441 fElements[i-1] = fElements[fInt[i]]; 2442 } 2443 if(DEBUG_SKIP_ALGORITHM){ 2444 for( int i = 0 ; i < fDepth ; i++){ 2445 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2446 } 2447 } 2448 } 2449 2450 /** Clears the stack without throwing away existing QName objects. */ 2451 public void clear() { 2452 fDepth = 0; 2453 fLastDepth = 0; 2454 fCount = 0 ; 2455 fPosition = fMark = 1; 2456 2457 } // clear() 2458 2459 /** 2460 * This function is as a result of optimization done for endElement -- 2461 * we dont need to set the value for every end element encouterd. 2462 * For Well formedness checks we can have the same QName object that was pushed. 
2463 * the values will be set only if application need to know about the endElement 2464 * -- neeraj.bajaj@sun.com 2465 */ 2466 2467 public QName getLastPoppedElement(){ 2468 return fElements[fDepth]; 2469 } 2470 } // class ElementStack 2471 2472 /** 2473 * Drives the parser to the next state/event on the input. Parser is guaranteed 2474 * to stop at the next state/event. 2475 * 2476 * Internally XML document is divided into several states. Each state represents 2477 * a sections of XML document. When this functions returns normally, it has read 2478 * the section of XML document and returns the state corresponding to section of 2479 * document which has been read. For optimizations, a particular driver 2480 * can read ahead of the section of document (state returned) just read and 2481 * can maintain a different internal state. 2482 * 2483 * 2484 * @author Neeraj Bajaj, Sun Microsystems 2485 */ 2486 protected interface Driver { 2487 2488 2489 /** 2490 * Drives the parser to the next state/event on the input. Parser is guaranteed 2491 * to stop at the next state/event. 2492 * 2493 * Internally XML document is divided into several states. Each state represents 2494 * a sections of XML document. When this functions returns normally, it has read 2495 * the section of XML document and returns the state corresponding to section of 2496 * document which has been read. For optimizations, a particular driver 2497 * can read ahead of the section of document (state returned) just read and 2498 * can maintain a different internal state. 2499 * 2500 * @return state representing the section of document just read. 2501 * 2502 * @throws IOException Thrown on i/o error. 2503 * @throws XNIException Thrown on parse error. 2504 */ 2505 2506 public int next() throws IOException, XNIException; 2507 2508 } // interface Driver 2509 2510 /** 2511 * Driver to handle content scanning. This driver is capable of reading 2512 * the fragment of XML document. 
When it has finished reading fragment 2513 * of XML documents, it can pass the job of reading to another driver. 2514 * 2515 * This class has been modified as per the new design which is more suited to 2516 * efficiently build pull parser. Lot of performance improvements have been done and 2517 * the code has been added to support stax functionality/features. 2518 * 2519 * @author Neeraj Bajaj, Sun Microsystems 2520 * 2521 * 2522 * @author Andy Clark, IBM 2523 * @author Eric Ye, IBM 2524 */ 2525 protected class FragmentContentDriver 2526 implements Driver { 2527 2528 // 2529 // Driver methods 2530 // 2531 2532 /** 2533 * decides the appropriate state of the parser 2534 */ 2535 private void startOfMarkup() throws IOException { 2536 fMarkupDepth++; 2537 final int ch = fEntityScanner.peekChar(); 2538 if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { 2539 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2540 } else { 2541 switch(ch){ 2542 case '?' :{ 2543 setScannerState(SCANNER_STATE_PI); 2544 fEntityScanner.skipChar(ch, null); 2545 break; 2546 } 2547 case '!' 
:{ 2548 fEntityScanner.skipChar(ch, null); 2549 if (fEntityScanner.skipChar('-', null)) { 2550 if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { 2551 reportFatalError("InvalidCommentStart", 2552 null); 2553 } 2554 setScannerState(SCANNER_STATE_COMMENT); 2555 } else if (fEntityScanner.skipString(CDATA)) { 2556 fCDataStart = true; 2557 setScannerState(SCANNER_STATE_CDATA ); 2558 } else if (!scanForDoctypeHook()) { 2559 reportFatalError("MarkupNotRecognizedInContent", 2560 null); 2561 } 2562 break; 2563 } 2564 case '/' :{ 2565 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2566 fEntityScanner.skipChar(ch, NameType.ELEMENTEND); 2567 break; 2568 } 2569 default :{ 2570 reportFatalError("MarkupNotRecognizedInContent", null); 2571 } 2572 } 2573 } 2574 2575 }//startOfMarkup 2576 2577 private void startOfContent() throws IOException { 2578 if (fEntityScanner.skipChar('<', null)) { 2579 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2580 } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { 2581 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2582 } else { 2583 //element content is there.. 2584 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2585 } 2586 }//startOfContent 2587 2588 2589 /** 2590 * 2591 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2592 * At any point of time when in doubt over the current state of the parser, the state should be 2593 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2594 * the parser to one of its sub state. 2595 * sub states are defined in the parser on the basis of different XML component like 2596 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2597 * These sub states help the parser to have fine control over the parsing. These are the 2598 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2599 * decided if paresr needs to stop at next milepost ?? 2600 * 2601 */ 2602 public void decideSubState() throws IOException { 2603 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2604 2605 switch (fScannerState) { 2606 2607 case SCANNER_STATE_CONTENT: { 2608 startOfContent() ; 2609 break; 2610 } 2611 2612 case SCANNER_STATE_START_OF_MARKUP: { 2613 startOfMarkup() ; 2614 break; 2615 } 2616 } 2617 } 2618 }//decideSubState 2619 2620 /** 2621 * Drives the parser to the next state/event on the input. Parser is guaranteed 2622 * to stop at the next state/event. Internally XML document 2623 * is divided into several states. Each state represents a sections of XML 2624 * document. When this functions returns normally, it has read the section 2625 * of XML document and returns the state corresponding to section of 2626 * document which has been read. For optimizations, a particular driver 2627 * can read ahead of the section of document (state returned) just read and 2628 * can maintain a different internal state. 2629 * 2630 * State returned corresponds to Stax states. 2631 * 2632 * @return state representing the section of document just read. 2633 * 2634 * @throws IOException Thrown on i/o error. 2635 * @throws XNIException Thrown on parse error. 2636 */ 2637 2638 public int next() throws IOException, XNIException { 2639 while (true) { 2640 try { 2641 2642 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2643 //decideSubState. 2644 2645 if (fScannerState == SCANNER_STATE_CONTENT) { 2646 final int ch = fEntityScanner.peekChar(); 2647 if (ch == '<') { 2648 fEntityScanner.scanChar(null); 2649 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2650 } else if (ch == '&') { 2651 fEntityScanner.scanChar(NameType.REFERENCE); 2652 setScannerState(SCANNER_STATE_REFERENCE) ; 2653 } else { 2654 //element content is there.. 
2655 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2656 } 2657 } 2658 2659 if (fScannerState == SCANNER_STATE_START_OF_MARKUP) { 2660 startOfMarkup(); 2661 } 2662 2663 //decideSubState() ; 2664 2665 //do some special handling if isCoalesce is set to true. 2666 if (fIsCoalesce) { 2667 fUsebuffer = true ; 2668 //if the last section was character data 2669 if (fLastSectionWasCharacterData) { 2670 2671 //if we dont encounter any CDATA or ENTITY REFERENCE and 2672 //current state is also not SCANNER_STATE_CHARACTER_DATA 2673 //return the last scanned charactrer data. 2674 if ((fScannerState != SCANNER_STATE_CDATA) 2675 && (fScannerState != SCANNER_STATE_REFERENCE) 2676 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) { 2677 fLastSectionWasCharacterData = false; 2678 return XMLEvent.CHARACTERS; 2679 } 2680 }//if last section was CDATA or ENTITY REFERENCE 2681 //xxx: there might be another entity reference or CDATA after this 2682 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2683 else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) { 2684 //and current state is not SCANNER_STATE_CHARACTER_DATA 2685 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2686 //this means there is nothing more to be coalesced. 2687 //return the CHARACTERS event. 
2688 if ((fScannerState != SCANNER_STATE_CDATA) 2689 && (fScannerState != SCANNER_STATE_REFERENCE) 2690 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2691 2692 fLastSectionWasCData = false; 2693 fLastSectionWasEntityReference = false; 2694 return XMLEvent.CHARACTERS; 2695 } 2696 } 2697 } 2698 2699 switch(fScannerState){ 2700 2701 case XMLEvent.START_DOCUMENT : 2702 return XMLEvent.START_DOCUMENT; 2703 2704 case SCANNER_STATE_START_ELEMENT_TAG :{ 2705 2706 //returns true if the element is empty 2707 fEmptyElement = scanStartElement() ; 2708 //if the element is empty the next event is "end element" 2709 if(fEmptyElement){ 2710 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2711 }else{ 2712 //set the next possible state 2713 setScannerState(SCANNER_STATE_CONTENT); 2714 } 2715 return XMLEvent.START_ELEMENT ; 2716 } 2717 2718 case SCANNER_STATE_CHARACTER_DATA: { 2719 2720 //if last section was either entity reference or cdata or 2721 //character data we should be using buffer 2722 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData 2723 || fLastSectionWasCharacterData ; 2724 2725 //When coalesce is set to true and last state was REFERENCE or 2726 //CDATA or CHARACTER_DATA, buffer should not be cleared. 
2727 if( fIsCoalesce && (fLastSectionWasEntityReference || 2728 fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2729 fLastSectionWasEntityReference = false; 2730 fLastSectionWasCData = false; 2731 fLastSectionWasCharacterData = true ; 2732 fUsebuffer = true; 2733 }else{ 2734 //clear the buffer 2735 fContentBuffer.clear(); 2736 } 2737 2738 //set the fTempString length to 0 before passing it on to scanContent 2739 //scanContent sets the correct co-ordinates as per the content read 2740 fTempString.length = 0; 2741 int c = fEntityScanner.scanContent(fTempString); 2742 2743 if(fEntityScanner.skipChar('<', null)){ 2744 //check if we have reached end of element 2745 if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ 2746 //increase the mark up depth 2747 fMarkupDepth++; 2748 fLastSectionWasCharacterData = false; 2749 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2750 //check if its start of new element 2751 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2752 fMarkupDepth++; 2753 fLastSectionWasCharacterData = false; 2754 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2755 }else{ 2756 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2757 //there can be cdata ahead if coalesce is true we should call again 2758 if(fIsCoalesce){ 2759 fLastSectionWasCharacterData = true; 2760 bufferContent(); 2761 continue; 2762 } 2763 } 2764 //in case last section was either entity reference or 2765 //cdata or character data -- we should be using buffer 2766 if(fUsebuffer){ 2767 bufferContent(); 2768 } 2769 2770 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2771 if(DEBUG)System.out.println("Return SPACE EVENT"); 2772 return XMLEvent.SPACE; 2773 }else 2774 return XMLEvent.CHARACTERS; 2775 2776 } else{ 2777 bufferContent(); 2778 } 2779 if (c == '\r') { 2780 if(DEBUG){ 2781 System.out.println("'\r' character found"); 2782 } 2783 // happens when there is the character reference 2784 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 2785 fEntityScanner.scanChar(null); 2786 fUsebuffer = true; 2787 fContentBuffer.append((char)c); 2788 c = -1 ; 2789 } else if (c == ']') { 2790 //fStringBuffer.clear(); 2791 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2792 fUsebuffer = true; 2793 fContentBuffer.append((char)fEntityScanner.scanChar(null)); 2794 // remember where we are in case we get an endEntity before we 2795 // could flush the buffer out - this happens when we're parsing an 2796 // entity which ends with a ] 2797 fInScanContent = true; 2798 2799 // We work on a single character basis to handle cases such as: 2800 // ']]]>' which we might otherwise miss. 2801 // 2802 if (fEntityScanner.skipChar(']', null)) { 2803 fContentBuffer.append(']'); 2804 while (fEntityScanner.skipChar(']', null)) { 2805 fContentBuffer.append(']'); 2806 } 2807 if (fEntityScanner.skipChar('>', null)) { 2808 reportFatalError("CDEndInContent", null); 2809 } 2810 } 2811 c = -1 ; 2812 fInScanContent = false; 2813 } 2814 2815 do{ 2816 //xxx: we should be using only one buffer.. 2817 // we need not to grow the buffer only when isCoalesce() is not true; 2818 2819 if (c == '<') { 2820 fEntityScanner.scanChar(null); 2821 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2822 break; 2823 }//xxx what should be the behavior if entity reference is present in the content ? 2824 else if (c == '&') { 2825 fEntityScanner.scanChar(NameType.REFERENCE); 2826 setScannerState(SCANNER_STATE_REFERENCE); 2827 break; 2828 }///xxx since this part is also characters, it should be merged... 
2829 else if (c != -1 && isInvalidLiteral(c)) { 2830 if (XMLChar.isHighSurrogate(c)) { 2831 // special case: surrogates 2832 scanSurrogates(fContentBuffer) ; 2833 setScannerState(SCANNER_STATE_CONTENT); 2834 } else { 2835 reportFatalError("InvalidCharInContent", 2836 new Object[] { 2837 Integer.toString(c, 16)}); 2838 fEntityScanner.scanChar(null); 2839 } 2840 break; 2841 } 2842 //xxx: scanContent also gives character callback. 2843 c = scanContent(fContentBuffer) ; 2844 //we should not be iterating again if fIsCoalesce is not set to true 2845 2846 if(!fIsCoalesce){ 2847 setScannerState(SCANNER_STATE_CONTENT); 2848 break; 2849 } 2850 2851 }while(true); 2852 2853 //if (fDocumentHandler != null) { 2854 // fDocumentHandler.characters(fContentBuffer, null); 2855 //} 2856 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2857 //if fIsCoalesce is true there might be more data so call fDriver.next() 2858 if(fIsCoalesce){ 2859 fLastSectionWasCharacterData = true ; 2860 continue; 2861 }else{ 2862 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2863 if(DEBUG)System.out.println("Return SPACE EVENT"); 2864 return XMLEvent.SPACE; 2865 } else 2866 return XMLEvent.CHARACTERS ; 2867 } 2868 } 2869 2870 case SCANNER_STATE_END_ELEMENT_TAG :{ 2871 if(fEmptyElement){ 2872 //set it back to false. 2873 fEmptyElement = false; 2874 setScannerState(SCANNER_STATE_CONTENT); 2875 //check the case when there is comment after single element document 2876 //<foo/> and some comment after this 2877 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
2878 XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2879 2880 } else if(scanEndElement() == 0) { 2881 //It is last element of the document 2882 if (elementDepthIsZeroHook()) { 2883 //if element depth is zero , it indicates the end of the document 2884 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2885 //xxx understand this point once again.. 2886 return XMLEvent.END_ELEMENT ; 2887 } 2888 2889 } 2890 setScannerState(SCANNER_STATE_CONTENT); 2891 return XMLEvent.END_ELEMENT ; 2892 } 2893 2894 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2895 scanComment(); 2896 setScannerState(SCANNER_STATE_CONTENT); 2897 return XMLEvent.COMMENT; 2898 //break; 2899 } 2900 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2901 //clear the buffer first 2902 fContentBuffer.clear() ; 2903 //xxx: which buffer should be passed. Ideally we shouldn't have 2904 //more than two buffers -- 2905 //xxx: where should we add the switch for buffering. 2906 scanPI(fContentBuffer); 2907 setScannerState(SCANNER_STATE_CONTENT); 2908 return XMLEvent.PROCESSING_INSTRUCTION; 2909 //break; 2910 } 2911 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2912 //xxx: What if CDATA is the first event 2913 //<foo><![CDATA[hello<><>]]>append</foo> 2914 2915 //we should not clear the buffer only when the last state was 2916 //either SCANNER_STATE_REFERENCE or 2917 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2918 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2919 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2920 fLastSectionWasCData = true ; 2921 fLastSectionWasEntityReference = false; 2922 fLastSectionWasCharacterData = false; 2923 }//if we dont need to coalesce clear the buffer 2924 else{ 2925 fContentBuffer.clear(); 2926 } 2927 fUsebuffer = true; 2928 //CDATA section is read up to the chunk size limit 2929 scanCDATASection(fContentBuffer , true); 2930 setScannerState(SCANNER_STATE_CONTENT); 2931 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2932 //and just call fDispatche.next(). Since we have set the scanner state to 2933 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2934 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2935 //2. Check if application has set for reporting CDATA event 2936 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2937 //return the cdata event as characters. 2938 if (fIsCoalesce) { 2939 fLastSectionWasCData = true ; 2940 //there might be more data to coalesce. 2941 continue; 2942 } else if(fReportCdataEvent) { 2943 if (!fCDataEnd) { 2944 setScannerState(SCANNER_STATE_CDATA); 2945 } 2946 return XMLEvent.CDATA; 2947 } else { 2948 return XMLEvent.CHARACTERS; 2949 } 2950 } 2951 2952 case SCANNER_STATE_REFERENCE :{ 2953 fMarkupDepth++; 2954 foundBuiltInRefs = false; 2955 2956 //we should not clear the buffer only when the last state was 2957 //either CDATA or 2958 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2959 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2960 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2961 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2962 //used when fIsCoalesce is set to true. 
2963 fLastSectionWasEntityReference = true ; 2964 fLastSectionWasCData = false; 2965 fLastSectionWasCharacterData = false; 2966 }//if we dont need to coalesce clear the buffer 2967 else{ 2968 fContentBuffer.clear(); 2969 } 2970 fUsebuffer = true ; 2971 //take care of character reference 2972 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 2973 scanCharReferenceValue(fContentBuffer, null); 2974 fMarkupDepth--; 2975 if(!fIsCoalesce){ 2976 setScannerState(SCANNER_STATE_CONTENT); 2977 return XMLEvent.CHARACTERS; 2978 } 2979 } else { 2980 // this function also starts new entity 2981 scanEntityReference(fContentBuffer); 2982 //if there was built-in entity reference & coalesce is not true 2983 //return CHARACTERS 2984 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 2985 setScannerState(SCANNER_STATE_CONTENT); 2986 if (builtInRefCharacterHandled) { 2987 builtInRefCharacterHandled = false; 2988 return XMLEvent.ENTITY_REFERENCE; 2989 } else { 2990 return XMLEvent.CHARACTERS; 2991 } 2992 } 2993 2994 //if there was a text declaration, call next() it will be taken care. 
2995 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 2996 fLastSectionWasEntityReference = true ; 2997 continue; 2998 } 2999 3000 if(fScannerState == SCANNER_STATE_REFERENCE){ 3001 setScannerState(SCANNER_STATE_CONTENT); 3002 if (fReplaceEntityReferences && 3003 fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3004 // Skip the entity reference, we don't care 3005 continue; 3006 } 3007 return XMLEvent.ENTITY_REFERENCE; 3008 } 3009 } 3010 //Wether it was character reference, entity reference or built-in entity 3011 //set the next possible state to SCANNER_STATE_CONTENT 3012 setScannerState(SCANNER_STATE_CONTENT); 3013 fLastSectionWasEntityReference = true ; 3014 continue; 3015 } 3016 3017 case SCANNER_STATE_TEXT_DECL: { 3018 // scan text decl 3019 if (fEntityScanner.skipString("<?xml")) { 3020 fMarkupDepth++; 3021 // NOTE: special case where entity starts with a PI 3022 // whose name starts with "xml" (e.g. "xmlfoo") 3023 if (isValidNameChar(fEntityScanner.peekChar())) { 3024 fStringBuffer.clear(); 3025 fStringBuffer.append("xml"); 3026 3027 if (fNamespaces) { 3028 while (isValidNCName(fEntityScanner.peekChar())) { 3029 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3030 } 3031 } else { 3032 while (isValidNameChar(fEntityScanner.peekChar())) { 3033 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3034 } 3035 } 3036 String target = fSymbolTable.addSymbol(fStringBuffer.ch, 3037 fStringBuffer.offset, fStringBuffer.length); 3038 fContentBuffer.clear(); 3039 scanPIData(target, fContentBuffer); 3040 } 3041 3042 // standard text declaration 3043 else { 3044 //xxx: this function gives callback 3045 scanXMLDeclOrTextDecl(true); 3046 } 3047 } 3048 // now that we've straightened out the readers, we can read in chunks: 3049 fEntityManager.fCurrentEntity.mayReadChunks = true; 3050 setScannerState(SCANNER_STATE_CONTENT); 3051 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 
3052 //it seems we have to careful when to allow function issue a callback 3053 //and when to allow adapter issue a callback. 3054 continue; 3055 } 3056 3057 3058 case SCANNER_STATE_ROOT_ELEMENT: { 3059 if (scanRootElementHook()) { 3060 fEmptyElement = true; 3061 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3062 return XMLEvent.START_ELEMENT; 3063 } 3064 setScannerState(SCANNER_STATE_CONTENT); 3065 return XMLEvent.START_ELEMENT ; 3066 } 3067 case SCANNER_STATE_CHAR_REFERENCE : { 3068 fContentBuffer.clear(); 3069 scanCharReferenceValue(fContentBuffer, null); 3070 fMarkupDepth--; 3071 setScannerState(SCANNER_STATE_CONTENT); 3072 return XMLEvent.CHARACTERS; 3073 } 3074 default: 3075 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3076 3077 }//switch 3078 } 3079 // premature end of file 3080 catch (EOFException e) { 3081 endOfFileHook(e); 3082 return -1; 3083 } 3084 } //while loop 3085 }//next 3086 3087 // 3088 // Protected methods 3089 // 3090 3091 // hooks 3092 3093 // NOTE: These hook methods are added so that the full document 3094 // scanner can share the majority of code with this class. 3095 3096 /** 3097 * Scan for DOCTYPE hook. This method is a hook for subclasses 3098 * to add code to handle scanning for a the "DOCTYPE" string 3099 * after the string "<!" has been scanned. 3100 * 3101 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3102 * was not scanned. 3103 */ 3104 protected boolean scanForDoctypeHook() 3105 throws IOException, XNIException { 3106 return false; 3107 } // scanForDoctypeHook():boolean 3108 3109 /** 3110 * Element depth iz zero. This methos is a hook for subclasses 3111 * to add code to handle when the element depth hits zero. When 3112 * scanning a document fragment, an element depth of zero is 3113 * normal. 
However, when scanning a full XML document, the 3114 * scanner must handle the trailing miscellanous section of 3115 * the document after the end of the document's root element. 3116 * 3117 * @return True if the caller should stop and return true which 3118 * allows the scanner to switch to a new scanning 3119 * driver. A return value of false indicates that 3120 * the content driver should continue as normal. 3121 */ 3122 protected boolean elementDepthIsZeroHook() 3123 throws IOException, XNIException { 3124 return false; 3125 } // elementDepthIsZeroHook():boolean 3126 3127 /** 3128 * Scan for root element hook. This method is a hook for 3129 * subclasses to add code that handles scanning for the root 3130 * element. When scanning a document fragment, there is no 3131 * "root" element. However, when scanning a full XML document, 3132 * the scanner must handle the root element specially. 3133 * 3134 * @return True if the caller should stop and return true which 3135 * allows the scanner to switch to a new scanning 3136 * driver. A return value of false indicates that 3137 * the content driver should continue as normal. 3138 */ 3139 protected boolean scanRootElementHook() 3140 throws IOException, XNIException { 3141 return false; 3142 } // scanRootElementHook():boolean 3143 3144 /** 3145 * End of file hook. This method is a hook for subclasses to 3146 * add code that handles the end of file. The end of file in 3147 * a document fragment is OK if the markup depth is zero. 3148 * However, when scanning a full XML document, an end of file 3149 * is always premature. 3150 */ 3151 protected void endOfFileHook(EOFException e) 3152 throws IOException, XNIException { 3153 3154 // NOTE: An end of file is only only an error if we were 3155 // in the middle of scanning some markup. 
-Ac 3156 if (fMarkupDepth != 0) { 3157 reportFatalError("PrematureEOF", null); 3158 } 3159 3160 } // endOfFileHook() 3161 3162 } // class FragmentContentDriver 3163 3164 static void pr(String str) { 3165 System.out.println(str) ; 3166 } 3167 3168 protected boolean fUsebuffer ; 3169 3170 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3171 * maintained for attributes. 3172 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3173 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3174 * XMLString. 3175 * 3176 * @return XMLString XMLString used to store an attribute value. 3177 */ 3178 3179 protected XMLString getString(){ 3180 if(fAttributeCacheUsedCount < initialCacheCount || 3181 fAttributeCacheUsedCount < attributeValueCache.size()){ 3182 return attributeValueCache.get(fAttributeCacheUsedCount++); 3183 } else{ 3184 XMLString str = new XMLString(); 3185 fAttributeCacheUsedCount++; 3186 attributeValueCache.add(str); 3187 return str; 3188 } 3189 } 3190 3191 /** 3192 * Implements XMLBufferListener interface. 3193 */ 3194 3195 public void refresh(){ 3196 refresh(0); 3197 } 3198 3199 /** 3200 * receives callbacks from {@link XMLEntityReader } when buffer 3201 * is being changed. 3202 * @param refreshPosition 3203 */ 3204 public void refresh(int refreshPosition){ 3205 //If you are reading attributes and you got a callback 3206 //cache available attributes. 3207 if(fReadingAttributes){ 3208 fAttributes.refresh(); 3209 } 3210 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3211 bufferContent(); 3212 } 3213 } 3214 3215 /** 3216 * Since 'TempString' shares the buffer (a char array) with the CurrentEntity, 3217 * when the cursor position reaches the end, that is, before the buffer is 3218 * being loaded with new data, the content in the TempString needs to be 3219 * copied into the ContentBuffer. 
*/
    private void bufferContent() {
        // Copy whatever fTempString currently holds into the content
        // buffer. This must happen before the length is reset below.
        fContentBuffer.append(fTempString);
        //clear the XMLString so that data can't be added again.
        fTempString.length = 0;
        // Record that text has been placed in fContentBuffer.
        fUsebuffer = true;
    }
} // class XMLDocumentFragmentScannerImpl