1 /* 2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. 3 * @LastModified: Sep 2017 4 */ 5 6 /* 7 * Licensed to the Apache Software Foundation (ASF) under one or more 8 * contributor license agreements. See the NOTICE file distributed with 9 * this work for additional information regarding copyright ownership. 10 * The ASF licenses this file to You under the Apache License, Version 2.0 11 * (the "License"); you may not use this file except in compliance with 12 * the License. You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23 package com.sun.org.apache.xerces.internal.impl; 24 25 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription; 26 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 27 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 28 import com.sun.org.apache.xerces.internal.util.XMLChar; 29 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 30 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 31 import com.sun.org.apache.xerces.internal.xni.Augmentations; 32 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 33 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 34 import com.sun.org.apache.xerces.internal.xni.XNIException; 35 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner; 38 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 39 
import com.sun.xml.internal.stream.Entity;
import com.sun.xml.internal.stream.StaxXMLInputSource;
import com.sun.xml.internal.stream.dtd.DTDGrammarUtil;
import java.io.EOFException;
import java.io.IOException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.events.XMLEvent;
import jdk.xml.internal.SecuritySupport;


/**
 * This class is responsible for scanning XML document structure
 * and content.
 *
 * This class has been modified as per the new design which is more suited to
 * efficiently building a pull parser. A lot of improvements have been made and
 * code has been added to support StAX functionality/features.
 *
 * Refer to the table in unit-test javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes
 * related to property SupportDTD.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author K.Venugopal, Sun Microsystems
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 * @author Sunitha Reddy, Sun Microsystems
 * @author Joe Wang, Sun Microsystems
 */
public class XMLDocumentScannerImpl
        extends XMLDocumentFragmentScannerImpl{

    //
    // Constants
    //

    // scanner states

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 42;

    /** Scanner state: prolog. */
    protected static final int SCANNER_STATE_PROLOG = 43;

    /** Scanner state: trailing misc. */
    protected static final int SCANNER_STATE_TRAILING_MISC = 44;

    /** Scanner state: DTD internal declarations. */
    protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45;

    /** Scanner state: open DTD external subset. */
    protected static final int SCANNER_STATE_DTD_EXTERNAL = 46;

    /** Scanner state: DTD external declarations. */
    protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47;

    /** Scanner state: no more elements. */
    protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48;

    // feature identifiers

    /** Property identifier: document scanner. */
    protected static final String DOCUMENT_SCANNER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY;

    /** Feature identifier: load external DTD. */
    protected static final String LOAD_EXTERNAL_DTD =
            Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;

    /** Feature identifier: disallow DOCTYPE declaration. */
    protected static final String DISALLOW_DOCTYPE_DECL_FEATURE =
            Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;

    // property identifiers

    /** Property identifier: DTD scanner. */
    protected static final String DTD_SCANNER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;

    /** Property identifier: validation manager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /** Property identifier: namespace context. */
    protected static final String NAMESPACE_CONTEXT =
            Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;

    // recognized features and properties

    /** Recognized features. */
    private static final String[] RECOGNIZED_FEATURES = {
        LOAD_EXTERNAL_DTD,
        DISALLOW_DOCTYPE_DECL_FEATURE,
    };

    /** Feature defaults, index-aligned with {@link #RECOGNIZED_FEATURES}. */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
        Boolean.FALSE,
    };

    /** Recognized properties. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        DTD_SCANNER,
        VALIDATION_MANAGER
    };

    /** Property defaults, index-aligned with {@link #RECOGNIZED_PROPERTIES}. */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null
    };

    //
    // Data
    //

    // properties

    /** DTD scanner. */
    protected XMLDTDScanner fDTDScanner = null;

    /** Validation manager. */
    //xxx: fValidationManager code needs to be added yet!
    protected ValidationManager fValidationManager;

    /** Buffer that accumulates the text of the DOCTYPE declaration; created when a DOCTYPE is first seen. */
    protected XMLStringBuffer fDTDDecl = null;
    /** Set while the DOCTYPE declaration is being scanned. */
    protected boolean fReadingDTD = false;
    // NOTE(review): fAddedListener is not referenced in the visible part of this file - confirm usage.
    protected boolean fAddedListener = false;

    // protected data

    // other info

    /** Doctype name. */
    protected String fDoctypeName;

    /** Doctype declaration public identifier. */
    protected String fDoctypePublicId;

    /** Doctype declaration system identifier. */
    protected String fDoctypeSystemId;

    /** Namespace support. */
    protected NamespaceContext fNamespaceContext = new NamespaceSupport();

    // features

    /** Load external DTD. */
    protected boolean fLoadExternalDTD = true;

    // state

    /** Seen doctype declaration. */
    protected boolean fSeenDoctypeDecl;

    protected boolean fScanEndElement;

    //protected int fScannerLastState ;

    // drivers

    /** XML declaration driver. */
    protected Driver fXMLDeclDriver = new XMLDeclDriver();

    /** Prolog driver. */
    protected Driver fPrologDriver = new PrologDriver();

    /** DTD driver; created lazily when a DOCTYPE declaration is scanned. */
    protected Driver fDTDDriver = null ;

    /** Trailing miscellaneous section driver. */
    protected Driver fTrailingMiscDriver = new TrailingMiscDriver();
    /** Position in the current scanned entity recorded right after {@code <!DOCTYPE} was matched. */
    protected int fStartPos = 0;
    /** Position in the current scanned entity recorded when DOCTYPE scanning stopped. */
    protected int fEndPos = 0;
    /** True once a DOCTYPE declaration with an internal subset has been seen. */
    protected boolean fSeenInternalSubset= false;
    // temporary variables

    /** Array of 3 strings. */
    private String[] fStrings = new String[3];

    /** External subset source.
*/ 222 private XMLInputSource fExternalSubsetSource = null; 223 224 /** A DTD Description. */ 225 private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null); 226 227 private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'}; 228 private static final char [] COMMENTSTRING = {'-','-'}; 229 230 // 231 // Constructors 232 // 233 234 /** Default constructor. */ 235 public XMLDocumentScannerImpl() {} // <init>() 236 237 238 // 239 // XMLDocumentScanner methods 240 // 241 242 243 /** 244 * Sets the input source. 245 * 246 * @param inputSource The input source. 247 * 248 * @throws IOException Thrown on i/o error. 249 */ 250 public void setInputSource(XMLInputSource inputSource) throws IOException { 251 fEntityManager.setEntityHandler(this); 252 //this starts a new entity and sets the current entity to the document entity. 253 fEntityManager.startDocumentEntity(inputSource); 254 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 255 setScannerState(XMLEvent.START_DOCUMENT); 256 } // setInputSource(XMLInputSource) 257 258 259 260 /**return the state of the scanner */ 261 public int getScannetState(){ 262 return fScannerState ; 263 } 264 265 266 267 268 public void reset(PropertyManager propertyManager) { 269 super.reset(propertyManager); 270 // other settings 271 fDoctypeName = null; 272 fDoctypePublicId = null; 273 fDoctypeSystemId = null; 274 fSeenDoctypeDecl = false; 275 fNamespaceContext.reset(); 276 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 277 278 // xerces features 279 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 280 setScannerState(XMLEvent.START_DOCUMENT); 281 setDriver(fXMLDeclDriver); 282 fSeenInternalSubset = false; 283 if(fDTDScanner != null){ 284 ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager); 285 } 286 fEndPos = 0; 287 
fStartPos = 0; 288 if(fDTDDecl != null){ 289 fDTDDecl.clear(); 290 } 291 292 } 293 294 /** 295 * Resets the component. The component can query the component manager 296 * about any features and properties that affect the operation of the 297 * component. 298 * 299 * @param componentManager The component manager. 300 * 301 * @throws SAXException Thrown by component on initialization error. 302 * For example, if a feature or property is 303 * required for the operation of the component, the 304 * component manager may throw a 305 * SAXNotRecognizedException or a 306 * SAXNotSupportedException. 307 */ 308 public void reset(XMLComponentManager componentManager) 309 throws XMLConfigurationException { 310 311 super.reset(componentManager); 312 313 // other settings 314 fDoctypeName = null; 315 fDoctypePublicId = null; 316 fDoctypeSystemId = null; 317 fSeenDoctypeDecl = false; 318 fExternalSubsetSource = null; 319 320 // xerces features 321 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 322 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false); 323 324 fNamespaces = componentManager.getFeature(NAMESPACES, true); 325 326 fSeenInternalSubset = false; 327 // xerces properties 328 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER); 329 330 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 331 332 try { 333 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT); 334 } 335 catch (XMLConfigurationException e) { } 336 if (fNamespaceContext == null) { 337 fNamespaceContext = new NamespaceSupport(); 338 } 339 fNamespaceContext.reset(); 340 341 fEndPos = 0; 342 fStartPos = 0; 343 if(fDTDDecl != null) 344 fDTDDecl.clear(); 345 346 347 //fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER)); 348 349 // setup driver 350 setScannerState(SCANNER_STATE_XML_DECL); 351 setDriver(fXMLDeclDriver); 
352 353 } // reset(XMLComponentManager) 354 355 356 /** 357 * Returns a list of feature identifiers that are recognized by 358 * this component. This method may return null if no features 359 * are recognized by this component. 360 */ 361 public String[] getRecognizedFeatures() { 362 String[] featureIds = super.getRecognizedFeatures(); 363 int length = featureIds != null ? featureIds.length : 0; 364 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length]; 365 if (featureIds != null) { 366 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length); 367 } 368 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length); 369 return combinedFeatureIds; 370 } // getRecognizedFeatures():String[] 371 372 /** 373 * Sets the state of a feature. This method is called by the component 374 * manager any time after reset when a feature changes state. 375 * <p> 376 * <strong>Note:</strong> Components should silently ignore features 377 * that do not affect the operation of the component. 378 * 379 * @param featureId The feature identifier. 380 * @param state The state of the feature. 381 * 382 * @throws SAXNotRecognizedException The component should not throw 383 * this exception. 384 * @throws SAXNotSupportedException The component should not throw 385 * this exception. 
386 */ 387 public void setFeature(String featureId, boolean state) 388 throws XMLConfigurationException { 389 390 super.setFeature(featureId, state); 391 392 // Xerces properties 393 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 394 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 395 396 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 397 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 398 fLoadExternalDTD = state; 399 return; 400 } 401 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() && 402 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) { 403 fDisallowDoctype = state; 404 return; 405 } 406 } 407 408 } // setFeature(String,boolean) 409 410 /** 411 * Returns a list of property identifiers that are recognized by 412 * this component. This method may return null if no properties 413 * are recognized by this component. 414 */ 415 public String[] getRecognizedProperties() { 416 String[] propertyIds = super.getRecognizedProperties(); 417 int length = propertyIds != null ? propertyIds.length : 0; 418 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length]; 419 if (propertyIds != null) { 420 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length); 421 } 422 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length); 423 return combinedPropertyIds; 424 } // getRecognizedProperties():String[] 425 426 /** 427 * Sets the value of a property. This method is called by the component 428 * manager any time after reset when a property changes value. 429 * <p> 430 * <strong>Note:</strong> Components should silently ignore properties 431 * that do not affect the operation of the component. 432 * 433 * @param propertyId The property identifier. 434 * @param value The value of the property. 
435 * 436 * @throws SAXNotRecognizedException The component should not throw 437 * this exception. 438 * @throws SAXNotSupportedException The component should not throw 439 * this exception. 440 */ 441 public void setProperty(String propertyId, Object value) 442 throws XMLConfigurationException { 443 444 super.setProperty(propertyId, value); 445 446 // Xerces properties 447 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 448 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 449 450 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() && 451 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) { 452 fDTDScanner = (XMLDTDScanner)value; 453 } 454 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() && 455 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) { 456 if (value != null) { 457 fNamespaceContext = (NamespaceContext)value; 458 } 459 } 460 461 return; 462 } 463 464 } // setProperty(String,Object) 465 466 /** 467 * Returns the default state for a feature, or null if this 468 * component does not want to report a default value for this 469 * feature. 470 * 471 * @param featureId The feature identifier. 472 * 473 * @since Xerces 2.2.0 474 */ 475 public Boolean getFeatureDefault(String featureId) { 476 477 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 478 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 479 return FEATURE_DEFAULTS[i]; 480 } 481 } 482 return super.getFeatureDefault(featureId); 483 } // getFeatureDefault(String):Boolean 484 485 /** 486 * Returns the default state for a property, or null if this 487 * component does not want to report a default value for this 488 * property. 489 * 490 * @param propertyId The property identifier. 
491 * 492 * @since Xerces 2.2.0 493 */ 494 public Object getPropertyDefault(String propertyId) { 495 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 496 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 497 return PROPERTY_DEFAULTS[i]; 498 } 499 } 500 return super.getPropertyDefault(propertyId); 501 } // getPropertyDefault(String):Object 502 503 // 504 // XMLEntityHandler methods 505 // 506 507 /** 508 * This method notifies of the start of an entity. The DTD has the 509 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 510 * general entities are just specified by their name. 511 * 512 * @param name The name of the entity. 513 * @param identifier The resource identifier. 514 * @param encoding The auto-detected IANA encoding name of the entity 515 * stream. This value will be null in those situations 516 * where the entity encoding is not auto-detected (e.g. 517 * internal entities or a document entity that is 518 * parsed from a java.io.Reader). 519 * 520 * @throws XNIException Thrown by handler to signal an error. 521 */ 522 public void startEntity(String name, 523 XMLResourceIdentifier identifier, 524 String encoding, Augmentations augs) throws XNIException { 525 526 super.startEntity(name, identifier, encoding,augs); 527 528 //register current document scanner as a listener for XMLEntityScanner 529 fEntityScanner.registerListener(this); 530 531 // prepare to look for a TextDecl if external general entity 532 if (!name.equals("[xml]") && fEntityScanner.isExternal()) { 533 // Don't do this if we're skipping the entity! 534 if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) { 535 setScannerState(SCANNER_STATE_TEXT_DECL); 536 } 537 } 538 539 // call handler 540 /** comment this part.. LOCATOR problem.. 
*/ 541 if (fDocumentHandler != null && name.equals("[xml]")) { 542 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null); 543 } 544 545 } // startEntity(String,identifier,String) 546 547 548 /** 549 * This method notifies the end of an entity. The DTD has the pseudo-name 550 * of "[dtd]" parameter entity names start with '%'; and general entities 551 * are just specified by their name. 552 * 553 * @param name The name of the entity. 554 * 555 * @throws XNIException Thrown by handler to signal an error. 556 */ 557 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 558 559 super.endEntity(name, augs); 560 561 if(name.equals("[xml]")){ 562 //if fMarkupDepth has reached 0. 563 //and driver is fTrailingMiscDriver (which 564 //handles end of document in normal case) 565 //set the scanner state of SCANNER_STATE_TERMINATED 566 if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){ 567 //set the scanner set to SCANNER_STATE_TERMINATED 568 setScannerState(SCANNER_STATE_TERMINATED) ; 569 } else{ 570 //else we have reached the end of document prematurely 571 //so throw EOFException. 
572 throw new java.io.EOFException(); 573 } 574 575 //this is taken care in wrapper which generates XNI callbacks, There are no next events 576 577 //if (fDocumentHandler != null) { 578 //fDocumentHandler.endDocument(null); 579 //} 580 } 581 } // endEntity(String) 582 583 584 public XMLStringBuffer getDTDDecl(){ 585 Entity entity = fEntityScanner.getCurrentEntity(); 586 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 587 if(fSeenInternalSubset) 588 fDTDDecl.append("]>"); 589 return fDTDDecl; 590 } 591 592 public String getCharacterEncodingScheme(){ 593 return fDeclaredEncoding; 594 } 595 596 /** return the next state on the input 597 * 598 * @return int 599 */ 600 601 public int next() throws IOException, XNIException { 602 return fDriver.next(); 603 } 604 605 //getNamespaceContext 606 public NamespaceContext getNamespaceContext(){ 607 return fNamespaceContext ; 608 } 609 610 611 612 // 613 // Protected methods 614 // 615 616 // driver factory methods 617 618 /** Creates a content driver. */ 619 protected Driver createContentDriver() { 620 return new ContentDriver(); 621 } // createContentDriver():Driver 622 623 // scanning methods 624 625 /** Scans a doctype declaration. 
*/ 626 protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { 627 628 // spaces 629 if (!fEntityScanner.skipSpaces()) { 630 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL", 631 null); 632 } 633 634 // root element name 635 fDoctypeName = fEntityScanner.scanName(NameType.DOCTYPE); 636 if (fDoctypeName == null) { 637 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null); 638 } 639 640 // external id 641 if (fEntityScanner.skipSpaces()) { 642 scanExternalID(fStrings, false); 643 fDoctypeSystemId = fStrings[0]; 644 fDoctypePublicId = fStrings[1]; 645 fEntityScanner.skipSpaces(); 646 } 647 648 fHasExternalDTD = fDoctypeSystemId != null; 649 650 // Attempt to locate an external subset with an external subset resolver. 651 if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { 652 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 653 fDTDDescription.setRootName(fDoctypeName); 654 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 655 fHasExternalDTD = fExternalSubsetSource != null; 656 } 657 658 // call handler 659 if (supportDTD && fDocumentHandler != null) { 660 // NOTE: I don't like calling the doctypeDecl callback until 661 // end of the *full* doctype line (including internal 662 // subset) is parsed correctly but SAX2 requires that 663 // it knows the root element name and public and system 664 // identifier for the startDTD call. -Ac 665 if (fExternalSubsetSource == null) { 666 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 667 } 668 else { 669 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null); 670 } 671 } 672 673 // is there an internal subset? 
674 boolean internalSubset = true; 675 if (!fEntityScanner.skipChar('[', null)) { 676 internalSubset = false; 677 fEntityScanner.skipSpaces(); 678 if (!fEntityScanner.skipChar('>', null)) { 679 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 680 } 681 fMarkupDepth--; 682 } 683 return internalSubset; 684 685 } // scanDoctypeDecl():boolean 686 687 // 688 // Private methods 689 // 690 /** Set the scanner state after scanning DTD */ 691 protected void setEndDTDScanState() { 692 setScannerState(SCANNER_STATE_PROLOG); 693 setDriver(fPrologDriver); 694 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 695 fReadingDTD=false; 696 } 697 698 /** Returns the scanner state name. */ 699 protected String getScannerStateName(int state) { 700 701 switch (state) { 702 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL"; 703 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG"; 704 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC"; 705 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS"; 706 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL"; 707 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS"; 708 } 709 return super.getScannerStateName(state); 710 711 } // getScannerStateName(int):String 712 713 // 714 // Classes 715 // 716 717 /** 718 * Driver to handle XMLDecl scanning. 719 * 720 * This class has been modified as per the new design which is more suited to 721 * efficiently build pull parser. Lots of performance improvements have been done and 722 * the code has been added to support stax functionality/features. 723 * 724 * @author Neeraj Bajaj, Sun Microsystems. 
*
     * @author Andy Clark, IBM
     */
    protected final class XMLDeclDriver
            implements Driver {

        //
        // Driver methods
        //

        /**
         * Scans the optional XML declaration at the start of the document and
         * hands control to the prolog driver.
         *
         * @return {@code XMLEvent.START_DOCUMENT}, or -1 after a premature
         *         end of file has been reported.
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException {

            // next driver is prolog regardless of whether there
            // is an XMLDecl in this document
            setScannerState(SCANNER_STATE_PROLOG);
            setDriver(fPrologDriver);

            //System.out.println("fEntityScanner = " + fEntityScanner);
            // scan XMLDecl
            try {
                if (fEntityScanner.skipString(XMLDECL)) {
                    // only XMLDECL followed by white space is an XML declaration
                    if (XMLChar.isSpace(fEntityScanner.peekChar())) {
                        fMarkupDepth++;
                        scanXMLDeclOrTextDecl(false);
                    } else {
                        // PI, reset position so the prolog driver re-reads it from the start
                        fEntityManager.fCurrentEntity.position = 0;
                    }
                }

                //START_OF_THE_DOCUMENT
                fEntityManager.fCurrentEntity.mayReadChunks = true;
                return XMLEvent.START_DOCUMENT;

            }

            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                return -1;
                //throw e;
            }

        }
    } // class XMLDeclDriver

    /**
     * Driver to handle prolog scanning.
     *
     * @author Andy Clark, IBM
     */
    protected final class PrologDriver
            implements Driver {

        /**
         * Drives the parser to the next state/event on the input. Parser is guaranteed
         * to stop at the next state/event.
         *
         * Internally XML document is divided into several states. Each state represents
         * a sections of XML document. When this functions returns normally, it has read
         * the section of XML document and returns the state corresponding to section of
         * document which has been read. For optimizations, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
         *
         * @return state representing the section of document just read, or -1
         *         after a fatal error has been reported.
         *
         * @throws IOException Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */

        public int next() throws IOException, XNIException {

            try {
                // Phase 1: skip white space and classify the next piece of markup.
                // Loops until the state is something other than PROLOG or
                // START_OF_MARKUP.
                do {
                    switch (fScannerState) {
                        case SCANNER_STATE_PROLOG: {
                            fEntityScanner.skipSpaces();
                            if (fEntityScanner.skipChar('<', null)) {
                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
                            } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
                                setScannerState(SCANNER_STATE_REFERENCE);
                            } else {
                                setScannerState(SCANNER_STATE_CONTENT);
                            }
                            break;
                        }

                        case SCANNER_STATE_START_OF_MARKUP: {
                            fMarkupDepth++;
                            if (isValidNameStartChar(fEntityScanner.peekChar()) ||
                                    isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                                setDriver(fContentDriver);
                                //from now onwards this would be handled by fContentDriver,in the same next() call
                                return fContentDriver.next();
                            } else if (fEntityScanner.skipChar('!', null)) {
                                if (fEntityScanner.skipChar('-', null)) {
                                    if (!fEntityScanner.skipChar('-', null)) {
                                        reportFatalError("InvalidCommentStart",
                                                null);
                                    }
                                    setScannerState(SCANNER_STATE_COMMENT);
                                } else if (fEntityScanner.skipString(DOCTYPE)) {
                                    setScannerState(SCANNER_STATE_DOCTYPE);
                                    // remember where the DOCTYPE text starts so that
                                    // getDTDDecl() can reproduce it later
                                    Entity entity = fEntityScanner.getCurrentEntity();
                                    if(entity instanceof Entity.ScannedEntity){
                                        fStartPos=((Entity.ScannedEntity)entity).position;
                                    }
                                    fReadingDTD=true;
                                    if(fDTDDecl == null)
                                        fDTDDecl = new XMLStringBuffer();
                                    fDTDDecl.append("<!DOCTYPE");

                                } else {
                                    reportFatalError("MarkupNotRecognizedInProlog",
                                            null);
                                }
                            } else if (fEntityScanner.skipChar('?', null)) {
                                setScannerState(SCANNER_STATE_PI);
                            } else {
                                reportFatalError("MarkupNotRecognizedInProlog",
                                        null);
                            }
                            break;
                        }
                    }
                } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP );

                // Phase 2: scan the construct that was recognized above and
                // report the matching event.
                switch(fScannerState){
                    /**
                    //this part is handled by FragmentContentHandler
                    case SCANNER_STATE_ROOT_ELEMENT: {
                        //we have read '<' and beginning of reading the start element tag
                        setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
                        setDriver(fContentDriver);
                        //from now onwards this would be handled by fContentDriver,in the same next() call
                        return fContentDriver.next();
                    }
                     */
                    case SCANNER_STATE_COMMENT: {
                        //this function fills the data..
                        scanComment();
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.COMMENT;
                        //setScannerState(SCANNER_STATE_PROLOG);
                        //break;
                    }
                    case SCANNER_STATE_PI: {
                        fContentBuffer.clear() ;
                        scanPI(fContentBuffer);
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.PROCESSING_INSTRUCTION;
                    }

                    case SCANNER_STATE_DOCTYPE: {
                        if (fDisallowDoctype) {
                            reportFatalError("DoctypeNotAllowed", null);
                        }

                        if (fSeenDoctypeDecl) {
                            reportFatalError("AlreadySeenDoctype", null);
                        }
                        fSeenDoctypeDecl = true;

                        // scanDoctypeDecl() sends XNI doctypeDecl event that
                        // in SAX is converted to startDTD() event.
                        if (scanDoctypeDecl(fSupportDTD)) {
                            //allow parsing of entity decls to continue in order to stay well-formed
                            setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
                            fSeenInternalSubset = true;
                            if(fDTDDriver == null){
                                fDTDDriver = new DTDDriver();
                            }
                            setDriver(fContentDriver);
                            //always return DTD event, the event however, will not contain any entities
                            return fDTDDriver.next();
                        }

                        // No internal subset: the declaration has been consumed up to
                        // its closing '>', so record where the DOCTYPE text ends.
                        // (fSeenDoctypeDecl was set to true above, so this always runs.)
                        if(fSeenDoctypeDecl){
                            Entity entity = fEntityScanner.getCurrentEntity();
                            if(entity instanceof Entity.ScannedEntity){
                                fEndPos = ((Entity.ScannedEntity)entity).position;
                            }
                            fReadingDTD = false;
                        }

                        // handle external subset
                        if (fDoctypeSystemId != null) {
                            if (((fValidation || fLoadExternalDTD)
                                && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                if (fSupportDTD) {
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                                } else {
                                    setScannerState(SCANNER_STATE_PROLOG);
                                }

                                setDriver(fContentDriver);
                                if(fDTDDriver == null) {
                                    fDTDDriver = new DTDDriver();
                                }

                                return fDTDDriver.next();
                            }
                        }
                        else if (fExternalSubsetSource != null) {
                            if (((fValidation || fLoadExternalDTD)
                                && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
                                // NOTE(review): fDTDScanner is dereferenced without a null check here - confirm it is always set on this path.
                                fDTDScanner.setInputSource(fExternalSubsetSource);
                                fExternalSubsetSource = null;
                                if (fSupportDTD)
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                                else
                                    setScannerState(SCANNER_STATE_PROLOG);
                                setDriver(fContentDriver);
                                if(fDTDDriver == null)
                                    fDTDDriver = new DTDDriver();
                                return fDTDDriver.next();
                            }
                        }

                        // Send endDTD() call if:
                        // a) systemId is null or if an external subset resolver could not locate an external subset.
                        // b) "load-external-dtd" and validation are false
                        // c) DTD grammar is cached

                        // in XNI this results in 3 events:  doctypeDecl, startDTD, endDTD
                        // in SAX this results in 2 events: startDTD, endDTD
                        if (fDTDScanner != null) {
                            fDTDScanner.setInputSource(null);
                        }
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.DTD;
                    }

                    case SCANNER_STATE_CONTENT: {
                        reportFatalError("ContentIllegalInProlog", null);
                        // consume the offending character
                        fEntityScanner.scanChar(null);
                        return -1;
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInProlog", null);
                        return -1;
                    }

                    /**
                     * if (complete) {
                     * if (fEntityScanner.scanChar() != '<') {
                     * reportFatalError("RootElementRequired", null);
                     * }
                     * setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                     * setDriver(fContentDriver);
                     * }
                     */
                }
            }
            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                //xxx  what should be returned here.... ???
                return -1 ;
                //throw e;
            }
            //xxx  what should be returned here.... ???
            return -1;

        }


    } // class PrologDriver

    /**
     * Driver to handle the internal and external DTD subsets.
     *
     * @author Andy Clark, IBM
     */
    protected final class DTDDriver
            implements Driver {

        //
        // Driver methods
        //

        /**
         * Scans the DTD by running {@code dispatch(true)} and reports a DTD event.
         *
         * @return {@code XMLEvent.DTD}
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException{

            dispatch(true);

            //xxx: remove this hack and align this with reusing DTD components
            //currently this routine will only be executed from Stax
            if(fPropertyManager != null){
                dtdGrammarUtil = new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext);
            }

            return XMLEvent.DTD ;
        }

        /**
         * Dispatch an XML "event".
         *
         * @param complete True if this driver is intended to scan
         *                 and dispatch as much as possible.
1028 * 1029 * @return True if there is more to dispatch either from this 1030 * or a another driver. 1031 * 1032 * @throws IOException Thrown on i/o error. 1033 * @throws XNIException Thrown on parse error. 1034 */ 1035 public boolean dispatch(boolean complete) 1036 throws IOException, XNIException { 1037 fEntityManager.setEntityHandler(null); 1038 try { 1039 boolean again; 1040 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl(); 1041 if( fDTDScanner == null){ 1042 1043 if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){ 1044 fDTDScanner = new XML11DTDScannerImpl(); 1045 } else 1046 1047 fDTDScanner = new XMLDTDScannerImpl(); 1048 1049 ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager); 1050 } 1051 1052 fDTDScanner.setLimitAnalyzer(fLimitAnalyzer); 1053 do { 1054 again = false; 1055 switch (fScannerState) { 1056 case SCANNER_STATE_DTD_INTERNAL_DECLS: { 1057 boolean moreToScan = false; 1058 if (!fDTDScanner.skipDTD(fSupportDTD)) { 1059 // REVISIT: Should there be a feature for 1060 // the "complete" parameter? 
1061 boolean completeDTD = true; 1062 1063 moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD); 1064 } 1065 Entity entity = fEntityScanner.getCurrentEntity(); 1066 if(entity instanceof Entity.ScannedEntity){ 1067 fEndPos=((Entity.ScannedEntity)entity).position; 1068 } 1069 fReadingDTD=false; 1070 if (!moreToScan) { 1071 // end doctype declaration 1072 if (!fEntityScanner.skipChar(']', null)) { 1073 reportFatalError("DoctypedeclNotClosed", new Object[]{fDoctypeName}); 1074 } 1075 fEntityScanner.skipSpaces(); 1076 if (!fEntityScanner.skipChar('>', null)) { 1077 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 1078 } 1079 fMarkupDepth--; 1080 1081 if (!fSupportDTD) { 1082 //simply reset the entity store without having to mess around 1083 //with the DTD Scanner code 1084 fEntityStore = fEntityManager.getEntityStore(); 1085 fEntityStore.reset(); 1086 } else { 1087 // scan external subset next unless we are ignoring DTDs 1088 if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) { 1089 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 1090 break; 1091 } 1092 } 1093 1094 setEndDTDScanState(); 1095 return true; 1096 1097 } 1098 break; 1099 } 1100 case SCANNER_STATE_DTD_EXTERNAL: { 1101 /** 1102 fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1103 fDTDDescription.setRootName(fDoctypeName); 1104 XMLInputSource xmlInputSource = 1105 fEntityManager.resolveEntity(fDTDDescription); 1106 fDTDScanner.setInputSource(xmlInputSource); 1107 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1108 again = true; 1109 break; 1110 */ 1111 1112 resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1113 XMLInputSource xmlInputSource = null ; 1114 StaxXMLInputSource staxInputSource = fEntityManager.resolveEntityAsPerStax(resourceIdentifier); 1115 1116 // Check access permission. If the source is resolved by a resolver, the check is skipped. 
1117 if (!staxInputSource.isCreatedByResolver()) { 1118 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD); 1119 if (accessError != null) { 1120 reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError }); 1121 } 1122 } 1123 xmlInputSource = staxInputSource.getXMLInputSource(); 1124 fDTDScanner.setInputSource(xmlInputSource); 1125 if (fEntityScanner.fCurrentEntity != null) { 1126 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1127 } else { 1128 setScannerState(SCANNER_STATE_PROLOG); 1129 } 1130 again = true; 1131 break; 1132 } 1133 case SCANNER_STATE_DTD_EXTERNAL_DECLS: { 1134 // REVISIT: Should there be a feature for 1135 // the "complete" parameter? 1136 boolean completeDTD = true; 1137 boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD); 1138 if (!moreToScan) { 1139 setEndDTDScanState(); 1140 return true; 1141 } 1142 break; 1143 } 1144 case SCANNER_STATE_PROLOG : { 1145 // skip entity decls 1146 setEndDTDScanState(); 1147 return true; 1148 } 1149 default: { 1150 throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')'); 1151 } 1152 } 1153 } while (complete || again); 1154 } 1155 1156 // premature end of file 1157 catch (EOFException e) { 1158 e.printStackTrace(); 1159 reportFatalError("PrematureEOF", null); 1160 return false; 1161 //throw e; 1162 } 1163 1164 // cleanup 1165 finally { 1166 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1167 } 1168 1169 return true; 1170 1171 } 1172 1173 // dispatch(boolean):boolean 1174 1175 } // class DTDDriver 1176 1177 /** 1178 * Driver to handle content scanning. 
1179 * 1180 * @author Andy Clark, IBM 1181 * @author Eric Ye, IBM 1182 */ 1183 protected class ContentDriver 1184 extends FragmentContentDriver { 1185 1186 // 1187 // Protected methods 1188 // 1189 1190 // hooks 1191 1192 // NOTE: These hook methods are added so that the full document 1193 // scanner can share the majority of code with this class. 1194 1195 /** 1196 * Scan for DOCTYPE hook. This method is a hook for subclasses 1197 * to add code to handle scanning for a the "DOCTYPE" string 1198 * after the string "<!" has been scanned. 1199 * 1200 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 1201 * was not scanned. 1202 */ 1203 protected boolean scanForDoctypeHook() 1204 throws IOException, XNIException { 1205 1206 if (fEntityScanner.skipString(DOCTYPE)) { 1207 setScannerState(SCANNER_STATE_DOCTYPE); 1208 // fEntityScanner.markStartOfDTD(); 1209 return true; 1210 } 1211 return false; 1212 1213 } // scanForDoctypeHook():boolean 1214 1215 /** 1216 * Element depth iz zero. This methos is a hook for subclasses 1217 * to add code to handle when the element depth hits zero. When 1218 * scanning a document fragment, an element depth of zero is 1219 * normal. However, when scanning a full XML document, the 1220 * scanner must handle the trailing miscellanous section of 1221 * the document after the end of the document's root element. 1222 * 1223 * @return True if the caller should stop and return true which 1224 * allows the scanner to switch to a new scanning 1225 * driver. A return value of false indicates that 1226 * the content driver should continue as normal. 1227 */ 1228 protected boolean elementDepthIsZeroHook() 1229 throws IOException, XNIException { 1230 1231 setScannerState(SCANNER_STATE_TRAILING_MISC); 1232 setDriver(fTrailingMiscDriver); 1233 return true; 1234 1235 } // elementDepthIsZeroHook():boolean 1236 1237 /** 1238 * Scan for root element hook. 
This method is a hook for 1239 * subclasses to add code that handles scanning for the root 1240 * element. When scanning a document fragment, there is no 1241 * "root" element. However, when scanning a full XML document, 1242 * the scanner must handle the root element specially. 1243 * 1244 * @return True if the caller should stop and return true which 1245 * allows the scanner to switch to a new scanning 1246 * driver. A return value of false indicates that 1247 * the content driver should continue as normal. 1248 */ 1249 protected boolean scanRootElementHook() 1250 throws IOException, XNIException { 1251 1252 if (scanStartElement()) { 1253 setScannerState(SCANNER_STATE_TRAILING_MISC); 1254 setDriver(fTrailingMiscDriver); 1255 return true; 1256 } 1257 return false; 1258 1259 } // scanRootElementHook():boolean 1260 1261 /** 1262 * End of file hook. This method is a hook for subclasses to 1263 * add code that handles the end of file. The end of file in 1264 * a document fragment is OK if the markup depth is zero. 1265 * However, when scanning a full XML document, an end of file 1266 * is always premature. 1267 */ 1268 protected void endOfFileHook(EOFException e) 1269 throws IOException, XNIException { 1270 1271 reportFatalError("PrematureEOF", null); 1272 // in case continue-after-fatal-error set, should not do this... 
1273 //throw e; 1274 1275 } // endOfFileHook() 1276 1277 protected void resolveExternalSubsetAndRead() 1278 throws IOException, XNIException { 1279 1280 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 1281 fDTDDescription.setRootName(fElementQName.rawname); 1282 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 1283 1284 if (src != null) { 1285 fDoctypeName = fElementQName.rawname; 1286 fDoctypePublicId = src.getPublicId(); 1287 fDoctypeSystemId = src.getSystemId(); 1288 // call document handler 1289 if (fDocumentHandler != null) { 1290 // This inserts a doctypeDecl event into the stream though no 1291 // DOCTYPE existed in the instance document. 1292 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 1293 } 1294 try { 1295 fDTDScanner.setInputSource(src); 1296 while (fDTDScanner.scanDTDExternalSubset(true)); 1297 } finally { 1298 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1299 } 1300 } 1301 } // resolveExternalSubsetAndRead() 1302 1303 1304 1305 } // class ContentDriver 1306 1307 /** 1308 * Driver to handle trailing miscellaneous section scanning. 1309 * 1310 * @author Andy Clark, IBM 1311 * @author Eric Ye, IBM 1312 */ 1313 protected final class TrailingMiscDriver 1314 implements Driver { 1315 1316 // 1317 // Driver methods 1318 // 1319 public int next() throws IOException, XNIException{ 1320 //this could for cases like <foo/> 1321 //look at scanRootElementHook 1322 if(fEmptyElement){ 1323 fEmptyElement = false; 1324 return XMLEvent.END_ELEMENT; 1325 } 1326 1327 try { 1328 if(fScannerState == SCANNER_STATE_TERMINATED){ 1329 return XMLEvent.END_DOCUMENT ;} 1330 do { 1331 switch (fScannerState) { 1332 case SCANNER_STATE_TRAILING_MISC: { 1333 1334 fEntityScanner.skipSpaces(); 1335 //we should have reached the end of the document in 1336 //most cases. 
1337 if(fScannerState == SCANNER_STATE_TERMINATED ){ 1338 return XMLEvent.END_DOCUMENT ; 1339 } 1340 if (fEntityScanner.skipChar('<', null)) { 1341 setScannerState(SCANNER_STATE_START_OF_MARKUP); 1342 } else { 1343 setScannerState(SCANNER_STATE_CONTENT); 1344 } 1345 break; 1346 } 1347 case SCANNER_STATE_START_OF_MARKUP: { 1348 fMarkupDepth++; 1349 if (fEntityScanner.skipChar('?', null)) { 1350 setScannerState(SCANNER_STATE_PI); 1351 } else if (fEntityScanner.skipChar('!', null)) { 1352 setScannerState(SCANNER_STATE_COMMENT); 1353 } else if (fEntityScanner.skipChar('/', null)) { 1354 reportFatalError("MarkupNotRecognizedInMisc", 1355 null); 1356 } else if (isValidNameStartChar(fEntityScanner.peekChar()) || 1357 isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { 1358 reportFatalError("MarkupNotRecognizedInMisc", 1359 null); 1360 scanStartElement(); 1361 setScannerState(SCANNER_STATE_CONTENT); 1362 } else { 1363 reportFatalError("MarkupNotRecognizedInMisc", 1364 null); 1365 } 1366 break; 1367 } 1368 } 1369 } while(fScannerState == SCANNER_STATE_START_OF_MARKUP || 1370 fScannerState == SCANNER_STATE_TRAILING_MISC); 1371 1372 switch (fScannerState){ 1373 case SCANNER_STATE_PI: { 1374 fContentBuffer.clear(); 1375 scanPI(fContentBuffer); 1376 setScannerState(SCANNER_STATE_TRAILING_MISC); 1377 return XMLEvent.PROCESSING_INSTRUCTION ; 1378 } 1379 case SCANNER_STATE_COMMENT: { 1380 if (!fEntityScanner.skipString(COMMENTSTRING)) { 1381 reportFatalError("InvalidCommentStart", null); 1382 } 1383 scanComment(); 1384 setScannerState(SCANNER_STATE_TRAILING_MISC); 1385 return XMLEvent.COMMENT; 1386 } 1387 case SCANNER_STATE_CONTENT: { 1388 int ch = fEntityScanner.peekChar(); 1389 if (ch == -1) { 1390 setScannerState(SCANNER_STATE_TERMINATED); 1391 return XMLEvent.END_DOCUMENT ; 1392 } else{ 1393 reportFatalError("ContentIllegalInTrailingMisc", 1394 null); 1395 fEntityScanner.scanChar(null); 1396 setScannerState(SCANNER_STATE_TRAILING_MISC); 1397 return XMLEvent.CHARACTERS; 
1398 } 1399 1400 } 1401 case SCANNER_STATE_REFERENCE: { 1402 reportFatalError("ReferenceIllegalInTrailingMisc", 1403 null); 1404 setScannerState(SCANNER_STATE_TRAILING_MISC); 1405 return XMLEvent.ENTITY_REFERENCE ; 1406 } 1407 case SCANNER_STATE_TERMINATED: { 1408 //there can't be any element after SCANNER_STATE_TERMINATED or when the parser 1409 //has reached the end of document 1410 setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION); 1411 //xxx what to do when the scanner has reached the terminating state. 1412 return XMLEvent.END_DOCUMENT ; 1413 } 1414 case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{ 1415 throw new java.util.NoSuchElementException("No more events to be parsed"); 1416 } 1417 default: throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 1418 }//switch 1419 1420 } catch (EOFException e) { 1421 // NOTE: This is the only place we're allowed to reach 1422 // the real end of the document stream. Unless the 1423 // end of file was reached prematurely. 1424 if (fMarkupDepth != 0) { 1425 reportFatalError("PrematureEOF", null); 1426 return -1; 1427 //throw e; 1428 } 1429 //System.out.println("EOFException thrown") ; 1430 setScannerState(SCANNER_STATE_TERMINATED); 1431 } 1432 1433 return XMLEvent.END_DOCUMENT; 1434 1435 }//next 1436 1437 } // class TrailingMiscDriver 1438 1439 /** 1440 * Implements XMLBufferListener interface. 1441 */ 1442 1443 1444 /** 1445 * receives callbacks from {@link XMLEntityReader } when buffer 1446 * is being changed. 1447 * @param refreshPosition 1448 */ 1449 public void refresh(int refreshPosition){ 1450 super.refresh(refreshPosition); 1451 if(fReadingDTD){ 1452 Entity entity = fEntityScanner.getCurrentEntity(); 1453 if(entity instanceof Entity.ScannedEntity){ 1454 fEndPos=((Entity.ScannedEntity)entity).position; 1455 } 1456 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 1457 fStartPos = refreshPosition; 1458 } 1459 } 1460 1461 } // class XMLDocumentScannerImpl