New src/java.xml/share/classes/com/sun/org/apache/xml/internal/dtm/ref/dom2dtm/DOM2DTM.java

   1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;
  22 
  23 import com.sun.org.apache.xml.internal.dtm.DTM;
  24 import com.sun.org.apache.xml.internal.dtm.DTMManager;
  25 import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
  26 import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;
  27 import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
  28 import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;
  29 import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;
  30 import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  31 import com.sun.org.apache.xml.internal.res.XMLMessages;
  32 import com.sun.org.apache.xml.internal.utils.FastStringBuffer;
  33 import com.sun.org.apache.xml.internal.utils.QName;
  34 import com.sun.org.apache.xml.internal.utils.StringBufferPool;
  35 import com.sun.org.apache.xml.internal.utils.TreeWalker;
  36 import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;
  37 import com.sun.org.apache.xml.internal.utils.XMLString;
  38 import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
  39 import java.util.ArrayList;
  40 import java.util.List;
  41 import javax.xml.transform.SourceLocator;
  42 import javax.xml.transform.dom.DOMSource;
  43 import org.w3c.dom.Attr;
  44 import org.w3c.dom.Document;
  45 import org.w3c.dom.DocumentType;
  46 import org.w3c.dom.Element;
  47 import org.w3c.dom.Entity;
  48 import org.w3c.dom.NamedNodeMap;
  49 import org.w3c.dom.Node;
  50 import org.xml.sax.ContentHandler;
  51 
  52 /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
  53  * DTM API.
  54  *
  55  * Note that it doesn't necessarily represent a full Document
  56  * tree. You can wrap a DOM2DTM around a specific node and its subtree
  57  * and the right things should happen. (I don't _think_ we currently
  58  * support DocumentFragment nodes as roots, though that might be worth
  59  * considering.)
  60  *
  61  * Note too that we do not currently attempt to track document
  62  * mutation. If you alter the DOM after wrapping DOM2DTM around it,
  63  * all bets are off.
  64  *
  65  * @LastModified: Oct 2017
  66  */
  67 public class DOM2DTM extends DTMDefaultBaseIterators
  68 {
  69   static final boolean JJK_DEBUG=false;
  70   static final boolean JJK_NEWCODE=true;
  71 
  72   /** Manefest constant
  73    */
  74   static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
  75 
  76   /** The current position in the DOM tree. Last node examined for
  77    * possible copying to DTM. */
  78   transient private Node m_pos;
  79   /** The current position in the DTM tree. Who children get appended to. */
  80   private int m_last_parent=0;
  81   /** The current position in the DTM tree. Who children reference as their
  82    * previous sib. */
  83   private int m_last_kid=NULL;
  84 
  85   /** The top of the subtree.
  86    * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
  87    * */
  88   transient private Node m_root;
  89 
  90   /** True iff the first element has been processed. This is used to control
  91       synthesis of the implied xml: namespace declaration node. */
  92   boolean m_processedFirstElement=false;
  93 
  94   /** true if ALL the nodes in the m_root subtree have been processed;
  95    * false if our incremental build has not yet finished scanning the
  96    * DOM tree.  */
  97   transient private boolean m_nodesAreProcessed;
  98 
  99   /** The node objects.  The instance part of the handle indexes
 100    * directly into this vector.  Each DTM node may actually be
 101    * composed of several DOM nodes (for example, if logically-adjacent
 102    * Text/CDATASection nodes in the DOM have been coalesced into a
 103    * single DTM Text node); this table points only to the first in
 104    * that sequence. */
 105   protected List<Node> m_nodes = new ArrayList<>();
 106 
 107   /**
 108    * Construct a DOM2DTM object from a DOM node.
 109    *
 110    * @param mgr The DTMManager who owns this DTM.
 111    * @param domSource the DOM source that this DTM will wrap.
 112    * @param dtmIdentity The DTM identity ID for this DTM.
 113    * @param whiteSpaceFilter The white space filter for this DTM, which may
 114    *                         be null.
 115    * @param xstringfactory XMLString factory for creating character content.
 116    * @param doIndexing true if the caller considers it worth it to use
 117    *                   indexing schemes.
 118    */
 119   public DOM2DTM(DTMManager mgr, DOMSource domSource,
 120                  int dtmIdentity, DTMWSFilter whiteSpaceFilter,
 121                  XMLStringFactory xstringfactory,
 122                  boolean doIndexing)
 123   {
 124     super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
 125           xstringfactory, doIndexing);
 126 
 127     // Initialize DOM navigation
 128     m_pos=m_root = domSource.getNode();
 129     // Initialize DTM navigation
 130     m_last_parent=m_last_kid=NULL;
 131     m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
 132 
 133     // Apparently the domSource root may not actually be the
 134     // Document node. If it's an Element node, we need to immediately
 135     // add its attributes. Adapted from nextNode().
 136     // %REVIEW% Move this logic into addNode and recurse? Cleaner!
 137     //
 138     // (If it's an EntityReference node, we're probably scrod. For now
 139     // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
 140                 //
 141                 // %ISSUE% What about inherited namespaces in this case?
 142                 // Do we need to special-case initialize them into the DTM model?
 143     if(ELEMENT_NODE == m_root.getNodeType())
 144     {
 145       NamedNodeMap attrs=m_root.getAttributes();
 146       int attrsize=(attrs==null) ? 0 : attrs.getLength();
 147       if(attrsize>0)
 148       {
 149         int attrIndex=NULL; // start with no previous sib
 150         for(int i=0;i<attrsize;++i)
 151         {
 152           // No need to force nodetype in this case;
 153           // addNode() will take care of switching it from
 154           // Attr to Namespace if necessary.
 155           attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
 156           m_firstch.setElementAt(DTM.NULL,attrIndex);
 157         }
 158         // Terminate list of attrs, and make sure they aren't
 159         // considered children of the element
 160         m_nextsib.setElementAt(DTM.NULL,attrIndex);
 161 
 162         // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
 163       } // if attrs exist
 164     } //if(ELEMENT_NODE)
 165 
 166     // Initialize DTM-completed status
 167     m_nodesAreProcessed = false;
 168   }
 169 
 170   /**
 171    * Construct the node map from the node.
 172    *
 173    * @param node The node that is to be added to the DTM.
 174    * @param parentIndex The current parent index.
 175    * @param previousSibling The previous sibling index.
 176    * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
 177    *    Used to force nodes to Text rather than CDATASection when their
 178    *    coalesced value includes ordinary Text nodes (current DTM behavior).
 179    *
 180    * @return The index identity of the node that was added.
 181    */
 182   protected int addNode(Node node, int parentIndex,
 183                         int previousSibling, int forceNodeType)
 184   {
 185     int nodeIndex = m_nodes.size();
 186 
 187     // Have we overflowed a DTM Identity's addressing range?
 188     if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
 189     {
 190       try
 191       {
 192         if(m_mgr==null)
 193           throw new ClassCastException();
 194 
 195                                 // Handle as Extended Addressing
 196         DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
 197         int id=mgrD.getFirstFreeDTMID();
 198         mgrD.addDTM(this,id,nodeIndex);
 199         m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
 200       }
 201       catch(ClassCastException e)
 202       {
 203         // %REVIEW% Wrong error message, but I've been told we're trying
 204         // not to add messages right not for I18N reasons.
 205         // %REVIEW% Should this be a Fatal Error?
 206         error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
 207       }
 208     }
 209 
 210     m_size++;
 211     // ensureSize(nodeIndex);
 212 
 213     int type;
 214     if(NULL==forceNodeType)
 215         type = node.getNodeType();
 216     else
 217         type=forceNodeType;
 218 
 219     // %REVIEW% The Namespace Spec currently says that Namespaces are
 220     // processed in a non-namespace-aware manner, by matching the
 221     // QName, even though there is in fact a namespace assigned to
 222     // these nodes in the DOM. If and when that changes, we will have
 223     // to consider whether we check the namespace-for-namespaces
 224     // rather than the node name.
 225     //
 226     // %TBD% Note that the DOM does not necessarily explicitly declare
 227     // all the namespaces it uses. DOM Level 3 will introduce a
 228     // namespace-normalization operation which reconciles that, and we
 229     // can request that users invoke it or otherwise ensure that the
 230     // tree is namespace-well-formed before passing the DOM to Xalan.
 231     // But if they don't, what should we do about it? We probably
 232     // don't want to alter the source DOM (and may not be able to do
 233     // so if it's read-only). The best available answer might be to
 234     // synthesize additional DTM Namespace Nodes that don't correspond
 235     // to DOM Attr Nodes.
 236     if (Node.ATTRIBUTE_NODE == type)
 237     {
 238       String name = node.getNodeName();
 239 
 240       if (name.startsWith("xmlns:") || name.equals("xmlns"))
 241       {
 242         type = DTM.NAMESPACE_NODE;
 243       }
 244     }
 245 
 246     m_nodes.add(node);
 247 
 248     m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
 249     m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
 250     m_prevsib.setElementAt(previousSibling,nodeIndex);
 251     m_parent.setElementAt(parentIndex,nodeIndex);
 252 
 253     if(DTM.NULL != parentIndex &&
 254        type != DTM.ATTRIBUTE_NODE &&
 255        type != DTM.NAMESPACE_NODE)
 256     {
 257       // If the DTM parent had no children, this becomes its first child.
 258       if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
 259         m_firstch.setElementAt(nodeIndex,parentIndex);
 260     }
 261 
 262     String nsURI = node.getNamespaceURI();
 263 
 264     // Deal with the difference between Namespace spec and XSLT
 265     // definitions of local name. (The former says PIs don't have
 266     // localnames; the latter says they do.)
 267     String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ?
 268                          node.getNodeName() :
 269                          node.getLocalName();
 270 
 271     // Hack to make DOM1 sort of work...
 272     if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
 273         && null == localName)
 274       localName = node.getNodeName(); // -sb
 275 
 276     ExpandedNameTable exnt = m_expandedNameTable;
 277 
 278     // %TBD% Nodes created with the old non-namespace-aware DOM
 279     // calls createElement() and createAttribute() will never have a
 280     // localname. That will cause their expandedNameID to be just the
 281     // nodeType... which will keep them from being matched
 282     // successfully by name. Since the DOM makes no promise that
 283     // those will participate in namespace processing, this is
 284     // officially accepted as Not Our Fault. But it might be nice to
 285     // issue a diagnostic message!
 286     if(node.getLocalName()==null &&
 287        (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
 288       {
 289         // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
 290       }
 291 
 292     int expandedNameID = (null != localName)
 293        ? exnt.getExpandedTypeID(nsURI, localName, type) :
 294          exnt.getExpandedTypeID(type);
 295 
 296     m_exptype.setElementAt(expandedNameID,nodeIndex);
 297 
 298     indexNode(expandedNameID, nodeIndex);
 299 
 300     if (DTM.NULL != previousSibling)
 301       m_nextsib.setElementAt(nodeIndex,previousSibling);
 302 
 303     // This should be done after m_exptype has been set, and probably should
 304     // always be the last thing we do
 305     if (type == DTM.NAMESPACE_NODE)
 306         declareNamespaceInContext(parentIndex,nodeIndex);
 307 
 308     return nodeIndex;
 309   }
 310 
 311   /**
 312    * Get the number of nodes that have been added.
 313    */
 314   public int getNumberOfNodes()
 315   {
 316     return m_nodes.size();
 317   }
 318 
 319  /**
 320    * This method iterates to the next node that will be added to the table.
 321    * Each call to this method adds a new node to the table, unless the end
 322    * is reached, in which case it returns null.
 323    *
 324    * @return The true if a next node is found or false if
 325    *         there are no more nodes.
 326    */
 327   protected boolean nextNode()
 328   {
 329     // Non-recursive one-fetch-at-a-time depth-first traversal with
 330     // attribute/namespace nodes and white-space stripping.
 331     // Navigating the DOM is simple, navigating the DTM is simple;
 332     // keeping track of both at once is a trifle baroque but at least
 333     // we've avoided most of the special cases.
 334     if (m_nodesAreProcessed)
 335       return false;
 336 
 337     // %REVIEW% Is this local copy Really Useful from a performance
 338     // point of view?  Or is this a false microoptimization?
 339     Node pos=m_pos;
 340     Node next=null;
 341     int nexttype=NULL;
 342 
 343     // Navigate DOM tree
 344     do
 345       {
 346         // Look down to first child.
 347         if (pos.hasChildNodes())
 348           {
 349             next = pos.getFirstChild();
 350 
 351             // %REVIEW% There's probably a more elegant way to skip
 352             // the doctype. (Just let it go and Suppress it?
 353             if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
 354               next=next.getNextSibling();
 355 
 356             // Push DTM context -- except for children of Entity References,
 357             // which have no DTM equivalent and cause no DTM navigation.
 358             if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
 359               {
 360                 m_last_parent=m_last_kid;
 361                 m_last_kid=NULL;
 362                 // Whitespace-handler context stacking
 363                 if(null != m_wsfilter)
 364                 {
 365                   short wsv =
 366                     m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
 367                   boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
 368                     ? getShouldStripWhitespace()
 369                     : (DTMWSFilter.STRIP == wsv);
 370                   pushShouldStripWhitespace(shouldStrip);
 371                 } // if(m_wsfilter)
 372               }
 373           }
 374 
 375         // If that fails, look up and right (but not past root!)
 376         else
 377           {
 378             if(m_last_kid!=NULL)
 379               {
 380                 // Last node posted at this level had no more children
 381                 // If it has _no_ children, we need to record that.
 382                 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
 383                   m_firstch.setElementAt(NULL,m_last_kid);
 384               }
 385 
 386             while(m_last_parent != NULL)
 387               {
 388                 // %REVIEW% There's probably a more elegant way to
 389                 // skip the doctype. (Just let it go and Suppress it?
 390                 next = pos.getNextSibling();
 391                 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
 392                   next=next.getNextSibling();
 393 
 394                 if(next!=null)
 395                   break; // Found it!
 396 
 397                 // No next-sibling found. Pop the DOM.
 398                 pos=pos.getParentNode();
 399                 if(pos==null)
 400                   {
 401                     // %TBD% Should never arise, but I want to be sure of that...
 402                     if(JJK_DEBUG)
 403                       {
 404                         System.out.println("***** DOM2DTM Pop Control Flow problem");
 405                         for(;;); // Freeze right here!
 406                       }
 407                   }
 408 
 409                 // The only parents in the DTM are Elements.  However,
 410                 // the DOM could contain EntityReferences.  If we
 411                 // encounter one, pop it _without_ popping DTM.
 412                 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
 413                   {
 414                     // Nothing needs doing
 415                     if(JJK_DEBUG)
 416                       System.out.println("***** DOM2DTM popping EntRef");
 417                   }
 418                 else
 419                   {
 420                     popShouldStripWhitespace();
 421                     // Fix and pop DTM
 422                     if(m_last_kid==NULL)
 423                       m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
 424                     else
 425                       m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
 426                     m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
 427                   }
 428               }
 429             if(m_last_parent==NULL)
 430               next=null;
 431           }
 432 
 433         if(next!=null)
 434           nexttype=next.getNodeType();
 435 
 436         // If it's an entity ref, advance past it.
 437         //
 438         // %REVIEW% Should we let this out the door and just suppress it?
 439         // More work, but simpler code, more likely to be correct, and
 440         // it doesn't happen very often. We'd get rid of the loop too.
 441         if (ENTITY_REFERENCE_NODE == nexttype)
 442           pos=next;
 443       }
 444     while (ENTITY_REFERENCE_NODE == nexttype);
 445 
 446     // Did we run out of the tree?
 447     if(next==null)
 448       {
 449         m_nextsib.setElementAt(NULL,0);
 450         m_nodesAreProcessed = true;
 451         m_pos=null;
 452 
 453         if(JJK_DEBUG)
 454           {
 455             System.out.println("***** DOM2DTM Crosscheck:");
 456             for(int i=0;i<m_nodes.size();++i)
 457               System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
 458           }
 459 
 460         return false;
 461       }
 462 
 463     // Text needs some special handling:
 464     //
 465     // DTM may skip whitespace. This is handled by the suppressNode flag, which
 466     // when true will keep the DTM node from being created.
 467     //
 468     // DTM only directly records the first DOM node of any logically-contiguous
 469     // sequence. The lastTextNode value will be set to the last node in the
 470     // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
 471     // advance next over this whole block. Should be simpler than special-casing
 472     // the above loop for "Was the logically-preceeding sibling a text node".
 473     //
 474     // Finally, a DTM node should be considered a CDATASection only if all the
 475     // contiguous text it covers is CDATASections. The first Text should
 476     // force DTM to Text.
 477 
 478     boolean suppressNode=false;
 479     Node lastTextNode=null;
 480 
 481     nexttype=next.getNodeType();
 482 
 483     // nexttype=pos.getNodeType();
 484     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
 485       {
 486         // If filtering, initially assume we're going to suppress the node
 487         suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
 488 
 489         // Scan logically contiguous text (siblings, plus "flattening"
 490         // of entity reference boundaries).
 491         Node n=next;
 492         while(n!=null)
 493           {
 494             lastTextNode=n;
 495             // Any Text node means DTM considers it all Text
 496             if(TEXT_NODE == n.getNodeType())
 497               nexttype=TEXT_NODE;
 498             // Any non-whitespace in this sequence blocks whitespace
 499             // suppression
 500             suppressNode &=
 501               XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
 502 
 503             n=logicalNextDOMTextNode(n);
 504           }
 505       }
 506 
 507     // Special handling for PIs: Some DOMs represent the XML
 508     // Declaration as a PI. This is officially incorrect, per the DOM
 509     // spec, but is considered a "wrong but tolerable" temporary
 510     // workaround pending proper handling of these fields in DOM Level
 511     // 3. We want to recognize and reject that case.
 512     else if(PROCESSING_INSTRUCTION_NODE==nexttype)
 513       {
 514         suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
 515       }
 516 
 517 
 518     if(!suppressNode)
 519       {
 520         // Inserting next. NOTE that we force the node type; for
 521         // coalesced Text, this records CDATASections adjacent to
 522         // ordinary Text as Text.
 523         int nextindex=addNode(next,m_last_parent,m_last_kid,
 524                               nexttype);
 525 
 526         m_last_kid=nextindex;
 527 
 528         if(ELEMENT_NODE == nexttype)
 529           {
 530             int attrIndex=NULL; // start with no previous sib
 531             // Process attributes _now_, rather than waiting.
 532             // Simpler control flow, makes NS cache available immediately.
 533             NamedNodeMap attrs=next.getAttributes();
 534             int attrsize=(attrs==null) ? 0 : attrs.getLength();
 535             if(attrsize>0)
 536               {
 537                 for(int i=0;i<attrsize;++i)
 538                   {
 539                     // No need to force nodetype in this case;
 540                     // addNode() will take care of switching it from
 541                     // Attr to Namespace if necessary.
 542                     attrIndex=addNode(attrs.item(i),
 543                                       nextindex,attrIndex,NULL);
 544                     m_firstch.setElementAt(DTM.NULL,attrIndex);
 545 
 546                     // If the xml: prefix is explicitly declared
 547                     // we don't need to synthesize one.
 548                     //
 549                     // NOTE that XML Namespaces were not originally
 550                     // defined as being namespace-aware (grrr), and
 551                     // while the W3C is planning to fix this it's
 552                     // safer for now to test the QName and trust the
 553                     // parsers to prevent anyone from redefining the
 554                     // reserved xmlns: prefix
 555                     if(!m_processedFirstElement
 556                        && "xmlns:xml".equals(attrs.item(i).getNodeName()))
 557                       m_processedFirstElement=true;
 558                   }
 559                 // Terminate list of attrs, and make sure they aren't
 560                 // considered children of the element
 561               } // if attrs exist
 562             if(!m_processedFirstElement)
 563             {
 564               // The DOM might not have an explicit declaration for the
 565               // implicit "xml:" prefix, but the XPath data model
 566               // requires that this appear as a Namespace Node so we
 567               // have to synthesize one. You can think of this as
 568               // being a default attribute defined by the XML
 569               // Namespaces spec rather than by the DTD.
 570               attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
 571                                                                                                                                         (Element)next,"xml",NAMESPACE_DECL_NS,
 572                                                                                                                                         makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
 573                                                                                                                                         ),
 574                                 nextindex,attrIndex,NULL);
 575               m_firstch.setElementAt(DTM.NULL,attrIndex);
 576               m_processedFirstElement=true;
 577             }
 578             if(attrIndex!=NULL)
 579               m_nextsib.setElementAt(DTM.NULL,attrIndex);
 580           } //if(ELEMENT_NODE)
 581       } // (if !suppressNode)
 582 
 583     // Text postprocessing: Act on values stored above
 584     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
 585       {
 586         // %TBD% If nexttype was forced to TEXT, patch the DTM node
 587 
 588         next=lastTextNode;      // Advance the DOM cursor over contiguous text
 589       }
 590 
 591     // Remember where we left off.
 592     m_pos=next;
 593     return true;
 594   }
 595 
 596 
 597   /**
 598    * Return an DOM node for the given node.
 599    *
 600    * @param nodeHandle The node ID.
 601    *
 602    * @return A node representation of the DTM node.
 603    */
 604   public Node getNode(int nodeHandle)
 605   {
 606 
 607     int identity = makeNodeIdentity(nodeHandle);
 608 
 609     return m_nodes.get(identity);
 610   }
 611 
 612   /**
 613    * Get a Node from an identity index.
 614    *
 615    * NEEDSDOC @param nodeIdentity
 616    *
 617    * NEEDSDOC ($objectName$) @return
 618    */
 619   protected Node lookupNode(int nodeIdentity)
 620   {
 621     return m_nodes.get(nodeIdentity);
 622   }
 623 
 624   /**
 625    * Get the next node identity value in the list, and call the iterator
 626    * if it hasn't been added yet.
 627    *
 628    * @param identity The node identity (index).
 629    * @return identity+1, or DTM.NULL.
 630    */
 631   protected int getNextNodeIdentity(int identity)
 632   {
 633 
 634     identity += 1;
 635 
 636     if (identity >= m_nodes.size())
 637     {
 638       if (!nextNode())
 639         identity = DTM.NULL;
 640     }
 641 
 642     return identity;
 643   }
 644 
 645   /**
 646    * Get the handle from a Node.
 647    * <p>%OPT% This will be pretty slow.</p>
 648    *
 649    * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
 650    * walk down DTM reconstructing path) might be considerably faster
 651    * on later nodes in large documents. That might also imply improving
 652    * this call to handle nodes which would be in this DTM but
 653    * have not yet been built, which might or might not be a Good Thing.</p>
 654    *
 655    * %REVIEW% This relies on being able to test node-identity via
 656    * object-identity. DTM2DOM proxying is a great example of a case where
 657    * that doesn't work. DOM Level 3 will provide the isSameNode() method
 658    * to fix that, but until then this is going to be flaky.
 659    *
 660    * @param node A node, which may be null.
 661    *
 662    * @return The node handle or <code>DTM.NULL</code>.
 663    */
 664   private int getHandleFromNode(Node node)
 665   {
 666     if (null != node)
 667     {
 668       int len = m_nodes.size();
 669       boolean isMore;
 670       int i = 0;
 671       do
 672       {
 673         for (; i < len; i++)
 674         {
 675           if (m_nodes.get(i) == node)
 676             return makeNodeHandle(i);
 677         }
 678 
 679         isMore = nextNode();
 680 
 681         len = m_nodes.size();
 682 
 683       }
 684       while(isMore || i < len);
 685     }
 686 
 687     return DTM.NULL;
 688   }
 689 
 690   /** Get the handle from a Node. This is a more robust version of
 691    * getHandleFromNode, intended to be usable by the public.
 692    *
 693    * <p>%OPT% This will be pretty slow.</p>
 694    *
 695    * %REVIEW% This relies on being able to test node-identity via
 696    * object-identity. DTM2DOM proxying is a great example of a case where
 697    * that doesn't work. DOM Level 3 will provide the isSameNode() method
 698    * to fix that, but until then this is going to be flaky.
 699    *
 700    * @param node A node, which may be null.
 701    *
 702    * @return The node handle or <code>DTM.NULL</code>.  */
 703   public int getHandleOfNode(Node node)
 704   {
 705     if (null != node)
 706     {
 707       // Is Node actually within the same document? If not, don't search!
 708       // This would be easier if m_root was always the Document node, but
 709       // we decided to allow wrapping a DTM around a subtree.
 710       if((m_root==node) ||
 711          (m_root.getNodeType()==DOCUMENT_NODE &&
 712           m_root==node.getOwnerDocument()) ||
 713          (m_root.getNodeType()!=DOCUMENT_NODE &&
 714           m_root.getOwnerDocument()==node.getOwnerDocument())
 715          )
 716         {
 717           // If node _is_ in m_root's tree, find its handle
 718           //
 719           // %OPT% This check may be improved significantly when DOM
 720           // Level 3 nodeKey and relative-order tests become
 721           // available!
 722           for(Node cursor=node;
 723               cursor!=null;
 724               cursor=
 725                 (cursor.getNodeType()!=ATTRIBUTE_NODE)
 726                 ? cursor.getParentNode()
 727                 : ((org.w3c.dom.Attr)cursor).getOwnerElement())
 728             {
 729               if(cursor==m_root)
 730                 // We know this node; find its handle.
 731                 return getHandleFromNode(node);
 732             } // for ancestors of node
 733         } // if node and m_root in same Document
 734     } // if node!=null
 735 
 736     return DTM.NULL;
 737   }
 738 
 739   /**
 740    * Retrieves an attribute node by by qualified name and namespace URI.
 741    *
 742    * @param nodeHandle int Handle of the node upon which to look up this attribute..
 743    * @param namespaceURI The namespace URI of the attribute to
 744    *   retrieve, or null.
 745    * @param name The local name of the attribute to
 746    *   retrieve.
 747    * @return The attribute node handle with the specified name (
 748    *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
 749    *   attribute.
 750    */
 751   public int getAttributeNode(int nodeHandle, String namespaceURI,
 752                               String name)
 753   {
 754 
 755     // %OPT% This is probably slower than it needs to be.
 756     if (null == namespaceURI)
 757       namespaceURI = "";
 758 
 759     int type = getNodeType(nodeHandle);
 760 
 761     if (DTM.ELEMENT_NODE == type)
 762     {
 763 
 764       // Assume that attributes immediately follow the element.
 765       int identity = makeNodeIdentity(nodeHandle);
 766 
 767       while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
 768       {
 769         // Assume this can not be null.
 770         type = _type(identity);
 771 
 772                                 // %REVIEW%
 773                                 // Should namespace nodes be retrievable DOM-style as attrs?
 774                                 // If not we need a separate function... which may be desirable
 775                                 // architecturally, but which is ugly from a code point of view.
 776                                 // (If we REALLY insist on it, this code should become a subroutine
 777                                 // of both -- retrieve the node, then test if the type matches
 778                                 // what you're looking for.)
 779         if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
 780         {
 781           Node node = lookupNode(identity);
 782           String nodeuri = node.getNamespaceURI();
 783 
 784           if (null == nodeuri)
 785             nodeuri = "";
 786 
 787           String nodelocalname = node.getLocalName();
 788 
 789           if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
 790             return makeNodeHandle(identity);
 791         }
 792 
 793         else // if (DTM.NAMESPACE_NODE != type)
 794         {
 795           break;
 796         }
 797       }
 798     }
 799 
 800     return DTM.NULL;
 801   }
 802 
 803   /**
 804    * Get the string-value of a node as a String object
 805    * (see http://www.w3.org/TR/xpath#data-model
 806    * for the definition of a node's string-value).
 807    *
 808    * @param nodeHandle The node ID.
 809    *
 810    * @return A string object that represents the string-value of the given node.
 811    */
 812   public XMLString getStringValue(int nodeHandle)
 813   {
 814 
 815     int type = getNodeType(nodeHandle);
 816     Node node = getNode(nodeHandle);
 817     // %TBD% If an element only has one text node, we should just use it
 818     // directly.
 819     if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
 820     || DTM.DOCUMENT_FRAGMENT_NODE == type)
 821     {
 822       FastStringBuffer buf = StringBufferPool.get();
 823       String s;
 824 
 825       try
 826       {
 827         getNodeData(node, buf);
 828 
 829         s = (buf.length() > 0) ? buf.toString() : "";
 830       }
 831       finally
 832       {
 833         StringBufferPool.free(buf);
 834       }
 835 
 836       return m_xstrf.newstr( s );
 837     }
 838     else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
 839     {
 840       // If this is a DTM text node, it may be made of multiple DOM text
 841       // nodes -- including navigating into Entity References. DOM2DTM
 842       // records the first node in the sequence and requires that we
 843       // pick up the others when we retrieve the DTM node's value.
 844       //
 845       // %REVIEW% DOM Level 3 is expected to add a "whole text"
 846       // retrieval method which performs this function for us.
 847       FastStringBuffer buf = StringBufferPool.get();
 848       while(node!=null)
 849       {
 850         buf.append(node.getNodeValue());
 851         node=logicalNextDOMTextNode(node);
 852       }
 853       String s=(buf.length() > 0) ? buf.toString() : "";
 854       StringBufferPool.free(buf);
 855       return m_xstrf.newstr( s );
 856     }
 857     else
 858       return m_xstrf.newstr( node.getNodeValue() );
 859   }
 860 
 861   /**
 862    * Determine if the string-value of a node is whitespace
 863    *
 864    * @param nodeHandle The node Handle.
 865    *
 866    * @return Return true if the given node is whitespace.
 867    */
 868   public boolean isWhitespace(int nodeHandle)
 869   {
 870         int type = getNodeType(nodeHandle);
 871     Node node = getNode(nodeHandle);
 872         if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
 873     {
 874       // If this is a DTM text node, it may be made of multiple DOM text
 875       // nodes -- including navigating into Entity References. DOM2DTM
 876       // records the first node in the sequence and requires that we
 877       // pick up the others when we retrieve the DTM node's value.
 878       //
 879       // %REVIEW% DOM Level 3 is expected to add a "whole text"
 880       // retrieval method which performs this function for us.
 881       FastStringBuffer buf = StringBufferPool.get();
 882       while(node!=null)
 883       {
 884         buf.append(node.getNodeValue());
 885         node=logicalNextDOMTextNode(node);
 886       }
 887      boolean b = buf.isWhitespace(0, buf.length());
 888       StringBufferPool.free(buf);
 889      return b;
 890     }
 891     return false;
 892   }
 893 
 894   /**
 895    * Retrieve the text content of a DOM subtree, appending it into a
 896    * user-supplied FastStringBuffer object. Note that attributes are
 897    * not considered part of the content of an element.
 898    * <p>
 899    * There are open questions regarding whitespace stripping.
 900    * Currently we make no special effort in that regard, since the standard
 901    * DOM doesn't yet provide DTD-based information to distinguish
 902    * whitespace-in-element-context from genuine #PCDATA. Note that we
 903    * should probably also consider xml:space if/when we address this.
 904    * DOM Level 3 may solve the problem for us.
 905    * <p>
 906    * %REVIEW% Actually, since this method operates on the DOM side of the
 907    * fence rather than the DTM side, it SHOULDN'T do
 908    * any special handling. The DOM does what the DOM does; if you want
 909    * DTM-level abstractions, use DTM-level methods.
 910    *
 911    * @param node Node whose subtree is to be walked, gathering the
 912    * contents of all Text or CDATASection nodes.
 913    * @param buf FastStringBuffer into which the contents of the text
 914    * nodes are to be concatenated.
 915    */
 916   protected static void getNodeData(Node node, FastStringBuffer buf)
 917   {
 918 
 919     switch (node.getNodeType())
 920     {
 921     case Node.DOCUMENT_FRAGMENT_NODE :
 922     case Node.DOCUMENT_NODE :
 923     case Node.ELEMENT_NODE :
 924     {
 925       for (Node child = node.getFirstChild(); null != child;
 926               child = child.getNextSibling())
 927       {
 928         getNodeData(child, buf);
 929       }
 930     }
 931     break;
 932     case Node.TEXT_NODE :
 933     case Node.CDATA_SECTION_NODE :
 934     case Node.ATTRIBUTE_NODE :  // Never a child but might be our starting node
 935       buf.append(node.getNodeValue());
 936       break;
 937     case Node.PROCESSING_INSTRUCTION_NODE :
 938       // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
 939       break;
 940     default :
 941       // ignore
 942       break;
 943     }
 944   }
 945 
 946   /**
 947    * Given a node handle, return its DOM-style node name. This will
 948    * include names such as #text or #document.
 949    *
 950    * @param nodeHandle the id of the node.
 951    * @return String Name of this node, which may be an empty string.
 952    * %REVIEW% Document when empty string is possible...
 953    * %REVIEW-COMMENT% It should never be empty, should it?
 954    */
 955   public String getNodeName(int nodeHandle)
 956   {
 957 
 958     Node node = getNode(nodeHandle);
 959 
 960     // Assume non-null.
 961     return node.getNodeName();
 962   }
 963 
 964   /**
 965    * Given a node handle, return the XPath node name.  This should be
 966    * the name as described by the XPath data model, NOT the DOM-style
 967    * name.
 968    *
 969    * @param nodeHandle the id of the node.
 970    * @return String Name of this node, which may be an empty string.
 971    */
 972   public String getNodeNameX(int nodeHandle)
 973   {
 974 
 975     String name;
 976     short type = getNodeType(nodeHandle);
 977 
 978     switch (type)
 979     {
 980     case DTM.NAMESPACE_NODE :
 981     {
 982       Node node = getNode(nodeHandle);
 983 
 984       // assume not null.
 985       name = node.getNodeName();
 986       if(name.startsWith("xmlns:"))
 987       {
 988         name = QName.getLocalPart(name);
 989       }
 990       else if(name.equals("xmlns"))
 991       {
 992         name = "";
 993       }
 994     }
 995     break;
 996     case DTM.ATTRIBUTE_NODE :
 997     case DTM.ELEMENT_NODE :
 998     case DTM.ENTITY_REFERENCE_NODE :
 999     case DTM.PROCESSING_INSTRUCTION_NODE :
1000     {
1001       Node node = getNode(nodeHandle);
1002 
1003       // assume not null.
1004       name = node.getNodeName();
1005     }
1006     break;
1007     default :
1008       name = "";
1009     }
1010 
1011     return name;
1012   }
1013 
1014   /**
1015    * Given a node handle, return its XPath-style localname.
1016    * (As defined in Namespaces, this is the portion of the name after any
1017    * colon character).
1018    *
1019    * @param nodeHandle the id of the node.
1020    * @return String Local name of this node.
1021    */
1022   public String getLocalName(int nodeHandle)
1023   {
1024     if(JJK_NEWCODE)
1025     {
1026       int id=makeNodeIdentity(nodeHandle);
1027       if(NULL==id) return null;
1028       Node newnode=m_nodes.get(id);
1029       String newname=newnode.getLocalName();
1030       if (null == newname)
1031       {
1032         // XSLT treats PIs, and possibly other things, as having QNames.
1033         String qname = newnode.getNodeName();
1034         if('#'==qname.charAt(0))
1035         {
1036           //  Match old default for this function
1037           // This conversion may or may not be necessary
1038           newname="";
1039         }
1040         else
1041         {
1042           int index = qname.indexOf(':');
1043           newname = (index < 0) ? qname : qname.substring(index + 1);
1044         }
1045       }
1046       return newname;
1047     }
1048     else
1049     {
1050       String name;
1051       short type = getNodeType(nodeHandle);
1052       switch (type)
1053       {
1054       case DTM.ATTRIBUTE_NODE :
1055       case DTM.ELEMENT_NODE :
1056       case DTM.ENTITY_REFERENCE_NODE :
1057       case DTM.NAMESPACE_NODE :
1058       case DTM.PROCESSING_INSTRUCTION_NODE :
1059         {
1060           Node node = getNode(nodeHandle);
1061 
1062           // assume not null.
1063           name = node.getLocalName();
1064 
1065           if (null == name)
1066           {
1067             String qname = node.getNodeName();
1068             int index = qname.indexOf(':');
1069 
1070             name = (index < 0) ? qname : qname.substring(index + 1);
1071           }
1072         }
1073         break;
1074       default :
1075         name = "";
1076       }
1077       return name;
1078     }
1079   }
1080 
1081   /**
1082    * Given a namespace handle, return the prefix that the namespace decl is
1083    * mapping.
1084    * Given a node handle, return the prefix used to map to the namespace.
1085    *
1086    * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
1087    * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
1088    *
1089    * @param nodeHandle the id of the node.
1090    * @return String prefix of this node's name, or "" if no explicit
1091    * namespace prefix was given.
1092    */
1093   public String getPrefix(int nodeHandle)
1094   {
1095 
1096     String prefix;
1097     short type = getNodeType(nodeHandle);
1098 
1099     switch (type)
1100     {
1101     case DTM.NAMESPACE_NODE :
1102     {
1103       Node node = getNode(nodeHandle);
1104 
1105       // assume not null.
1106       String qname = node.getNodeName();
1107       int index = qname.indexOf(':');
1108 
1109       prefix = (index < 0) ? "" : qname.substring(index + 1);
1110     }
1111     break;
1112     case DTM.ATTRIBUTE_NODE :
1113     case DTM.ELEMENT_NODE :
1114     {
1115       Node node = getNode(nodeHandle);
1116 
1117       // assume not null.
1118       String qname = node.getNodeName();
1119       int index = qname.indexOf(':');
1120 
1121       prefix = (index < 0) ? "" : qname.substring(0, index);
1122     }
1123     break;
1124     default :
1125       prefix = "";
1126     }
1127 
1128     return prefix;
1129   }
1130 
1131   /**
1132    * Given a node handle, return its DOM-style namespace URI
1133    * (As defined in Namespaces, this is the declared URI which this node's
1134    * prefix -- or default in lieu thereof -- was mapped to.)
1135    *
1136    * <p>%REVIEW% Null or ""? -sb</p>
1137    *
1138    * @param nodeHandle the id of the node.
1139    * @return String URI value of this node's namespace, or null if no
1140    * namespace was resolved.
1141    */
1142   public String getNamespaceURI(int nodeHandle)
1143   {
1144     if(JJK_NEWCODE)
1145     {
1146       int id=makeNodeIdentity(nodeHandle);
1147       if(id==NULL) return null;
1148       Node node=m_nodes.get(id);
1149       return node.getNamespaceURI();
1150     }
1151     else
1152     {
1153       String nsuri;
1154       short type = getNodeType(nodeHandle);
1155 
1156       switch (type)
1157       {
1158       case DTM.ATTRIBUTE_NODE :
1159       case DTM.ELEMENT_NODE :
1160       case DTM.ENTITY_REFERENCE_NODE :
1161       case DTM.NAMESPACE_NODE :
1162       case DTM.PROCESSING_INSTRUCTION_NODE :
1163         {
1164           Node node = getNode(nodeHandle);
1165 
1166           // assume not null.
1167           nsuri = node.getNamespaceURI();
1168 
1169           // %TBD% Handle DOM1?
1170         }
1171         break;
1172       default :
1173         nsuri = null;
1174       }
1175 
1176       return nsuri;
1177     }
1178 
1179   }
1180 
1181   /** Utility function: Given a DOM Text node, determine whether it is
1182    * logically followed by another Text or CDATASection node. This may
1183    * involve traversing into Entity References.
1184    *
1185    * %REVIEW% DOM Level 3 is expected to add functionality which may
1186    * allow us to retire this.
1187    */
1188   private Node logicalNextDOMTextNode(Node n)
1189   {
1190         Node p=n.getNextSibling();
1191         if(p==null)
1192         {
1193                 // Walk out of any EntityReferenceNodes that ended with text
1194                 for(n=n.getParentNode();
1195                         n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1196                         n=n.getParentNode())
1197                 {
1198                         p=n.getNextSibling();
1199                         if(p!=null)
1200                                 break;
1201                 }
1202         }
1203         n=p;
1204         while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1205         {
1206                 // Walk into any EntityReferenceNodes that start with text
1207                 if(n.hasChildNodes())
1208                         n=n.getFirstChild();
1209                 else
1210                         n=n.getNextSibling();
1211         }
1212         if(n!=null)
1213         {
1214                 // Found a logical next sibling. Is it text?
1215                 int ntype=n.getNodeType();
1216                 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1217                         n=null;
1218         }
1219         return n;
1220   }
1221 
1222   /**
1223    * Given a node handle, return its node value. This is mostly
1224    * as defined by the DOM, but may ignore some conveniences.
1225    * <p>
1226    *
1227    * @param nodeHandle The node id.
1228    * @return String Value of this node, or null if not
1229    * meaningful for this node type.
1230    */
1231   public String getNodeValue(int nodeHandle)
1232   {
1233     // The _type(nodeHandle) call was taking the lion's share of our
1234     // time, and was wrong anyway since it wasn't coverting handle to
1235     // identity. Inlined it.
1236     int type = _exptype(makeNodeIdentity(nodeHandle));
1237     type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1238 
1239     if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1240       return getNode(nodeHandle).getNodeValue();
1241 
1242     // If this is a DTM text node, it may be made of multiple DOM text
1243     // nodes -- including navigating into Entity References. DOM2DTM
1244     // records the first node in the sequence and requires that we
1245     // pick up the others when we retrieve the DTM node's value.
1246     //
1247     // %REVIEW% DOM Level 3 is expected to add a "whole text"
1248     // retrieval method which performs this function for us.
1249     Node node = getNode(nodeHandle);
1250     Node n=logicalNextDOMTextNode(node);
1251     if(n==null)
1252       return node.getNodeValue();
1253 
1254     FastStringBuffer buf = StringBufferPool.get();
1255         buf.append(node.getNodeValue());
1256     while(n!=null)
1257     {
1258       buf.append(n.getNodeValue());
1259       n=logicalNextDOMTextNode(n);
1260     }
1261     String s = (buf.length() > 0) ? buf.toString() : "";
1262     StringBufferPool.free(buf);
1263     return s;
1264   }
1265 
1266   /**
1267    *   A document type declaration information item has the following properties:
1268    *
1269    *     1. [system identifier] The system identifier of the external subset, if
1270    *        it exists. Otherwise this property has no value.
1271    *
1272    * @return the system identifier String object, or null if there is none.
1273    */
1274   public String getDocumentTypeDeclarationSystemIdentifier()
1275   {
1276 
1277     Document doc;
1278 
1279     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1280       doc = (Document) m_root;
1281     else
1282       doc = m_root.getOwnerDocument();
1283 
1284     if (null != doc)
1285     {
1286       DocumentType dtd = doc.getDoctype();
1287 
1288       if (null != dtd)
1289       {
1290         return dtd.getSystemId();
1291       }
1292     }
1293 
1294     return null;
1295   }
1296 
1297   /**
1298    * Return the public identifier of the external subset,
1299    * normalized as described in 4.2.2 External Entities [XML]. If there is
1300    * no external subset or if it has no public identifier, this property
1301    * has no value.
1302    *
1303    * @return the public identifier String object, or null if there is none.
1304    */
1305   public String getDocumentTypeDeclarationPublicIdentifier()
1306   {
1307 
1308     Document doc;
1309 
1310     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1311       doc = (Document) m_root;
1312     else
1313       doc = m_root.getOwnerDocument();
1314 
1315     if (null != doc)
1316     {
1317       DocumentType dtd = doc.getDoctype();
1318 
1319       if (null != dtd)
1320       {
1321         return dtd.getPublicId();
1322       }
1323     }
1324 
1325     return null;
1326   }
1327 
1328   /**
1329    * Returns the <code>Element</code> whose <code>ID</code> is given by
1330    * <code>elementId</code>. If no such element exists, returns
1331    * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1332    * has this <code>ID</code>. Attributes (including those
1333    * with the name "ID") are not of type ID unless so defined by DTD/Schema
1334    * information available to the DTM implementation.
1335    * Implementations that do not know whether attributes are of type ID or
1336    * not are expected to return <code>DTM.NULL</code>.
1337    *
1338    * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1339    * and this operation searches only within a single document, right?
1340    * Wouldn't want collisions between DTMs in the same process.</p>
1341    *
1342    * @param elementId The unique <code>id</code> value for an element.
1343    * @return The handle of the matching element.
1344    */
1345   public int getElementById(String elementId)
1346   {
1347 
1348     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1349         ? (Document) m_root : m_root.getOwnerDocument();
1350 
1351     if(null != doc)
1352     {
1353       Node elem = doc.getElementById(elementId);
1354       if(null != elem)
1355       {
1356         int elemHandle = getHandleFromNode(elem);
1357 
1358         if(DTM.NULL == elemHandle)
1359         {
1360           int identity = m_nodes.size()-1;
1361           while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1362           {
1363             Node node = getNode(identity);
1364             if(node == elem)
1365             {
1366               elemHandle = getHandleFromNode(elem);
1367               break;
1368             }
1369            }
1370         }
1371 
1372         return elemHandle;
1373       }
1374 
1375     }
1376     return DTM.NULL;
1377   }
1378 
1379   /**
1380    * The getUnparsedEntityURI function returns the URI of the unparsed
1381    * entity with the specified name in the same document as the context
1382    * node (see [3.3 Unparsed Entities]). It returns the empty string if
1383    * there is no such entity.
1384    * <p>
1385    * XML processors may choose to use the System Identifier (if one
1386    * is provided) to resolve the entity, rather than the URI in the
1387    * Public Identifier. The details are dependent on the processor, and
1388    * we would have to support some form of plug-in resolver to handle
1389    * this properly. Currently, we simply return the System Identifier if
1390    * present, and hope that it a usable URI or that our caller can
1391    * map it to one.
1392    * TODO: Resolve Public Identifiers... or consider changing function name.
1393    * <p>
1394    * If we find a relative URI
1395    * reference, XML expects it to be resolved in terms of the base URI
1396    * of the document. The DOM doesn't do that for us, and it isn't
1397    * entirely clear whether that should be done here; currently that's
1398    * pushed up to a higher level of our application. (Note that DOM Level
1399    * 1 didn't store the document's base URI.)
1400    * TODO: Consider resolving Relative URIs.
1401    * <p>
1402    * (The DOM's statement that "An XML processor may choose to
1403    * completely expand entities before the structure model is passed
1404    * to the DOM" refers only to parsed entities, not unparsed, and hence
1405    * doesn't affect this function.)
1406    *
1407    * @param name A string containing the Entity Name of the unparsed
1408    * entity.
1409    *
1410    * @return String containing the URI of the Unparsed Entity, or an
1411    * empty string if no such entity exists.
1412    */
1413   public String getUnparsedEntityURI(String name)
1414   {
1415 
1416     String url = "";
1417     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1418         ? (Document) m_root : m_root.getOwnerDocument();
1419 
1420     if (null != doc)
1421     {
1422       DocumentType doctype = doc.getDoctype();
1423 
1424       if (null != doctype)
1425       {
1426         NamedNodeMap entities = doctype.getEntities();
1427         if(null == entities)
1428           return url;
1429         Entity entity = (Entity) entities.getNamedItem(name);
1430         if(null == entity)
1431           return url;
1432 
1433         String notationName = entity.getNotationName();
1434 
1435         if (null != notationName)  // then it's unparsed
1436         {
1437           // The draft says: "The XSLT processor may use the public
1438           // identifier to generate a URI for the entity instead of the URI
1439           // specified in the system identifier. If the XSLT processor does
1440           // not use the public identifier to generate the URI, it must use
1441           // the system identifier; if the system identifier is a relative
1442           // URI, it must be resolved into an absolute URI using the URI of
1443           // the resource containing the entity declaration as the base
1444           // URI [RFC2396]."
1445           // So I'm falling a bit short here.
1446           url = entity.getSystemId();
1447 
1448           if (null == url)
1449           {
1450             url = entity.getPublicId();
1451           }
1452           else
1453           {
1454             // This should be resolved to an absolute URL, but that's hard
1455             // to do from here.
1456           }
1457         }
1458       }
1459     }
1460 
1461     return url;
1462   }
1463 
1464   /**
1465    *     5. [specified] A flag indicating whether this attribute was actually
1466    *        specified in the start-tag of its element, or was defaulted from the
1467    *        DTD.
1468    *
1469    * @param attributeHandle the attribute handle
1470    * @return <code>true</code> if the attribute was specified;
1471    *         <code>false</code> if it was defaulted.
1472    */
1473   public boolean isAttributeSpecified(int attributeHandle)
1474   {
1475     int type = getNodeType(attributeHandle);
1476 
1477     if (DTM.ATTRIBUTE_NODE == type)
1478     {
1479       Attr attr = (Attr)getNode(attributeHandle);
1480       return attr.getSpecified();
1481     }
1482     return false;
1483   }
1484 
1485   /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1486    * we're wrapped around an existing DOM.
1487    *
1488    * @param source The IncrementalSAXSource that we want to recieve events from
1489    * on demand.
1490    */
1491   public void setIncrementalSAXSource(IncrementalSAXSource source)
1492   {
1493   }
1494 
1495   /** getContentHandler returns "our SAX builder" -- the thing that
1496    * someone else should send SAX events to in order to extend this
1497    * DTM model.
1498    *
1499    * @return null if this model doesn't respond to SAX events,
1500    * "this" if the DTM object has a built-in SAX ContentHandler,
1501    * the IncrmentalSAXSource if we're bound to one and should receive
1502    * the SAX stream via it for incremental build purposes...
1503    * */
1504   public org.xml.sax.ContentHandler getContentHandler()
1505   {
1506       return null;
1507   }
1508 
1509   /**
1510    * Return this DTM's lexical handler.
1511    *
1512    * %REVIEW% Should this return null if constrution already done/begun?
1513    *
1514    * @return null if this model doesn't respond to lexical SAX events,
1515    * "this" if the DTM object has a built-in SAX ContentHandler,
1516    * the IncrementalSAXSource if we're bound to one and should receive
1517    * the SAX stream via it for incremental build purposes...
1518    */
1519   public org.xml.sax.ext.LexicalHandler getLexicalHandler()
1520   {
1521 
1522     return null;
1523   }
1524 
1525 
1526   /**
1527    * Return this DTM's EntityResolver.
1528    *
1529    * @return null if this model doesn't respond to SAX entity ref events.
1530    */
1531   public org.xml.sax.EntityResolver getEntityResolver()
1532   {
1533 
1534     return null;
1535   }
1536 
1537   /**
1538    * Return this DTM's DTDHandler.
1539    *
1540    * @return null if this model doesn't respond to SAX dtd events.
1541    */
1542   public org.xml.sax.DTDHandler getDTDHandler()
1543   {
1544 
1545     return null;
1546   }
1547 
1548   /**
1549    * Return this DTM's ErrorHandler.
1550    *
1551    * @return null if this model doesn't respond to SAX error events.
1552    */
1553   public org.xml.sax.ErrorHandler getErrorHandler()
1554   {
1555 
1556     return null;
1557   }
1558 
1559   /**
1560    * Return this DTM's DeclHandler.
1561    *
1562    * @return null if this model doesn't respond to SAX Decl events.
1563    */
1564   public org.xml.sax.ext.DeclHandler getDeclHandler()
1565   {
1566 
1567     return null;
1568   }
1569 
1570   /** @return true iff we're building this model incrementally (eg
1571    * we're partnered with a IncrementalSAXSource) and thus require that the
1572    * transformation and the parse run simultaneously. Guidance to the
1573    * DTMManager.
1574    * */
1575   public boolean needsTwoThreads()
1576   {
1577     return false;
1578   }
1579 
1580   // ========== Direct SAX Dispatch, for optimization purposes ========
1581 
1582   /**
1583    * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1584    * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1585    * the definition of <CODE>S</CODE></A> for details.
1586    * @param   ch      Character to check as XML whitespace.
1587    * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
1588    */
1589   private static boolean isSpace(char ch)
1590   {
1591     return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
1592   }
1593 
1594   /**
1595    * Directly call the
1596    * characters method on the passed ContentHandler for the
1597    * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1598    * for the definition of a node's string-value). Multiple calls to the
1599    * ContentHandler's characters methods may well occur for a single call to
1600    * this method.
1601    *
1602    * @param nodeHandle The node ID.
1603    * @param ch A non-null reference to a ContentHandler.
1604    *
1605    * @throws org.xml.sax.SAXException
1606    */
1607   public void dispatchCharactersEvents(
1608           int nodeHandle, org.xml.sax.ContentHandler ch,
1609           boolean normalize)
1610             throws org.xml.sax.SAXException
1611   {
1612     if(normalize)
1613     {
1614       XMLString str = getStringValue(nodeHandle);
1615       str = str.fixWhiteSpace(true, true, false);
1616       str.dispatchCharactersEvents(ch);
1617     }
1618     else
1619     {
1620       int type = getNodeType(nodeHandle);
1621       Node node = getNode(nodeHandle);
1622       dispatchNodeData(node, ch, 0);
1623           // Text coalition -- a DTM text node may represent multiple
1624           // DOM nodes.
1625           if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1626           {
1627                   while( null != (node=logicalNextDOMTextNode(node)) )
1628                   {
1629                       dispatchNodeData(node, ch, 0);
1630                   }
1631           }
1632     }
1633   }
1634 
1635   /**
1636    * Retrieve the text content of a DOM subtree, appending it into a
1637    * user-supplied FastStringBuffer object. Note that attributes are
1638    * not considered part of the content of an element.
1639    * <p>
1640    * There are open questions regarding whitespace stripping.
1641    * Currently we make no special effort in that regard, since the standard
1642    * DOM doesn't yet provide DTD-based information to distinguish
1643    * whitespace-in-element-context from genuine #PCDATA. Note that we
1644    * should probably also consider xml:space if/when we address this.
1645    * DOM Level 3 may solve the problem for us.
1646    * <p>
1647    * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1648    * routine _shouldn't_ perform any processing beyond what the DOM already
1649    * does, and that whitespace stripping and so on belong at the DTM level.
1650    * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1651    *
1652    * @param node Node whose subtree is to be walked, gathering the
1653    * contents of all Text or CDATASection nodes.
1654    */
1655   @SuppressWarnings("fallthrough")
1656   protected static void dispatchNodeData(Node node,
1657                                          org.xml.sax.ContentHandler ch,
1658                                          int depth)
1659             throws org.xml.sax.SAXException
1660   {
1661 
1662     switch (node.getNodeType())
1663     {
1664     case Node.DOCUMENT_FRAGMENT_NODE :
1665     case Node.DOCUMENT_NODE :
1666     case Node.ELEMENT_NODE :
1667     {
1668       for (Node child = node.getFirstChild(); null != child;
1669               child = child.getNextSibling())
1670       {
1671         dispatchNodeData(child, ch, depth+1);
1672       }
1673     }
1674     break;
1675     case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1676     case Node.COMMENT_NODE :
1677       if(0 != depth)
1678         break;
1679         // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1680         // to perform Text Coalition. That should only be done in DTM space.
1681     case Node.TEXT_NODE :
1682     case Node.CDATA_SECTION_NODE :
1683     case Node.ATTRIBUTE_NODE :
1684       String str = node.getNodeValue();
1685       if(ch instanceof CharacterNodeHandler)
1686       {
1687         ((CharacterNodeHandler)ch).characters(node);
1688       }
1689       else
1690       {
1691         ch.characters(str.toCharArray(), 0, str.length());
1692       }
1693       break;
1694 //    /* case Node.PROCESSING_INSTRUCTION_NODE :
1695 //      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1696 //      break; */
1697     default :
1698       // ignore
1699       break;
1700     }
1701   }
1702 
1703   TreeWalker m_walker = new TreeWalker(null);
1704 
1705   /**
1706    * Directly create SAX parser events from a subtree.
1707    *
1708    * @param nodeHandle The node ID.
1709    * @param ch A non-null reference to a ContentHandler.
1710    *
1711    * @throws org.xml.sax.SAXException
1712    */
1713   public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1714           throws org.xml.sax.SAXException
1715   {
1716     TreeWalker treeWalker = m_walker;
1717     ContentHandler prevCH = treeWalker.getContentHandler();
1718 
1719     if(null != prevCH)
1720     {
1721       treeWalker = new TreeWalker(null);
1722     }
1723     treeWalker.setContentHandler(ch);
1724 
1725     try
1726     {
1727       Node node = getNode(nodeHandle);
1728       treeWalker.traverseFragment(node);
1729     }
1730     finally
1731     {
1732       treeWalker.setContentHandler(null);
1733     }
1734   }
1735 
1736   public interface CharacterNodeHandler
1737   {
1738     public void characters(Node node)
1739             throws org.xml.sax.SAXException;
1740   }
1741 
1742   /**
1743    * For the moment all the run time properties are ignored by this
1744    * class.
1745    *
1746    * @param property a <code>String</code> value
1747    * @param value an <code>Object</code> value
1748    */
1749   public void setProperty(String property, Object value)
1750   {
1751   }
1752 
1753   /**
1754    * No source information is available for DOM2DTM, so return
1755    * <code>null</code> here.
1756    *
1757    * @param node an <code>int</code> value
1758    * @return null
1759    */
1760   public SourceLocator getSourceLocatorFor(int node)
1761   {
1762     return null;
1763   }
1764 
1765 }