1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl.dtd.models; 22 23 import com.sun.org.apache.xerces.internal.impl.dtd.XMLContentSpec; 24 import com.sun.org.apache.xerces.internal.xni.QName; 25 import java.util.HashMap; 26 import java.util.Map; 27 28 /** 29 30 * DFAContentModel is the derivative of ContentModel that does 31 * all of the non-trivial element content validation. This class does 32 * the conversion from the regular expression to the DFA that 33 * it then uses in its validation algorithm. 34 * <p> 35 * <b>Note:</b> Upstream work insures that this class will never see 36 * a content model with PCDATA in it. Any model with PCDATA is 'mixed' 37 * and is handled via the MixedContentModel class since mixed models 38 * are very constrained in form and easily handled via a special case. 39 * This also makes implementation of this class much easier. 40 * 41 * @xerces.internal 42 * 43 * @LastModified: Oct 2017 44 */ 45 public class DFAContentModel 46 implements ContentModelValidator { 47 48 // 49 // Constants 50 // 51 // special strings 52 53 /** Epsilon string. */ 54 private static String fEpsilonString = "<<CMNODE_EPSILON>>"; 55 56 /** End-of-content string. */ 57 private static String fEOCString = "<<CMNODE_EOC>>"; 58 59 /** initializing static members **/ 60 static { 61 fEpsilonString = fEpsilonString.intern(); 62 fEOCString = fEOCString.intern(); 63 } 64 65 // debugging 66 67 /** Set to true to debug content model validation. */ 68 private static final boolean DEBUG_VALIDATE_CONTENT = false; 69 70 // 71 // Data 72 // 73 74 /* this is the EquivClassComparator object */ 75 //private EquivClassComparator comparator = null; 76 77 /** 78 * This is the map of unique input symbol elements to indices into 79 * each state's per-input symbol transition table entry. This is part 80 * of the built DFA information that must be kept around to do the 81 * actual validation. 82 */ 83 private QName fElemMap[] = null; 84 85 /** 86 * This is a map of whether the element map contains information 87 * related to ANY models. 88 */ 89 private int fElemMapType[] = null; 90 91 /** The element map size. */ 92 private int fElemMapSize = 0; 93 94 /** Boolean to distinguish Schema Mixed Content */ 95 private boolean fMixed; 96 97 /** 98 * The NFA position of the special EOC (end of content) node. This 99 * is saved away since it's used during the DFA build. 100 */ 101 private int fEOCPos = 0; 102 103 104 /** 105 * This is an array of booleans, one per state (there are 106 * fTransTableSize states in the DFA) that indicates whether that 107 * state is a final state. 108 */ 109 private boolean fFinalStateFlags[] = null; 110 111 /** 112 * The list of follow positions for each NFA position (i.e. for each 113 * non-epsilon leaf node.) This is only used during the building of 114 * the DFA, and is let go afterwards. 115 */ 116 private CMStateSet fFollowList[] = null; 117 118 /** 119 * This is the head node of our intermediate representation. It is 120 * only non-null during the building of the DFA (just so that it 121 * does not have to be passed all around.) Once the DFA is built, 122 * this is no longer required so its nulled out. 123 */ 124 private CMNode fHeadNode = null; 125 126 /** 127 * The count of leaf nodes. This is an important number that set some 128 * limits on the sizes of data structures in the DFA process. 129 */ 130 private int fLeafCount = 0; 131 132 /** 133 * An array of non-epsilon leaf nodes, which is used during the DFA 134 * build operation, then dropped. 135 */ 136 private CMLeaf fLeafList[] = null; 137 138 /** Array mapping ANY types to the leaf list. */ 139 private int fLeafListType[] = null; 140 141 //private ContentLeafNameTypeVector fLeafNameTypeVector = null; 142 143 /** 144 * The string pool of our parser session. This is set during construction 145 * and kept around. 146 */ 147 //private StringPool fStringPool = null; 148 149 /** 150 * This is the transition table that is the main by product of all 151 * of the effort here. It is an array of arrays of ints. The first 152 * dimension is the number of states we end up with in the DFA. The 153 * second dimensions is the number of unique elements in the content 154 * model (fElemMapSize). Each entry in the second dimension indicates 155 * the new state given that input for the first dimension's start 156 * state. 157 * <p> 158 * The fElemMap array handles mapping from element indexes to 159 * positions in the second dimension of the transition table. 160 */ 161 private int fTransTable[][] = null; 162 163 /** 164 * The number of valid entries in the transition table, and in the other 165 * related tables such as fFinalStateFlags. 166 */ 167 private int fTransTableSize = 0; 168 169 /** 170 * Flag that indicates that even though we have a "complicated" 171 * content model, it is valid to have no content. In other words, 172 * all parts of the content model are optional. For example: 173 * <pre> 174 * <!ELEMENT AllOptional (Optional*,NotRequired?)> 175 * </pre> 176 */ 177 private boolean fEmptyContentIsValid = false; 178 179 // temp variables 180 181 /** Temporary qualified name. */ 182 private final QName fQName = new QName(); 183 184 // 185 // Constructors 186 // 187 188 189 // 190 // Constructors 191 // 192 193 /** 194 * Constructs a DFA content model. 195 * 196 * @param syntaxTree The syntax tree of the content model. 197 * @param leafCount The number of leaves. 198 * @param mixed 199 * 200 */ 201 public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) { 202 // Store away our index and pools in members 203 //fStringPool = stringPool; 204 fLeafCount = leafCount; 205 206 207 // this is for Schema Mixed Content 208 fMixed = mixed; 209 210 // 211 // Ok, so lets grind through the building of the DFA. This method 212 // handles the high level logic of the algorithm, but it uses a 213 // number of helper classes to do its thing. 214 // 215 // In order to avoid having hundreds of references to the error and 216 // string handlers around, this guy and all of his helper classes 217 // just throw a simple exception and we then pass it along. 218 // 219 buildDFA(syntaxTree); 220 } 221 222 // 223 // ContentModelValidator methods 224 // 225 226 /** 227 * Check that the specified content is valid according to this 228 * content model. This method can also be called to do 'what if' 229 * testing of content models just to see if they would be valid. 230 * <p> 231 * A value of -1 in the children array indicates a PCDATA node. All other 232 * indexes will be positive and represent child elements. The count can be 233 * zero, since some elements have the EMPTY content model and that must be 234 * confirmed. 235 * 236 * @param children The children of this element. Each integer is an index within 237 * the <code>StringPool</code> of the child element name. An index 238 * of -1 is used to indicate an occurrence of non-whitespace character 239 * data. 240 * @param offset Offset into the array where the children starts. 241 * @param length The number of entries in the <code>children</code> array. 242 * 243 * @return The value -1 if fully valid, else the 0 based index of the child 244 * that first failed. If the value returned is equal to the number 245 * of children, then the specified children are valid but additional 246 * content is required to reach a valid ending state. 247 * 248 */ 249 public int validate(QName[] children, int offset, int length) { 250 251 if (DEBUG_VALIDATE_CONTENT) 252 System.out.println("DFAContentModel#validateContent"); 253 254 // 255 // A DFA content model must *always* have at least 1 child 256 // so a failure is given if no children present. 257 // 258 // Defect 782: This is an incorrect statement because a DFA 259 // content model is also used for constructions such as: 260 // 261 // (Optional*,NotRequired?) 262 // 263 // where a perfectly valid content would be NO CHILDREN. 264 // Therefore, if there are no children, we must check to 265 // see if the CMNODE_EOC marker is a valid start state! -Ac 266 // 267 if (length == 0) { 268 if (DEBUG_VALIDATE_CONTENT) { 269 System.out.println("!!! no children"); 270 System.out.println("elemMap="+fElemMap); 271 for (int i = 0; i < fElemMap.length; i++) { 272 String uri = fElemMap[i].uri; 273 String localpart = fElemMap[i].localpart; 274 275 System.out.println("fElemMap["+i+"]="+uri+","+ 276 localpart+" ("+ 277 uri+", "+ 278 localpart+ 279 ')'); 280 281 } 282 System.out.println("EOCIndex="+fEOCString); 283 } 284 285 return fEmptyContentIsValid ? -1 : 0; 286 287 } // if child count == 0 288 289 // 290 // Lets loop through the children in the array and move our way 291 // through the states. Note that we use the fElemMap array to map 292 // an element index to a state index. 293 // 294 int curState = 0; 295 for (int childIndex = 0; childIndex < length; childIndex++) 296 { 297 // Get the current element index out 298 final QName curElem = children[offset + childIndex]; 299 // ignore mixed text 300 if (fMixed && curElem.localpart == null) { 301 continue; 302 } 303 304 // Look up this child in our element map 305 int elemIndex = 0; 306 for (; elemIndex < fElemMapSize; elemIndex++) 307 { 308 int type = fElemMapType[elemIndex] & 0x0f ; 309 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) { 310 //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]); 311 if (fElemMap[elemIndex].rawname == curElem.rawname) { 312 break; 313 } 314 } 315 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) { 316 String uri = fElemMap[elemIndex].uri; 317 if (uri == null || uri == curElem.uri) { 318 break; 319 } 320 } 321 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) { 322 if (curElem.uri == null) { 323 break; 324 } 325 } 326 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) { 327 if (fElemMap[elemIndex].uri != curElem.uri) { 328 break; 329 } 330 } 331 } 332 333 // If we didn't find it, then obviously not valid 334 if (elemIndex == fElemMapSize) { 335 if (DEBUG_VALIDATE_CONTENT) { 336 System.out.println("!!! didn't find it"); 337 338 System.out.println("curElem : " +curElem ); 339 for (int i=0; i<fElemMapSize; i++) { 340 System.out.println("fElemMap["+i+"] = " +fElemMap[i] ); 341 System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] ); 342 } 343 } 344 345 return childIndex; 346 } 347 348 // 349 // Look up the next state for this input symbol when in the 350 // current state. 351 // 352 curState = fTransTable[curState][elemIndex]; 353 354 // If its not a legal transition, then invalid 355 if (curState == -1) { 356 if (DEBUG_VALIDATE_CONTENT) 357 System.out.println("!!! not a legal transition"); 358 return childIndex; 359 } 360 } 361 362 // 363 // We transitioned all the way through the input list. However, that 364 // does not mean that we ended in a final state. So check whether 365 // our ending state is a final state. 366 // 367 if (DEBUG_VALIDATE_CONTENT) 368 System.out.println("curState="+curState+", childCount="+length); 369 if (!fFinalStateFlags[curState]) 370 return length; 371 372 // success! 373 return -1; 374 } // validate 375 376 377 // 378 // Private methods 379 // 380 381 /** 382 * Builds the internal DFA transition table from the given syntax tree. 383 * 384 * @param syntaxTree The syntax tree. 385 * 386 * @exception CMException Thrown if DFA cannot be built. 387 */ 388 private void buildDFA(CMNode syntaxTree) 389 { 390 // 391 // The first step we need to take is to rewrite the content model 392 // using our CMNode objects, and in the process get rid of any 393 // repetition short cuts, converting them into '*' style repetitions 394 // or getting rid of repetitions altogether. 395 // 396 // The conversions done are: 397 // 398 // x+ -> (x|x*) 399 // x? -> (x|epsilon) 400 // 401 // This is a relatively complex scenario. What is happening is that 402 // we create a top level binary node of which the special EOC value 403 // is set as the right side node. The the left side is set to the 404 // rewritten syntax tree. The source is the original content model 405 // info from the decl pool. The rewrite is done by buildSyntaxTree() 406 // which recurses the decl pool's content of the element and builds 407 // a new tree in the process. 408 // 409 // Note that, during this operation, we set each non-epsilon leaf 410 // node's DFA state position and count the number of such leafs, which 411 // is left in the fLeafCount member. 412 // 413 // The nodeTmp object is passed in just as a temp node to use during 414 // the recursion. Otherwise, we'd have to create a new node on every 415 // level of recursion, which would be piggy in Java (as is everything 416 // for that matter.) 417 // 418 419 /* MODIFIED (Jan, 2001) 420 * 421 * Use following rules. 422 * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) 423 * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) 424 * 425 * The same computation of follow as x* is applied to x+ 426 * 427 * The modification drastically reduces computation time of 428 * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" 429 */ 430 431 fQName.setValues(null, fEOCString, fEOCString, null); 432 CMLeaf nodeEOC = new CMLeaf(fQName); 433 fHeadNode = new CMBinOp 434 ( 435 XMLContentSpec.CONTENTSPECNODE_SEQ 436 , syntaxTree 437 , nodeEOC 438 ); 439 440 // 441 // And handle specially the EOC node, which also must be numbered 442 // and counted as a non-epsilon leaf node. It could not be handled 443 // in the above tree build because it was created before all that 444 // started. We save the EOC position since its used during the DFA 445 // building loop. 446 // 447 fEOCPos = fLeafCount; 448 nodeEOC.setPosition(fLeafCount++); 449 450 // 451 // Ok, so now we have to iterate the new tree and do a little more 452 // work now that we know the leaf count. One thing we need to do is 453 // to calculate the first and last position sets of each node. This 454 // is cached away in each of the nodes. 455 // 456 // Along the way we also set the leaf count in each node as the 457 // maximum state count. They must know this in order to create their 458 // first/last pos sets. 459 // 460 // We also need to build an array of references to the non-epsilon 461 // leaf nodes. Since we iterate it in the same way as before, this 462 // will put them in the array according to their position values. 463 // 464 fLeafList = new CMLeaf[fLeafCount]; 465 fLeafListType = new int[fLeafCount]; 466 postTreeBuildInit(fHeadNode, 0); 467 468 // 469 // And, moving onward... We now need to build the follow position 470 // sets for all the nodes. So we allocate an array of state sets, 471 // one for each leaf node (i.e. each DFA position.) 472 // 473 fFollowList = new CMStateSet[fLeafCount]; 474 for (int index = 0; index < fLeafCount; index++) 475 fFollowList[index] = new CMStateSet(fLeafCount); 476 calcFollowList(fHeadNode); 477 // 478 // And finally the big push... Now we build the DFA using all the 479 // states and the tree we've built up. First we set up the various 480 // data structures we are going to use while we do this. 481 // 482 // First of all we need an array of unique element names in our 483 // content model. For each transition table entry, we need a set of 484 // contiguous indices to represent the transitions for a particular 485 // input element. So we need to a zero based range of indexes that 486 // map to element types. This element map provides that mapping. 487 // 488 fElemMap = new QName[fLeafCount]; 489 fElemMapType = new int[fLeafCount]; 490 fElemMapSize = 0; 491 for (int outIndex = 0; outIndex < fLeafCount; outIndex++) 492 { 493 fElemMap[outIndex] = new QName(); 494 495 /**** 496 if ( (fLeafListType[outIndex] & 0x0f) != 0 ) { 497 if (fLeafNameTypeVector == null) { 498 fLeafNameTypeVector = new ContentLeafNameTypeVector(); 499 } 500 } 501 /****/ 502 503 // Get the current leaf's element index 504 final QName element = fLeafList[outIndex].getElement(); 505 506 // See if the current leaf node's element index is in the list 507 int inIndex = 0; 508 for (; inIndex < fElemMapSize; inIndex++) 509 { 510 if (fElemMap[inIndex].rawname == element.rawname) { 511 break; 512 } 513 } 514 515 // If it was not in the list, then add it, if not the EOC node 516 if (inIndex == fElemMapSize) { 517 fElemMap[fElemMapSize].setValues(element); 518 fElemMapType[fElemMapSize] = fLeafListType[outIndex]; 519 fElemMapSize++; 520 } 521 } 522 // set up the fLeafNameTypeVector object if there is one. 523 /***** 524 if (fLeafNameTypeVector != null) { 525 fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize); 526 } 527 528 /*** 529 * Optimization(Jan, 2001); We sort fLeafList according to 530 * elemIndex which is *uniquely* associated to each leaf. 531 * We are *assuming* that each element appears in at least one leaf. 532 **/ 533 534 int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; 535 int fSortCount = 0; 536 537 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 538 for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { 539 final QName leaf = fLeafList[leafIndex].getElement(); 540 final QName element = fElemMap[elemIndex]; 541 if (leaf.rawname == element.rawname) { 542 fLeafSorter[fSortCount++] = leafIndex; 543 } 544 } 545 fLeafSorter[fSortCount++] = -1; 546 } 547 548 /* Optimization(Jan, 2001) */ 549 550 // 551 // Next lets create some arrays, some that that hold transient 552 // information during the DFA build and some that are permament. 553 // These are kind of sticky since we cannot know how big they will 554 // get, but we don't want to use any Java collections because of 555 // performance. 556 // 557 // Basically they will probably be about fLeafCount*2 on average, 558 // but can be as large as 2^(fLeafCount*2), worst case. So we start 559 // with fLeafCount*4 as a middle ground. This will be very unlikely 560 // to ever have to expand, though it if does, the overhead will be 561 // somewhat ugly. 562 // 563 int curArraySize = fLeafCount * 4; 564 CMStateSet[] statesToDo = new CMStateSet[curArraySize]; 565 fFinalStateFlags = new boolean[curArraySize]; 566 fTransTable = new int[curArraySize][]; 567 568 // 569 // Ok we start with the initial set as the first pos set of the 570 // head node (which is the seq node that holds the content model 571 // and the EOC node.) 572 // 573 CMStateSet setT = fHeadNode.firstPos(); 574 575 // 576 // Init our two state flags. Basically the unmarked state counter 577 // is always chasing the current state counter. When it catches up, 578 // that means we made a pass through that did not add any new states 579 // to the lists, at which time we are done. We could have used a 580 // expanding array of flags which we used to mark off states as we 581 // complete them, but this is easier though less readable maybe. 582 // 583 int unmarkedState = 0; 584 int curState = 0; 585 586 // 587 // Init the first transition table entry, and put the initial state 588 // into the states to do list, then bump the current state. 589 // 590 fTransTable[curState] = makeDefStateList(); 591 statesToDo[curState] = setT; 592 curState++; 593 594 /* Optimization(Jan, 2001); This is faster for 595 * a large content model such as, "(t001+|t002+|.... |t500+)". 596 */ 597 598 Map<CMStateSet, Integer> stateTable = new HashMap<>(); 599 600 /* Optimization(Jan, 2001) */ 601 602 // 603 // Ok, almost done with the algorithm... We now enter the 604 // loop where we go until the states done counter catches up with 605 // the states to do counter. 606 // 607 while (unmarkedState < curState) 608 { 609 // 610 // Get the first unmarked state out of the list of states to do. 611 // And get the associated transition table entry. 612 // 613 setT = statesToDo[unmarkedState]; 614 int[] transEntry = fTransTable[unmarkedState]; 615 616 // Mark this one final if it contains the EOC state 617 fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos); 618 619 // Bump up the unmarked state count, marking this state done 620 unmarkedState++; 621 622 // Loop through each possible input symbol in the element map 623 CMStateSet newSet = null; 624 /* Optimization(Jan, 2001) */ 625 int sorterIndex = 0; 626 /* Optimization(Jan, 2001) */ 627 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) 628 { 629 // 630 // Build up a set of states which is the union of all of 631 // the follow sets of DFA positions that are in the current 632 // state. If we gave away the new set last time through then 633 // create a new one. Otherwise, zero out the existing one. 634 // 635 if (newSet == null) 636 newSet = new CMStateSet(fLeafCount); 637 else 638 newSet.zeroBits(); 639 640 /* Optimization(Jan, 2001) */ 641 int leafIndex = fLeafSorter[sorterIndex++]; 642 643 while (leafIndex != -1) { 644 // If this leaf index (DFA position) is in the current set... 645 if (setT.getBit(leafIndex)) 646 { 647 // 648 // If this leaf is the current input symbol, then we 649 // want to add its follow list to the set of states to 650 // transition to from the current state. 651 // 652 newSet.union(fFollowList[leafIndex]); 653 } 654 655 leafIndex = fLeafSorter[sorterIndex++]; 656 } 657 /* Optimization(Jan, 2001) */ 658 659 // 660 // If this new set is not empty, then see if its in the list 661 // of states to do. If not, then add it. 662 // 663 if (!newSet.isEmpty()) 664 { 665 // 666 // Search the 'states to do' list to see if this new 667 // state set is already in there. 668 // 669 670 /* Optimization(Jan, 2001) */ 671 Integer stateObj = stateTable.get(newSet); 672 int stateIndex = (stateObj == null ? curState : stateObj.intValue()); 673 /* Optimization(Jan, 2001) */ 674 675 // If we did not find it, then add it 676 if (stateIndex == curState) 677 { 678 // 679 // Put this new state into the states to do and init 680 // a new entry at the same index in the transition 681 // table. 682 // 683 statesToDo[curState] = newSet; 684 fTransTable[curState] = makeDefStateList(); 685 686 /* Optimization(Jan, 2001) */ 687 stateTable.put(newSet, curState); 688 /* Optimization(Jan, 2001) */ 689 690 // We now have a new state to do so bump the count 691 curState++; 692 693 // 694 // Null out the new set to indicate we adopted it. 695 // This will cause the creation of a new set on the 696 // next time around the loop. 697 // 698 newSet = null; 699 } 700 701 // 702 // Now set this state in the transition table's entry 703 // for this element (using its index), with the DFA 704 // state we will move to from the current state when we 705 // see this input element. 706 // 707 transEntry[elemIndex] = stateIndex; 708 709 // Expand the arrays if we're full 710 if (curState == curArraySize) 711 { 712 // 713 // Yikes, we overflowed the initial array size, so 714 // we've got to expand all of these arrays. So adjust 715 // up the size by 50% and allocate new arrays. 716 // 717 final int newSize = (int)(curArraySize * 1.5); 718 CMStateSet[] newToDo = new CMStateSet[newSize]; 719 boolean[] newFinalFlags = new boolean[newSize]; 720 int[][] newTransTable = new int[newSize][]; 721 722 // Copy over all of the existing content 723 System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); 724 System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); 725 System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); 726 727 // Store the new array size 728 curArraySize = newSize; 729 statesToDo = newToDo; 730 fFinalStateFlags = newFinalFlags; 731 fTransTable = newTransTable; 732 } 733 } 734 } 735 } 736 737 // Check to see if we can set the fEmptyContentIsValid flag. 738 fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable(); 739 740 // 741 // And now we can say bye bye to the temp representation since we've 742 // built the DFA. 743 // 744 if (DEBUG_VALIDATE_CONTENT) 745 dumpTree(fHeadNode, 0); 746 fHeadNode = null; 747 fLeafList = null; 748 fFollowList = null; 749 750 } 751 752 /** 753 * Calculates the follow list of the current node. 754 * 755 * @param nodeCur The curent node. 756 * 757 * @exception CMException Thrown if follow list cannot be calculated. 758 */ 759 private void calcFollowList(CMNode nodeCur) 760 { 761 // Recurse as required 762 if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) 763 { 764 // Recurse only 765 calcFollowList(((CMBinOp)nodeCur).getLeft()); 766 calcFollowList(((CMBinOp)nodeCur).getRight()); 767 } 768 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ) 769 { 770 // Recurse first 771 calcFollowList(((CMBinOp)nodeCur).getLeft()); 772 calcFollowList(((CMBinOp)nodeCur).getRight()); 773 774 // 775 // Now handle our level. We use our left child's last pos 776 // set and our right child's first pos set, so go ahead and 777 // get them ahead of time. 778 // 779 final CMStateSet last = ((CMBinOp)nodeCur).getLeft().lastPos(); 780 final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos(); 781 782 // 783 // Now, for every position which is in our left child's last set 784 // add all of the states in our right child's first set to the 785 // follow set for that position. 786 // 787 for (int index = 0; index < fLeafCount; index++) 788 { 789 if (last.getBit(index)) 790 fFollowList[index].union(first); 791 } 792 } 793 /*** 794 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE) 795 { 796 // Recurse first 797 calcFollowList(((CMUniOp)nodeCur).getChild()); 798 799 // 800 // Now handle our level. We use our own first and last position 801 // sets, so get them up front. 802 // 803 final CMStateSet first = nodeCur.firstPos(); 804 final CMStateSet last = nodeCur.lastPos(); 805 806 // 807 // For every position which is in our last position set, add all 808 // of our first position states to the follow set for that 809 // position. 810 // 811 for (int index = 0; index < fLeafCount; index++) 812 { 813 if (last.getBit(index)) 814 fFollowList[index].union(first); 815 } 816 } 817 else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) 818 || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)) 819 { 820 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 821 } 822 /***/ 823 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE 824 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) 825 { 826 // Recurse first 827 calcFollowList(((CMUniOp)nodeCur).getChild()); 828 829 // 830 // Now handle our level. We use our own first and last position 831 // sets, so get them up front. 832 // 833 final CMStateSet first = nodeCur.firstPos(); 834 final CMStateSet last = nodeCur.lastPos(); 835 836 // 837 // For every position which is in our last position set, add all 838 // of our first position states to the follow set for that 839 // position. 840 // 841 for (int index = 0; index < fLeafCount; index++) 842 { 843 if (last.getBit(index)) 844 fFollowList[index].union(first); 845 } 846 } 847 848 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) { 849 // Recurse only 850 calcFollowList(((CMUniOp)nodeCur).getChild()); 851 } 852 /***/ 853 } 854 855 /** 856 * Dumps the tree of the current node to standard output. 857 * 858 * @param nodeCur The current node. 859 * @param level The maximum levels to output. 860 * 861 * @exception CMException Thrown on error. 862 */ 863 private void dumpTree(CMNode nodeCur, int level) 864 { 865 for (int index = 0; index < level; index++) 866 System.out.print(" "); 867 868 int type = nodeCur.type(); 869 if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE) 870 || (type == XMLContentSpec.CONTENTSPECNODE_SEQ)) 871 { 872 if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE) 873 System.out.print("Choice Node "); 874 else 875 System.out.print("Seq Node "); 876 877 if (nodeCur.isNullable()) 878 System.out.print("Nullable "); 879 880 System.out.print("firstPos="); 881 System.out.print(nodeCur.firstPos().toString()); 882 System.out.print(" lastPos="); 883 System.out.println(nodeCur.lastPos().toString()); 884 885 dumpTree(((CMBinOp)nodeCur).getLeft(), level+1); 886 dumpTree(((CMBinOp)nodeCur).getRight(), level+1); 887 } 888 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE) 889 { 890 System.out.print("Rep Node "); 891 892 if (nodeCur.isNullable()) 893 System.out.print("Nullable "); 894 895 System.out.print("firstPos="); 896 System.out.print(nodeCur.firstPos().toString()); 897 System.out.print(" lastPos="); 898 System.out.println(nodeCur.lastPos().toString()); 899 900 dumpTree(((CMUniOp)nodeCur).getChild(), level+1); 901 } 902 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) 903 { 904 System.out.print 905 ( 906 "Leaf: (pos=" 907 + ((CMLeaf)nodeCur).getPosition() 908 + "), " 909 + ((CMLeaf)nodeCur).getElement() 910 + "(elemIndex=" 911 + ((CMLeaf)nodeCur).getElement() 912 + ") " 913 ); 914 915 if (nodeCur.isNullable()) 916 System.out.print(" Nullable "); 917 918 System.out.print("firstPos="); 919 System.out.print(nodeCur.firstPos().toString()); 920 System.out.print(" lastPos="); 921 System.out.println(nodeCur.lastPos().toString()); 922 } 923 else 924 { 925 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 926 } 927 } 928 929 930 /** 931 * -1 is used to represent bad transitions in the transition table 932 * entry for each state. So each entry is initialized to an all -1 933 * array. This method creates a new entry and initializes it. 934 */ 935 private int[] makeDefStateList() 936 { 937 int[] retArray = new int[fElemMapSize]; 938 for (int index = 0; index < fElemMapSize; index++) 939 retArray[index] = -1; 940 return retArray; 941 } 942 943 /** Post tree build initialization. */ 944 private int postTreeBuildInit(CMNode nodeCur, int curIndex) 945 { 946 // Set the maximum states on this node 947 nodeCur.setMaxStates(fLeafCount); 948 949 // Recurse as required 950 if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY || 951 (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL || 952 (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) { 953 // REVISIT: Don't waste these structures. 954 QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI()); 955 fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition()); 956 fLeafListType[curIndex] = nodeCur.type(); 957 curIndex++; 958 } 959 else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) 960 || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)) 961 { 962 curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex); 963 curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex); 964 } 965 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE 966 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE 967 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) 968 { 969 curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex); 970 } 971 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) 972 { 973 // 974 // Put this node in the leaf list at the current index if its 975 // a non-epsilon leaf. 976 // 977 final QName node = ((CMLeaf)nodeCur).getElement(); 978 if (node.localpart != fEpsilonString) { 979 fLeafList[curIndex] = (CMLeaf)nodeCur; 980 fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF; 981 curIndex++; 982 } 983 } 984 else 985 { 986 throw new RuntimeException("ImplementationMessages.VAL_NIICM: type="+nodeCur.type()); 987 } 988 return curIndex; 989 } 990 991 } // class DFAContentModel