001 /* 002 * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of 003 * Pennsylvania. 004 * 005 * This file is part of Glo-DB. 006 * 007 * Glo-DB is free software: you can redistribute it and/or modify it 008 * under the terms of the GNU General Public License as published by 009 * the Free Software Foundation, either version 3 of the License, or 010 * (at your option) any later version. 011 * 012 * Glo-DB is distributed in the hope that it will be useful, but 013 * WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>. 019 * 020 * @(#)Operator.java 021 */ 022 023 package edu.upenn.gloDB.parser; 024 025 import edu.upenn.gloDB.*; 026 import java.util.TreeSet; 027 import java.util.SortedSet; 028 import java.util.HashMap; 029 import java.util.Iterator; 030 031 /** 032 * Methods for processing the different types of operators. 033 * 034 * <pre> 035 * T1: r-s t-------u v---w x-y 036 * aa|------------------------------|bb 037 * |012345678901234567890123456789| 038 * T2: a-b c-d e-f g---h i-j 039 * </pre> 040 * 041 * The type of Operator: 042 * <ul> 043 * <li> -1 = null : (no preceeding operator - first track in a group). 044 * <li> 0 = POS : all contiguous features in T1 and T2, appropriately spaced 045 * <ul> T1 POS{5} T2: [(t,u), (i,j)] </ul> 046 * <ul> T2 POS{5} T1: [(a,b), (t,u), (e,f), (v,w)] </ul> 047 * <ul> T1 POS{-5} T2: [(t,u), (e,f)] </ul> 048 * <ul> T1 POS{3,6} T2: [(r,s), (c,d), (t,u), (i,j)] </ul> 049 * <ul> T1 POS{-5,-1} T2: [(r,s), (t,u), (e,f), (g,h), (v,w), (i,j)] </ul> 050 * <li> 1 = AND : all features in T1 which overlap features in T2. 051 * <ul> T1 AND T2: [(r,s), (t,u), (v,w), (c,d), (e,f), (g,h), (i,j)] </ul> 052 * <li> 2 = OR : all features in T1 and T2. 053 * <ul> T1 OR T2: [(r,s), (t,u), (v,w), (x,y), (c,d), (e,f), (g,h), (i,j)] </ul> 054 * <li> 3 = MINUS : all features in T1 that don't overlap features in T2. 055 * <ul> T1 MINUS T2: [(x,y)] </ul> 056 * <ul> T2 MINUS T1: [] </ul> 057 * <li> 4 = sAND : all features in T1 which exactly overlap features in T2. 058 * <ul> T1 sAND T2: [(r,s)] </ul> 059 * <li> 5 = sMINUS : all features in T1 that don't exactly overlap features in T2. 060 * <ul> T1 sMINUS T2: [(t,u), (v,w), (x,y)] </ul> 061 * <ul> T2 sMINUS T1: [(c,d), (e,f), (g,h), (i,j)] </ul> 062 * <li> 10 = . (bPOS) : all contiguous features in T1 and T2, appropriately spaced 063 * <ul> T1 .{0} T2: [] </ul> 064 * <ul> T1 .{5} T2: [(t,u), (i,j)] </ul> 065 * <ul> T2 .{5} T1: [(a,b), (t,u), (e,f), (v,w)] </ul> 066 * <ul> T1 .{-5} T2: [(t,u), (e,f)] </ul> 067 * <li> 11 = && (bAND) : all positions in T1 that overlap with positions in T2. 068 * <ul> T1 && T2: [(r,s), (t,d), (e,f), (g,u), (v,h), (i,w)] </ul> 069 * <li> 12 = || (bOR) : all positions in T1 and T2. 070 * <ul> T1 || T2: [(r,s), (c,j), (x,y)] </ul> 071 * <li> 13 = - (bMINUS) : the asymetrical difference between T1 and T2. 072 * <ul> T1 - T2: [(d,e), (f,g), (h,i), (x,y)] </ul> 073 * <ul> T2 - T1: [(c,t), (u,v), (w,j)] </ul> 074 * </ul> 075 * 076 * @author Stephen Fisher 077 * @version $Id: Operator.java,v 1.1.2.22 2007/03/01 21:17:33 fisher Exp $ 078 */ 079 080 public class Operator { 081 082 //-------------------------------------------------------------------------- 083 // Miscellaneous Methods 084 085 /** Return the representative String for a type value. */ 086 static String getType(int type) { 087 switch (type) { 088 case -1: return "null"; 089 case 0: return "POS"; 090 case 1: return "AND"; 091 case 2: return "OR"; 092 case 3: return "MINUS"; 093 case 4: return "sAND"; 094 case 5: return "sMINUS"; 095 // case 10: return "."; 096 case 11: return "&&"; 097 case 12: return "||"; 098 case 13: return "-"; 099 } 100 return ""; 101 } 102 103 /** 104 * The first argument 'left' is the set of Features on the left 105 * hand side of the operation, the second argument 'operation' 106 * contains both the operation type and the set of Features on 107 * the right hand side of the operation. 108 */ 109 static Track processOperation(Track left, Operation operation) { 110 switch(operation.getType()) { 111 case 0: // POS : all contiguous F in T1 and T2, appropriately spaced 112 return fxn_POS(left, operation); 113 case 1: // AND : all features in T1 which overlap features in T2. 114 return fxn_AND(left, operation); 115 case 2: // OR : all features in T1 and T2. 116 return fxn_OR(left, operation); 117 case 3: // MINUS : all features in T1 that don't overlap features in T2. 118 return fxn_MINUS(left, operation); 119 case 4: // sAND : all features in T1 which exactly overlap features in T2. 120 return fxn_sAND(left, operation); 121 case 5: // sMINUS : all features in T1 that don't exactly overlap features in T2. 122 return fxn_sMINUS(left, operation); 123 // case 10: // . (bPOS) : all contiguous F in T1 and T2, appropriately spaced 124 // return fxn_bPOS(left, operation); 125 case 11: // && (bAND) : all positions in T1 that overlap with positions in T2. 126 return fxn_bAND(left, operation); 127 case 12: // || (bOR) : all positions in T1 and T2. 128 return fxn_bOR(left, operation); 129 case 13: // - (bMINUS) : the asymetrical difference between T1 and T2. 130 return fxn_bMINUS(left, operation); 131 } 132 133 // if we got here, then we didn't have an appropriate 'type'. 134 return null; 135 } 136 137 /** POS : all contiguous F in T1 and T2, appropriately spaced 138 * 139 * For each T2 need to loop through all T1 (on same seq), until 140 * past range. Loop through T2 and if find match, then add T1 and 141 * T2, don't worry about repeatedly adding T1's but no need to 142 * ever test a T2 again once added -- only ever have to go through 143 * T2 once. 144 * @XXX can we make this more efficient by looping from end? Use 145 * different strategy for - and +? Consider that the Sets are 146 * sorted by their min values which means we can assume the T2's 147 * are incremental but we can't assume this about the T1's since 148 * we are concerned with the T1 max values but T1 is sorted by min 149 * values. 150 */ 151 public static Track fxn_POS(Track left, Operation operation) { 152 Track out = new Track(false); 153 154 // if either Track is empty then there will be no matches so 155 // just return an empty Track 156 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 157 return out; 158 } 159 160 // Source Sets for Tracks 161 HashMap sourcesA = left.getSources(); 162 HashMap sourcesB = operation.track.getSources(); 163 164 GloDBUtils.printMsg("Working:...", GloDBUtils.FEEDBACK, false); 165 int cnt = 0; 166 167 // step through each Sequence from the second Set and if there 168 // are Features from the first Set that match the POS 169 // requirements, then add them Features. 170 for (Iterator s = sourcesB.keySet().iterator(); s.hasNext();) { 171 String source = (String) s.next(); 172 173 // if sourcesA doesn't include 'source' then try next 174 // source from sourcesB 175 if (! sourcesA.containsKey(source)) continue; 176 177 // get Features for this source 178 TreeSet featuresA = (TreeSet) sourcesA.get(source); 179 TreeSet featuresB = (TreeSet) sourcesB.get(source); 180 181 if ((featuresA.size() == 0) || (featuresB.size() == 0)) continue; 182 183 int interval = (featuresA.size() + featuresB.size()) / 50; 184 if (interval == 0) interval = 1; 185 186 // create TreeSet that stores Features in descending order 187 TreeSet featuresADesc = Track.sortByMax(featuresA); 188 189 Iterator iA = featuresADesc.iterator(); 190 Iterator iB = featuresB.iterator(); 191 192 Feature featureA = (Feature) iA.next(); 193 Feature featureB = (Feature) iB.next(); 194 195 int minA = featureA.getMax() + operation.minPos; 196 int maxA = featureA.getMax() + operation.maxPos; 197 int minB = featureB.getMin(); 198 while (true) { 199 // System.out.print("loopA: " + minA + " " + maxA + " " + featureA); 200 // System.out.print("loopB: " + featureB); 201 202 if (maxA < minB) { // increment A 203 if (iA.hasNext()) { 204 featureA = (Feature) iA.next(); 205 minA = featureA.getMax() + operation.minPos; 206 maxA = featureA.getMax() + operation.maxPos; 207 // System.out.print("incA: " + minA + " " + maxA + " " + featureA); 208 209 if ((cnt++ % interval) == 0) GloDBUtils.printMsg(".", GloDBUtils.FEEDBACK, false); 210 } else { 211 break; 212 } 213 } else if (minA > minB) { 214 // featureB less than featureA, so increment featureB 215 if (iB.hasNext()) { 216 featureB = (Feature) iB.next(); 217 // System.out.print("incB: " + featureB); 218 minB = featureB.getMin(); 219 } else { 220 // have run out of Features in B 221 break; 222 } 223 } else { // found a match 224 // System.out.println("match A to B"); 225 out.addFeature(featureA); 226 out.addFeature(featureB); 227 228 // increment A until no more overlapping Features 229 // and store the Features in a temporary set so 230 // that we can compare them to B. 231 TreeSet tmpASet = new TreeSet(); 232 Feature newA = featureA; 233 while (iA.hasNext()) { 234 newA = (Feature) iA.next(); 235 int newMinA = newA.getMax() + operation.minPos; 236 int newMaxA = newA.getMax() + operation.maxPos; 237 // System.out.print("newA: " + newMinA + " " + newMaxA + " " + newA); 238 if ((newMinA <= minB) && (minB <= newMaxA)) { 239 // System.out.println("match newA to B"); 240 out.addFeature(newA); 241 tmpASet.add(newA); 242 } else { 243 break; 244 } 245 } 246 247 // increment B until not overlapping with current 248 // A Feature, then test if it overlaps with any 249 // of the A Features just added. 250 TreeSet tmpBSet = new TreeSet(); 251 Feature newB = featureB; 252 while (iB.hasNext()) { 253 newB = (Feature) iB.next(); 254 // System.out.print("newB: " + newB); 255 int newMinB = newB.getMin(); 256 if ((minA <= newMinB) && (newMinB <= maxA)) { 257 // System.out.println("match A to newB"); 258 out.addFeature(newB); 259 tmpBSet.add(newB); // need to test these against newA 260 } else { 261 for (Iterator tmpAIt = tmpASet.iterator(); tmpAIt.hasNext();) { 262 Feature tmpA = (Feature) tmpAIt.next(); 263 int newMinA = tmpA.getMax() + operation.minPos; 264 int newMaxA = tmpA.getMax() + operation.maxPos; 265 // System.out.print("compA: " + newMinA + " " + newMaxA + " " + newA); 266 if ((newMinA <= newMinB) && (newMinB <= newMaxA)) { 267 // System.out.println("match newA(m) to newB(u)"); 268 out.addFeature(newB); 269 } 270 } 271 272 for (Iterator tmpBIt = tmpBSet.iterator(); tmpBIt.hasNext();) { 273 Feature tmpB = (Feature) tmpBIt.next(); 274 int newMinA = newA.getMax() + operation.minPos; 275 int newMaxA = newA.getMax() + operation.maxPos; 276 // System.out.print("compB: " + newB); 277 if ((newMinA <= tmpB.getMin()) && (tmpB.getMin() <= newMaxA)) { 278 // System.out.println("match newA(u) to newB(m)"); 279 out.addFeature(newA); 280 } 281 } 282 break; 283 } 284 } 285 286 // no more overlap, so test if either Track ran 287 // out of Features then we're done 288 if ((featureA == newA) || (featureB == newB)) break; 289 290 // more Features in both Tracks so keep going 291 featureA = newA; 292 minA = featureA.getMax() + operation.minPos; 293 maxA = featureA.getMax() + operation.maxPos; 294 featureB = newB; 295 minB = featureB.getMin(); 296 } 297 } 298 GloDBUtils.printMsg("", GloDBUtils.FEEDBACK); 299 300 } 301 return out; 302 } 303 304 /** AND : all F in T1 which also exists in T2. */ 305 public static Track fxn_AND(Track left, Operation operation) { 306 Track out = new Track(false); 307 308 // if either Track is empty then there will be no matches so 309 // just return an empty Track 310 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 311 return out; 312 } 313 314 HashMap smaller; 315 HashMap larger; 316 if (left.numSources() < operation.track.numSources()) { 317 // Source sets for Tracks 318 smaller = left.getSources(); 319 larger = operation.track.getSources(); 320 } else { 321 smaller = operation.track.getSources(); 322 larger = left.getSources(); 323 } 324 325 // step through each Sequence from the second set and if there 326 // are Features from the first set that overlap 327 for (Iterator s = smaller.keySet().iterator(); s.hasNext();) { 328 String source = (String) s.next(); 329 330 // if larger doesn't include 'source' then try next source 331 // from smaller 332 if (! larger.containsKey(source)) continue; 333 334 // get Features for this source 335 TreeSet featuresSm = (TreeSet) smaller.get(source); 336 TreeSet featuresLg = (TreeSet) larger.get(source); 337 338 Feature featureSm, featureLg; 339 Iterator iSm = featuresSm.iterator(); 340 Iterator iLg = featuresLg.iterator(); 341 342 // get initial Features 343 // @XXX do we need to test for empty sets? 344 if (iSm.hasNext() && iLg.hasNext()) { 345 featureSm = (Feature) iSm.next(); 346 featureLg = (Feature) iLg.next(); 347 } else { 348 continue; 349 } 350 int minSm = featureSm.getMin(); 351 int maxSm = featureSm.getMax(); 352 int minLg = featureLg.getMin(); 353 int maxLg = featureLg.getMax(); 354 355 while (true) { 356 /// System.out.println("sm: (" + minSm + ", " + maxSm + ") :: lg: (" + minLg + ", " + maxLg + ")"); 357 358 if (minLg > maxSm) { 359 /// System.out.println("sm less than lg"); 360 361 // featureSm less than featureLg, so increment Sm 362 if (iSm.hasNext()) { 363 featureSm = (Feature) iSm.next(); 364 minSm = featureSm.getMin(); 365 maxSm = featureSm.getMax(); 366 } else { 367 /// System.out.println("no more sm"); 368 369 // have run out of Features in Sm 370 if (iLg.hasNext()) { 371 // more Features in Lg so keep going 372 featureLg = (Feature) iLg.next(); 373 minLg = featureLg.getMin(); 374 maxLg = featureLg.getMax(); 375 } else { 376 // no more Features in Lg, so stop 377 break; 378 } 379 } 380 } else if (minSm > maxLg) { 381 /// System.out.println("lg less than sm"); 382 383 // featureLg less than featureSm, so increment featureLg 384 if (iLg.hasNext()) { 385 featureLg = (Feature) iLg.next(); 386 minLg = featureLg.getMin(); 387 maxLg = featureLg.getMax(); 388 } else { 389 break; 390 } 391 } else { 392 /// System.out.print("adding (overlap) sm: " + featureSm); 393 /// System.out.print("adding (overlap) lg: " + featureLg); 394 395 // Features overlap so add the Features 396 out.addFeature(featureSm); 397 out.addFeature(featureLg); 398 399 // test if featureSm overlaps any remaining 400 // features in featuresLg before incrementing 401 SortedSet lgSet = featuresLg.tailSet(featureLg); 402 Iterator i = lgSet.iterator(); 403 // the first element is "featureLg" which was 404 // already added, so just step past this element 405 i.next(); 406 while (i.hasNext()) { 407 Feature tmpLg = (Feature) i.next(); 408 if (featureSm.overlaps(tmpLg)) { 409 /// System.out.print("matched (adding) lg: " + tmpLg); 410 411 out.addFeature(tmpLg); 412 } else if (tmpLg.getMin() > minSm) { 413 /// System.out.print("not matched lg: " + tmpLg); 414 415 // can't just stop when they don't 416 // overlap, need to make sure that we've 417 // gone far enough because one feature 418 // might be tiny and not overlap while the 419 // next might be huge and overlap 420 break; 421 } 422 } 423 424 // test if featureLg overlaps any remaining 425 // features in featuresSm before incrementing 426 SortedSet smSet = featuresSm.tailSet(featureSm); 427 i = smSet.iterator(); 428 // the first element is "featureSm" which was 429 // already added, so just step past this element 430 i.next(); 431 while (i.hasNext()) { 432 Feature tmpSm = (Feature) i.next(); 433 if (featureLg.overlaps(tmpSm)) { 434 /// System.out.print("matched (adding) sm: " + tmpSm); 435 436 out.addFeature(tmpSm); 437 } else if (tmpSm.getMin() > minLg) { 438 /// System.out.print("not matched sm: " + tmpSm); 439 440 // can't just stop when they don't 441 // overlap, need to make sure that we've 442 // gone far enough because one feature 443 // might be tiny and not overlap while the 444 // next might be huge and overlap 445 break; 446 } 447 } 448 449 // no more overlap, so test if either Track ran 450 // out of Features then we're done 451 if ((! iSm.hasNext()) || (! iLg.hasNext())) break; 452 453 // more Features in both Tracks so keep going 454 featureSm = (Feature) iSm.next(); 455 minSm = featureSm.getMin(); 456 maxSm = featureSm.getMax(); 457 featureLg = (Feature) iLg.next(); 458 minLg = featureLg.getMin(); 459 maxLg = featureLg.getMax(); 460 } 461 } 462 } 463 return out; 464 } 465 466 /** OR : all F in T1 and T2. */ 467 public static Track fxn_OR(Track left, Operation operation) { 468 if (left.numFeatures() < operation.numFeatures()) { 469 Track out = (Track) operation.track.cloneTrack(false); 470 out.addFeatures(left.getFeatures()); 471 return out; 472 } else { 473 Track out = (Track) left.cloneTrack(false); 474 out.addFeatures(operation.track.getFeatures()); 475 return out; 476 } 477 } 478 479 /** 480 * MINUS : all F in T1 that don't overlap with F in T2. 481 */ 482 public static Track fxn_MINUS(Track left, Operation operation) { 483 // if 'left' is empty then there will be no matches so just 484 // return an empty Track (ie 'left') and if 'operation' is empty 485 // then the entire 'left' will match so again just return 486 // 'left'. 487 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 488 return (Track) left.cloneTrack(false); 489 } 490 491 Track out = new Track(false); 492 493 // Source Sets for Tracks 494 HashMap sourcesA = left.getSources(); 495 HashMap sourcesB = operation.track.getSources(); 496 497 // step through each Sequence 498 for (Iterator s = sourcesA.keySet().iterator(); s.hasNext();) { 499 String source = (String) s.next(); 500 501 // get Features for this source 502 TreeSet featuresA = (TreeSet) sourcesA.get(source); 503 504 // if sourcesB doesn't include 'source' then add all of 505 // the Features on the current Sequence and continue. 506 if (! sourcesB.containsKey(source)) { 507 out.addFeatures(featuresA); 508 continue; 509 } 510 511 // get Features for this source 512 TreeSet featuresB = (TreeSet) sourcesB.get(source); 513 514 Feature featureA, featureB; 515 Iterator iA = featuresA.iterator(); 516 Iterator iB = featuresB.iterator(); 517 518 // get initial Features 519 // @XXX do we need to test for empty sets? 520 if (iA.hasNext() && iB.hasNext()) { 521 featureA = (Feature) iA.next(); 522 featureB = (Feature) iB.next(); 523 } else { 524 continue; 525 } 526 int minA = featureA.getMin(); 527 int maxA = featureA.getMax(); 528 int minB = featureB.getMin(); 529 int maxB = featureB.getMax(); 530 531 while (true) { 532 if (minB > maxA) { 533 // featureA is less than featureB, so add featureA and 534 // increment A 535 out.addFeature(featureA); 536 if (iA.hasNext()) { 537 featureA = (Feature) iA.next(); 538 minA = featureA.getMin(); 539 maxA = featureA.getMax(); 540 } else { 541 // have run out of Tracks in A, but not sure if 542 // there are more in B that might overlap with the 543 // current A. So increment B and continue, until 544 // B is no longer less than A or run out of B. 545 if (iB.hasNext()) { 546 featureB = (Feature) iB.next(); 547 minB = featureB.getMin(); 548 maxB = featureB.getMax(); 549 } else { 550 // have run out of Tracks in B 551 break; // loop to next source 552 } 553 } 554 } else if (minA > maxB) { 555 // featureB is less than featureA, so increment featureB 556 if (iB.hasNext()) { 557 featureB = (Feature) iB.next(); 558 minB = featureB.getMin(); 559 maxB = featureB.getMax(); 560 } else { 561 // have run out of Tracks in B, so need to add 562 // current and remaining Tracks in A. 563 out.addFeature(featureA); 564 while (iA.hasNext()) { 565 out.addFeature((Feature) iA.next()); 566 } 567 break; // loop to next source 568 } 569 } else { 570 // Tracks overlap so increment A without adding A 571 if (iA.hasNext()) { 572 featureA = (Feature) iA.next(); 573 minA = featureA.getMin(); 574 maxA = featureA.getMax(); 575 } else { 576 // have run out of Tracks in A 577 break; // loop to next source 578 } 579 } 580 } 581 } 582 return out; 583 } 584 585 /** 586 * sAND : all features in T1 which exactly overlap features in T2. 587 */ 588 public static Track fxn_sAND(Track left, Operation operation) { 589 Track out = new Track(false); 590 591 // if either Track is empty then there will be no matches so 592 // just return an empty Track 593 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 594 return out; 595 } 596 597 HashMap smaller; 598 HashMap larger; 599 if (left.numFeatures() < operation.numFeatures()) { 600 // Source Sets for Tracks 601 smaller = left.getSources(); 602 larger = operation.track.getSources(); 603 } else { 604 smaller = operation.track.getSources(); 605 larger = left.getSources(); 606 } 607 608 // step through each Sequence from the second set and test if 609 // there are Features from the first set that are equal 610 for (Iterator s = smaller.keySet().iterator(); s.hasNext();) { 611 String source = (String) s.next(); 612 613 // if larger doesn't include 'source' then try next source 614 // from smaller 615 if (! larger.containsKey(source)) continue; 616 617 // get Features for this source 618 TreeSet featuresSm = (TreeSet) smaller.get(source); 619 TreeSet featuresLg = (TreeSet) larger.get(source); 620 621 // loop over the smaller Set 622 for (Iterator i = featuresSm.iterator(); i.hasNext();) { 623 Feature feature = (Feature) i.next(); 624 if (featuresLg.contains(feature)) { 625 out.addFeature(feature); 626 } 627 } 628 } 629 return out; 630 } 631 632 /** 633 * sMINUS : all features in T1 that don't exactly overlap features in T2. 634 */ 635 public static Track fxn_sMINUS(Track left, Operation operation) { 636 // if 'left' is empty then there will be no matches so just 637 // return an empty Track (ie 'left') and if 'operation' is empty 638 // then the entire 'left' will match so again just return 639 // 'left'. 640 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 641 return (Track) left.cloneTrack(false); 642 } 643 644 Track out = new Track(false); 645 646 // Source Sets for Tracks 647 HashMap sourcesA = left.getSources(); 648 HashMap sourcesB = operation.track.getSources(); 649 650 // step through each Sequence 651 for (Iterator s = sourcesA.keySet().iterator(); s.hasNext();) { 652 String source = (String) s.next(); 653 654 // get Features for this source 655 TreeSet featuresA = (TreeSet) sourcesA.get(source); 656 657 // if sourcesB doesn't include 'source' then add all of 658 // the Features on the current Sequence and continue. 659 if (! sourcesB.containsKey(source)) { 660 out.addFeatures(featuresA); 661 } else { 662 // get Features from B for this source 663 TreeSet featuresB = (TreeSet) sourcesB.get(source); 664 665 // loop over Features in A and see if they don't exist 666 // in B, then add them to the output 667 for (Iterator i = featuresA.iterator(); i.hasNext();) { 668 Feature feature = (Feature) i.next(); 669 if (! featuresB.contains(feature)) { 670 out.addFeature(feature); 671 } 672 } 673 } 674 } 675 676 return out; 677 } 678 679 /** OR : all F in T1 and T2. */ 680 public static Track fxn_bOR(Track left, Operation operation) { 681 // create a Track that contains all Features, so we can use 682 // the Track's mergeContiguous() function to do the work for 683 // us. 684 // @XXX this is probably not very efficient. 685 Track track = fxn_OR(left, operation); 686 track.mergeContiguous(); 687 return track; 688 } 689 690 /** && (bAND) : all positions in T1 that overlap with positions in T2. 691 * @XXX This will return only overlapping regions between 2 692 * Features. 693 * @XXX This assumes that mergeContiguous() has already been run 694 * on each set of Features; that is, neither set contains 695 * contiguous Features. 696 */ 697 public static Track fxn_bAND(Track left, Operation operation) { 698 // output Track 699 Track out = new Track(false); 700 701 // if either Track is empty then there will be no matches so 702 // just return an empty Track 703 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 704 return out; 705 } 706 707 // first do a Track.mergeContiguous() for each TreeSet but 708 // this isn't probably very efficient. 709 left.mergeContiguous(); 710 HashMap sourcesA = left.getSources(); 711 712 // operation.track.mergeContiguous(); 713 // HashMap sourcesB = operation.track.getSources(); 714 Track trackB = operation.track.cloneMerged(); 715 HashMap sourcesB = trackB.getSources(); 716 717 // step through each Sequence from the first Set and if there 718 // are Features from the second Set on the same Sequence, 719 // then check for overlaps. 720 for (Iterator s = sourcesA.keySet().iterator(); s.hasNext();) { 721 String source = (String) s.next(); 722 Sequence sourceObj = (Sequence) ObjectHandles.sequencePool.get(source); 723 724 // if sourcesB doesn't include 'sourcesA' then try next 725 // source from sourcesA 726 if (! sourcesB.containsKey(source)) continue; 727 728 // get Features for this source 729 TreeSet featuresA = (TreeSet) sourcesA.get(source); 730 TreeSet featuresB = (TreeSet) sourcesB.get(source); 731 732 Iterator iA = featuresA.iterator(); 733 Iterator iB = featuresB.iterator(); 734 735 Feature featureA; 736 Feature featureB; 737 738 // get initial Features 739 if (iA.hasNext() && iB.hasNext()) { 740 featureA = (Feature) iA.next(); 741 featureB = (Feature) iB.next(); 742 } else { 743 continue; 744 } 745 int minA = featureA.getMin(); 746 int maxA = featureA.getMax(); 747 int minB = featureB.getMin(); 748 int maxB = featureB.getMax(); 749 750 // loop until run out of Features on the current 751 // Sequence, in either set 752 while (true) { 753 if (minB <= maxA) { 754 if (minA <= maxB) { // found overlap 755 // since neither set contains contiguous 756 // Features, we do not need to look past the 757 // current Features in handling the current 758 // overlap. we now need to figure out the 759 // bounds of the overlap. 760 int minL = (minA > minB) ? minA : minB; 761 int maxL; 762 if (maxA < maxB) { 763 maxL = maxA; 764 765 if ((minL == minA) && (maxL == maxA)) { 766 // if same dimenions as featureA, then featureA/B 767 // overlap entirely 768 out.addFeature(featureA); 769 } else { 770 // featureA/B don't entirely overlap, so 771 // create new Feature object 772 out.addFeature(new ExactFeature(minL, maxL, sourceObj)); 773 } 774 775 // featureA ends first, so increment featureA 776 if (iA.hasNext()) { 777 // featureA is less than featureB, so increment featureA 778 featureA = (Feature) iA.next(); 779 minA = featureA.getMin(); 780 maxA = featureA.getMax(); 781 } else { 782 // have run out of Features in A 783 break; // loop to next source 784 } 785 } else { 786 maxL = maxB; 787 788 if ((minL == minA) && (maxL == maxA)) { 789 // if same dimenions as featureA, then featureA/B 790 // overlap entirely 791 out.addFeature(featureA); 792 } else { 793 // featureA/B don't entirely overlap, so 794 // create new Feature object 795 out.addFeature(new ExactFeature(minL, maxL, sourceObj)); 796 } 797 798 // featureB ends first, so increment featureB 799 if (iB.hasNext()) { 800 // featureB is less than featureA, so increment featureB 801 featureB = (Feature) iB.next(); 802 minB = featureB.getMin(); 803 maxB = featureB.getMax(); 804 } else { 805 // have run out of Features in B 806 break; // loop to next source 807 } 808 } 809 810 } else { 811 if (iB.hasNext()) { 812 // featureB is less than featureA, so increment featureB 813 featureB = (Feature) iB.next(); 814 minB = featureB.getMin(); 815 maxB = featureB.getMax(); 816 } else { 817 // have run out of Features in B 818 break; // loop to next source 819 } 820 } 821 } else { 822 if (iA.hasNext()) { 823 // featureA is less than featureB, so increment featureA 824 featureA = (Feature) iA.next(); 825 minA = featureA.getMin(); 826 maxA = featureA.getMax(); 827 } else { 828 // have run out of Features in A 829 break; // loop to next source 830 } 831 } 832 } 833 } 834 835 return out; 836 } 837 838 /** bMINUS : all positions in T1 that don't exist in T2. 839 * 840 * This will return only overlapping regions between 2 Features. 841 * @XXX This assumes that mergeContiguous() has already been run 842 * on each set of Features; that is, neither set contains 843 * contiguous Features. 844 */ 845 public static Track fxn_bMINUS(Track left, Operation operation) { 846 // if 'left' is empty then there will be no matches so just 847 // return an empty Track (ie 'left') and if 'operation' is empty 848 // then the entire 'left' will match so again just return 849 // 'left'. 850 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 851 return (Track) left.cloneTrack(false); 852 } 853 854 left.mergeContiguous(); 855 HashMap sourcesA = left.getSources(); 856 857 // operation.track.mergeContiguous(); 858 // HashMap sourcesB = operation.track.getSources(); 859 Track trackB = operation.track.cloneMerged(); 860 HashMap sourcesB = trackB.getSources(); 861 862 // output Track 863 Track out = new Track(false); 864 865 // step through each Sequence from the first Set and if there 866 // are Features from the second Set on the same Sequence, 867 // then check for overlaps. 868 for (Iterator s = sourcesA.keySet().iterator(); s.hasNext();) { 869 String source = (String) s.next(); 870 Sequence sourceObj = (Sequence) ObjectHandles.sequencePool.get(source); 871 872 // if sourcesB doesn't include 'sourceA' then try next 873 // source from sourcesA 874 if (! sourcesB.containsKey(source)) continue; 875 876 // get Features for this source 877 TreeSet featuresA = (TreeSet) sourcesA.get(source); 878 TreeSet featuresB = (TreeSet) sourcesB.get(source); 879 880 Iterator iA = featuresA.iterator(); 881 Iterator iB = featuresB.iterator(); 882 883 Feature featureA; 884 Feature featureB; 885 886 // get initial Features 887 if (iA.hasNext() && iB.hasNext()) { 888 featureA = (Feature) iA.next(); 889 featureB = (Feature) iB.next(); 890 } else { 891 continue; 892 } 893 894 int minA = featureA.getMin(); 895 int maxA = featureA.getMax(); 896 int minB = featureB.getMin(); 897 int maxB = featureB.getMax(); 898 899 int minL = -1; // store start value for new Feature 900 boolean spanA = false; // flag if spanning A or B Feature 901 902 // loop until run out of Features on the current 903 // Sequence, in either set 904 while (true) { 905 if (minL < 0) { // starting new Feature 906 if (minL == -1) { // compare min's 907 if (minA < minB) { 908 minL = minA; 909 spanA = true; // progressing along A 910 } else if (minB < minA) { 911 minL = minB; 912 spanA = false; // progressing along B 913 } else { 914 // min's equal, so compare max's 915 minL = -2; 916 } 917 } 918 if (minL == -2) { // equal min's so compare max's 919 if (maxA < maxB) { 920 minL = maxA; 921 spanA = false; // progressing along B 922 923 // starting at maxA, so need to increment A 924 if (iA.hasNext()) { 925 featureA = (Feature) iA.next(); 926 minA = featureA.getMin(); 927 maxA = featureA.getMax(); 928 } else { // no more Features in A 929 break; // loop to next source 930 } 931 } else if (maxB < maxA) { 932 // minL = maxB; 933 minL = maxB + 1; 934 spanA = true; // progressing along A 935 936 // starting at maxB, so need to increment B 937 if (iB.hasNext()) { 938 featureB = (Feature) iB.next(); 939 minB = featureB.getMin(); 940 maxB = featureB.getMax(); 941 } else { 942 // no more Features in B, so add featureA 943 // and all remaining Features in A 944 out.addFeature(new ExactFeature(minL, maxA, sourceObj)); 945 while (iA.hasNext()) { 946 out.addFeature((Feature) iA.next()); 947 } 948 break; // loop to next source 949 } 950 } else { // A/B are equal so increment both 951 minL = -1; // reset to compare min's next time 952 if (iB.hasNext()) { 953 featureB = (Feature) iB.next(); 954 minB = featureB.getMin(); 955 maxB = featureB.getMax(); 956 957 if (iA.hasNext()) { 958 featureA = (Feature) iA.next(); 959 minA = featureA.getMin(); 960 maxA = featureA.getMax(); 961 } else { // no more Features in A 962 break; // loop to next source 963 } 964 } else { 965 // no more Features in B, so add remaining 966 // Features in A and exit 967 while (iA.hasNext()) { 968 out.addFeature((Feature) iA.next()); 969 } 970 break; // loop to next source 971 } 972 } 973 } 974 } else { // already have minL, so need to compute maxL 975 if (spanA) { // progressing along A, so test maxA/minB 976 if (maxA <= minB) { 977 if (maxA == minB) { 978 // create new Feature for portion of 979 // featureA that doesn't overlap B 980 out.addFeature(new ExactFeature(minL, maxA-1, sourceObj)); 981 } else { 982 if (minL == minA) { 983 // same dimenions as featureA so add featureA 984 out.addFeature(featureA); 985 } else { 986 // create new Feature for portion 987 // of featureA that doesn't 988 // overlap with B 989 out.addFeature(new ExactFeature(minL, maxA, sourceObj)); 990 } 991 } 992 993 if (iA.hasNext()) { // increment featureA 994 featureA = (Feature) iA.next(); 995 minA = featureA.getMin(); 996 maxA = featureA.getMax(); 997 minL = -1; // reset to compare min's next time 998 } else { // no more Features in A 999 break; // loop to next source 1000 } 1001 } else { 1002 // create new Feature for portion of featureA 1003 // that doesn't overlap with B 1004 out.addFeature(new ExactFeature(minL, minB-1, sourceObj)); 1005 minL = -2; // reset to compare max's next time 1006 } 1007 } else { // progressing along B, so test maxB/minA 1008 if (maxB <= minA) { 1009 if (iB.hasNext()) { // increment featureB 1010 featureB = (Feature) iB.next(); 1011 minB = featureB.getMin(); 1012 maxB = featureB.getMax(); 1013 minL = -1; // reset to compare min's next time 1014 } else { 1015 // no more Features in B, so add featureA 1016 // and remaining Features in A 1017 out.addFeature(featureA); 1018 while (iA.hasNext()) { 1019 out.addFeature((Feature) iA.next()); 1020 } 1021 break; // loop to next source 1022 } 1023 } else { 1024 minL = -2; // reset to compare max's next time 1025 } 1026 } 1027 } 1028 } 1029 } 1030 return out; 1031 } 1032 1033 /** . (bPOS) : all contiguous F in T1 and T2, appropriately spaced */ 1034 /* 1035 public static Track fxn_bPOS(Track left, Operation operation) { 1036 Track out = new Track(false); 1037 Track oTrack = new Track(false); 1038 1039 // if either Track is empty then there will be no matches so 1040 // just return an empty Track 1041 if ((left.numFeatures() == 0) || (operation.numFeatures() == 0)) { 1042 return out; 1043 } 1044 1045 // put each set into a Track to get the Source Sets 1046 HashMap sourcesA = left.getSources(); 1047 1048 // merge all B's because only concerned with exact positions 1049 // in B that fall within the range specified. By merging B's 1050 // we don't need to worry about overlapping Features in B. 1051 // operation.track.mergeContiguous(); 1052 // HashMap sourcesB = operation.track.getSources(); 1053 Track trackB = operation.track.cloneMerged(); 1054 HashMap sourcesB = trackB.getSources(); 1055 1056 // step through each Sequence from the second Set and if there 1057 // are Features from the first Set that match the POS 1058 // requirements, then add them Features. 1059 for (Iterator s = sourcesB.keySet().iterator(); s.hasNext();) { 1060 String source = (String) s.next(); 1061 1062 // if sourcesA doesn't include 'source' then try next 1063 // source from sourcesB 1064 if (! sourcesA.containsKey(source)) continue; 1065 1066 // get Features for this source 1067 TreeSet featuresA = (TreeSet) sourcesA.get(source); 1068 TreeSet featuresB = (TreeSet) sourcesB.get(source); 1069 1070 for (Iterator iA = featuresA.iterator(); iA.hasNext();) { 1071 Feature featureA = (Feature) iA.next(); 1072 int minA = featureA.getMax() + operation.minPos; 1073 int maxA = featureA.getMax() + operation.maxPos; 1074 1075 for (Iterator iB = featuresB.iterator(); iB.hasNext();) { 1076 Feature featureB = (Feature) iB.next(); 1077 1078 Feature tmp = FeatureUtils.contained(featureB, minA, maxA); 1079 if (tmp != null) { 1080 out.addFeature(featureA); 1081 oTrack.addFeature(tmp); 1082 } 1083 } 1084 } 1085 } 1086 oTrack.mergeContiguous(); 1087 out.addFeatures(oTrack.getFeatures()); 1088 return out; 1089 } 1090 */ 1091 } // Operator.java