001 /* 002 * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of 003 * Pennsylvania. 004 * 005 * This file is part of Glo-DB. 006 * 007 * Glo-DB is free software: you can redistribute it and/or modify it 008 * under the terms of the GNU General Public License as published by 009 * the Free Software Foundation, either version 3 of the License, or 010 * (at your option) any later version. 011 * 012 * Glo-DB is distributed in the hope that it will be useful, but 013 * WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>. 019 * 020 * @(#)Track.java 021 */ 022 023 package edu.upenn.gloDB; 024 025 import java.util.Comparator; 026 import java.util.Iterator; 027 import java.util.HashMap; 028 import java.util.TreeSet; 029 import java.util.Set; 030 import java.util.ArrayList; 031 import java.util.Random; 032 033 /** 034 * Tracks are collections of Features and allow accessing the Features 035 * as a sorted Set (sorted by source and position information) or 036 * grouped by the Feature source information. 037 * 038 * @author Stephen Fisher 039 * @version $Id: Track.java,v 1.1.2.34 2007/03/01 21:17:33 fisher Exp $ 040 */ 041 042 public class Track implements Cloneable { 043 044 /** 045 * This is a unique name for the Track, that is used by the 046 * parser to identify the Track. This is 'protected' to allow 047 * ObjectHandles to change the value. 048 */ 049 protected String id; 050 051 /** 052 * This is similar to "qualifiers" in GenBank (ex: scores, strand 053 * (+/-), phase (within codon)). 054 */ 055 protected HashMap attributes = new HashMap(); 056 057 /** 058 * TreeSet of Feature objects comprising the Track. 
* @XXX We should be able to remove this set after we create
     * 'sources' since that is a more useful structure for the Feature
     * data.
     */
    private TreeSet features = null;

    /**
     * Map of Sequence object IDs to Features.  Maintaining this set
     * slows down the adding/removing of Features but should speed up
     * the management of the Features.  It allows for easy access of
     * Features by Sequence.  For each sequence, a TreeSet of
     * Features is maintained.
     * @XXX This can not be directly changed by the user but rather is
     * created and updated based on {@link #features features}.
     */
    private HashMap sources = null;

    /** Used to create random IDs; seeded from the clock when the class is loaded. */
    private static Random random = new Random(System.currentTimeMillis());

    /**
     * Create a new Track object and add it to the trackPool.  An
     * empty ID is passed on, which makes the delegate constructor
     * pick a random ID.
     */
    public Track() {
        this(true, "");
    }

    /**
     * Create a new Track object with the specified ID, and add it
     * to the trackPool.
     * @param id the requested ID for the Track
     */
    public Track(String id) {
        this(true, id);
    }

    /**
     * Create a new Track object and add the newly created
     * Track object to the trackPool if addToPool is true.  An empty
     * ID is passed on, which makes the delegate constructor pick a
     * random ID.
     * @XXX This should probably be 'protected' instead of 'public'
     * because all Tracks should really be added to trackPool.
     */
    public Track(boolean addToPool) {
        this(addToPool, "");
    }

    /**
     * Create a new Track object and add the newly created
     * Track object to the trackPool if addToPool is true.
     * @XXX This should probably be 'protected' instead of 'public'
     * because all Tracks should really be added to trackPool.
109 */ 110 public Track(boolean addToPool, String id) { 111 // if no ID, then create a random ID for this Track 112 if (id == "") id = randomID("_T"); 113 this.id = id; 114 115 if (addToPool) { 116 try { 117 // add self to the trackPool 118 ObjectHandles.addTrack(this); 119 } catch (InvalidIDException e) { 120 String id_new = randomID("_T"); 121 String msg = "ID \"" + id + "\" already exists, using ID \"" + id_new + "\" instead."; 122 GloDBUtils.printWarning(msg); 123 124 // add self to set of all Tracks, using new ID 125 this.id = id_new; 126 ObjectHandles.addTrack(this); 127 } 128 } 129 } 130 131 //-------------------------------------------------------------------------- 132 // Setters and Getters 133 134 /** 135 * Set the ID. If the new ID is the same as the current ID, then 136 * doesn't do anything. If the new ID already exists in the 137 * trackPool, then throws an exception. 138 * @param id a String that is a unique identifier for the Track. 139 */ 140 public void setID(String id) throws InvalidIDException { 141 try { setID(id, true); } 142 catch (InvalidIDException e) { throw e; } 143 } 144 145 /** 146 * Set the ID. If the new ID is the same as the current ID, then 147 * doesn't do anything. If the new ID already exists in the 148 * trackPool, then throws an exception. If 'updatePool' is 149 * true, then the trackPool is updated. 'updatePool' must be 150 * true if the Track is in the trackPool, else the trackPool 151 * will become out of sync. 152 * @param id a String that is a unique identifier for the Track. 153 */ 154 public void setID(String id, boolean updatePool) throws InvalidIDException { 155 // don't do anything if new and old values are the same 156 if (this.id == id) return; 157 158 if (updatePool) { 159 // the Track should already be in trackPool, but if not 160 // then warn the user that the Track is being added to the 161 // pool 162 if (! 
ObjectHandles.trackPool.containsKey(this.id)) { 163 GloDBUtils.printWarning("Adding track to trackPool."); 164 } 165 166 // renameTrack() will do the actual changing of the 167 // Track's id. 168 try { ObjectHandles.renameTrack(this, id); } 169 catch (InvalidIDException e) { throw e; } 170 } else { 171 // since not in trackPool, just change ID 172 this.id = id; 173 } 174 } 175 176 /** Get the ID. */ 177 public String getID() { return id; } 178 179 /** 180 * Set the attributes. 181 * @param attributes a HashMap of Feature attributes 182 */ 183 public void setAttributes(HashMap attributes) { 184 // make sure attributes is never set to null 185 if (attributes == null) attributes = new HashMap(); 186 this.attributes = attributes; 187 } 188 189 /** Get the attributes. */ 190 public HashMap getAttributes() { return attributes; } 191 192 /** 193 * This will replace 'features' with the TreeSet argument. This 194 * will update the {@link #sources sources} HashMap based on the 195 * new set of Features. 196 */ 197 public void setFeatures(TreeSet features) { 198 // empty out the existing set of Features 199 this.features = null; 200 this.sources = null; 201 202 addFeatures(features); 203 } 204 205 /** Get the features, sorted by their min values. */ 206 public TreeSet getFeatures() { return features; } 207 208 /** 209 * Get the features, sorted by their max values. The TreeSet 210 * returned is effectively a clone of this Track's TreeSet and 211 * thus changes to the TreeSet will not be reflected in the 212 * Track's 'features' TreeSet. 213 */ 214 public TreeSet getFeaturesByMax() { 215 TreeSet featuresByMax = new TreeSet(new FeatureMaxComparator()); 216 featuresByMax.addAll(features); 217 return featuresByMax; 218 } 219 220 /** Get the map of source Sequences to Features. 
*/ 221 public HashMap getSources() { return sources; } 222 223 //-------------------------------------------------------------------------- 224 // Miscellaneous Methods 225 226 /** Sort the Feature TreeSet by max values. */ 227 public static TreeSet sortByMax(TreeSet features) { 228 TreeSet featuresByMax = new TreeSet(new FeatureMaxComparator()); 229 featuresByMax.addAll(features); 230 return featuresByMax; 231 } 232 233 /** 234 * This will remove all Features from this Track that do not exist 235 * on the specified Sequence. 236 */ 237 public void filterOnSequence(String sequence) { 238 // if no Sequence or Features then don't do anything 239 if ((sequence.length() == 0) || (numFeatures() == 0)) return; 240 241 for (Iterator s = sources.keySet().iterator(); s.hasNext();) { 242 String source = (String) s.next(); 243 244 // if same sequence, then skip 245 if (source.compareToIgnoreCase(sequence) == 0) continue; 246 247 // different source, so remove the features 248 TreeSet sFeatures = (TreeSet) sources.get(source); 249 for (Iterator f = sFeatures.iterator(); f.hasNext();) { 250 features.remove(f.next()); 251 } 252 253 // now remove Sequence key from the sources HashMap 254 s.remove(); 255 } 256 } 257 258 /** 259 * This will remove all Features from this Track that are not 260 * within the 'min'/'max' boundaries. If 'max' is -1, then goes 261 * to maximum Sequence length. 262 * @XXX Should throw an exception if max < min. 
263 */ 264 public void filterOnSeqPos(int min, int max) { 265 // make sure positions are legal 266 if ((max != -1) && (max < min)) { 267 GloDBUtils.printError("The max pos is less than the min pos, when filtering on sequence position."); 268 return; 269 } 270 271 // if max is 0 or no features then don't filter 272 if ((max == 0) || (numFeatures() == 0)) return; 273 274 for (Iterator s = sources.keySet().iterator(); s.hasNext();) { 275 String source = (String) s.next(); 276 277 // remove relevant features 278 TreeSet sFeatures = (TreeSet) sources.get(source); 279 for (Iterator f = sFeatures.iterator(); f.hasNext();) { 280 Feature feature = (Feature) f.next(); 281 if ((feature.getMin() < min) 282 || ((max != -1) && (feature.getMax() > max))) { 283 features.remove(feature); 284 f.remove(); // remove Feature from sources HashMap 285 } 286 } 287 288 // if no more Features on this source, then remove the source 289 if (sFeatures.size() == 0) s.remove(); 290 } 291 } 292 293 /** 294 * This will remove all Features from this Track that are outside 295 * of the specifed range. 296 * @XXX Should throw an exception if max < min. 
297 */ 298 public void filterOnLength(int min, int max) { 299 // make sure lengths are legal 300 if (max < min) { 301 GloDBUtils.printError("The max length is less than the min length, when filtering on length."); 302 return; 303 } 304 305 // if max is 0 or no features then don't filter 306 if ((max == 0) || (numFeatures() == 0)) return; 307 308 for (Iterator s = sources.keySet().iterator(); s.hasNext();) { 309 String source = (String) s.next(); 310 311 // remove relevant features 312 TreeSet sFeatures = (TreeSet) sources.get(source); 313 for (Iterator f = sFeatures.iterator(); f.hasNext();) { 314 Feature feature = (Feature) f.next(); 315 if ((feature.length() < min) || (feature.length() > max)) { 316 features.remove(feature); 317 f.remove(); // remove Feature from sources HashMap 318 } 319 } 320 321 // if no more Features on this source, then remove the source 322 if (sFeatures.size() == 0) s.remove(); 323 } 324 } 325 326 /** 327 * This will remove all Features from this Track that do not 328 * conform to the repeat criterion. Within min/max are used to 329 * define the min/max space between features. Repeat min/max are 330 * used to define the min/max number of features that must follow 331 * in a row, based on the min/max criterion, in order for those 332 * features to be included. 333 * @XXX Should throw an exception if max < min. 334 * @XXX Need to allow for within values that don't have a min. 
* @param minR minimum number of Features in a repeat group
     * @param maxR maximum number of Features in a repeat group
     * @param minW minimum offset past a Feature's max at which the next
     * Feature may start; when not positive the next Feature may start
     * anywhere from the current Feature's min
     * @param maxW maximum offset past a Feature's max at which the next
     * Feature may start
     */
    public void filterOnRepeat(int minR, int maxR, int minW, int maxW) {
        // make sure repeats are legal
        if (maxR < minR) {
            GloDBUtils.printError("The max repeat is less than the min repeat, when filtering on repeats.");
            return;
        }
        if (maxW < minW) {
            GloDBUtils.printError("The max within is less than the min within, when filtering on repeats.");
            return;
        }

        // if minR/maxR are less than 2 or no features then nothing to
        // filter
        if ((minR < 2) || (maxR < 2) || (numFeatures() == 0)) return;

        // this will contain all features that need to be removed from
        // the track.  Removal is deferred until after the scan so that
        // the per-source TreeSets are not modified while being iterated.
        ArrayList toBeRemoved = new ArrayList();

        for (Iterator s = sources.keySet().iterator(); s.hasNext();) {
            String source = (String) s.next();

            // step through all features on current source.  Note that
            // this local 'features' hides the field of the same name.
            TreeSet features = (TreeSet) sources.get(source);
            Iterator i = features.iterator();

            // this is the first feature of the current repeat
            // NOTE(review): when a source holds a single Feature the
            // loop below never runs, so that Feature is never queued
            // for removal -- confirm this is intended.
            Feature fRepeat = (Feature) i.next();

            // fStart/fEnd refer to the range for the beginning of the
            // next Feature
            int fStart = (minW > 0) ? fRepeat.getMax() + minW : fRepeat.getMin();
            int fEnd = fRepeat.getMax() + maxW;

            // count of how many repeats found
            int count = 1;

            while (i.hasNext()) {
                Feature fCurrent = (Feature) i.next();

                // test if new feature is within appropriate range.
                // if fStart = 0 then allow for features overlapping
                if ((fCurrent.getMin() >= fStart) && (fCurrent.getMin() <= fEnd)) {
                    // valid repeat
                    count++;

                    if (count > maxR) {
                        // reached max number of features in a
                        // repeating group, so start over from here
                        fRepeat = fCurrent;

                        // reset repeat counter
                        count = 1;
                    }

                    if ((! i.hasNext()) && (count < minR)) {
                        // not enough repeats so discard from fRepeat
                        // to the current feature.  subSet() excludes
                        // its upper bound, hence the separate add().
                        toBeRemoved.addAll(features.subSet(fRepeat, fCurrent));
                        toBeRemoved.add(fCurrent);
                    }

                } else {
                    if (count < minR) {
                        // not enough repeats so discard from fRepeat
                        // to the current feature (exclusive)
                        toBeRemoved.addAll(features.subSet(fRepeat, fCurrent));
                    }

                    if (! i.hasNext()) {
                        // started a new repeat but there is
                        // nothing left to repeat with
                        toBeRemoved.add(fCurrent);
                    }

                    // start new over from here, because were able to
                    // discard features from fRepeat, or found enough
                    // repeats (current >= minR)
                    fRepeat = fCurrent;

                    // reset repeat counter
                    count = 1;
                }

                // if count is 1, then we started a new group
                if (count == 1) {
                    // update for new feature
                    fStart = (minW > 0) ? fCurrent.getMax() + minW : fCurrent.getMin();
                    fEnd = fCurrent.getMax() + maxW;
                } else {
                    // update for new feature
                    int tStart = (minW > 0) ? fCurrent.getMax() + minW : fCurrent.getMin();
                    int tEnd = fCurrent.getMax() + maxW;

                    // only replace range boundaries if new feature has a
                    // stop point larger than the previous stop point
                    if (tEnd > fEnd) {
                        fStart = tStart;
                        fEnd = tEnd;
                    }
                }
            }
        }

        // remove non-repeating features
        for (Iterator i = toBeRemoved.iterator(); i.hasNext();) {
            removeFeature((Feature) i.next());
        }
    }

    /**
     * This will remove all Features from this Track that do not
     * contain the specified attribute.
449 */ 450 public void filterOnAttribute(String key, String value) { 451 // make sure key and value are non-null 452 if (key.length() == 0) { 453 GloDBUtils.printError("Empty key, when filtering on attribute."); 454 return; 455 } 456 /* 457 if (value.length() == 0) { 458 GloDBUtils.printError("Empty value, when filtering on attribute."); 459 return; 460 } 461 */ 462 463 // if no features then nothing to filter 464 if (numFeatures() == 0) return; 465 466 for (Iterator s = sources.keySet().iterator(); s.hasNext();) { 467 String source = (String) s.next(); 468 469 // remove relevant features 470 TreeSet sFeatures = (TreeSet) sources.get(source); 471 for (Iterator f = sFeatures.iterator(); f.hasNext();) { 472 Feature feature = (Feature) f.next(); 473 if ((! feature.containsAttribute(key)) 474 || (! feature.getAttribute(key).equals(value))) { 475 features.remove(feature); 476 f.remove(); // remove Feature from sources HashMap 477 } 478 } 479 480 // if no more Features on this source, then remove the source 481 if (sFeatures.size() == 0) s.remove(); 482 } 483 } 484 485 /** Add an attribute. */ 486 public void addAttribute(Object key, Object value) { attributes.put(key, value); } 487 488 /** Remove an attribute. */ 489 public void delAttribute(Object key) { attributes.remove(key); } 490 491 /** Returns true if attribute 'key' exists. */ 492 public boolean containsAttribute(Object key) { return attributes.containsKey(key); } 493 494 /** Get value for attribute 'key'. */ 495 public Object getAttribute(Object key) { return attributes.get(key); } 496 497 /** 498 * This will add 'features' to the current feature set. This will 499 * update the {@link #sources sources} HashMap based on the new 500 * set of Features. 
501 */ 502 public void addFeatures(TreeSet features) { 503 // if features is null, then just need to update the 504 // ObjectHandle sets 505 if (features != null) { 506 for (Iterator i = features.iterator(); i.hasNext();) { 507 // use addFeature() to build the sources HashMap 508 addFeature((Feature) i.next(), false); 509 } 510 } 511 512 // rebuild trackPool lists if the Track is in the trackPool 513 if (ObjectHandles.containsTrack(id)) ObjectHandles.rebuildTrack(this); 514 } 515 516 /** 517 * Adds a Feature to 'features'. This will update the {@link 518 * #sources sources} HashMap. If 'features' doesn't exist a new 519 * TreeSet will be created. If 'newFeature' is null, then this 520 * method won't do anything. 521 */ 522 public void addFeature(Feature newFeature) { 523 // rebuild the trackPool lists if the Track is in the 524 // trackPool. 525 if (ObjectHandles.containsTrack(id)) { 526 addFeature(newFeature, true); 527 } else { 528 addFeature(newFeature, false); 529 } 530 } 531 532 /** 533 * Adds a Feature to 'features'. This will update the {@link 534 * #sources sources} HashMap. If 'features' doesn't exist a new 535 * TreeSet will be created. If 'newFeature' is null, then this 536 * method won't do anything. 537 */ 538 public void addFeature(Feature newFeature, boolean rebuildPool) { 539 // ignore null newFeature values 540 if (newFeature != null) { 541 // create features if it doesn't already exist. Create 542 // sources also, since if there are no features then 543 // sources must also not exist. 
544 if (features == null) { 545 features = new TreeSet(); 546 sources = new HashMap(); 547 } 548 549 features.add(newFeature); 550 String sequence = newFeature.getSource().getID(); 551 TreeSet seqSet; 552 if (sources.containsKey(sequence)) { 553 seqSet = (TreeSet) sources.get(sequence); 554 } else { 555 seqSet = new TreeSet(); 556 } 557 seqSet.add(newFeature); 558 sources.put(sequence, seqSet); 559 560 // only rebuild the trackPool structures if necessary 561 if (rebuildPool) ObjectHandles.rebuildTrack(this); 562 } 563 } 564 565 /** 566 * Removes a Feature from 'features'. This will update 567 * the {@link #sources sources} HashMap. If 'newFeature' is null, 568 * then this method won't do anything. 569 */ 570 public void removeFeature(Feature newFeature) { 571 // don't do anything if newFeature or features is empty 572 if ((newFeature != null) && (features != null)) { 573 features.remove(newFeature); 574 String sequence = newFeature.getSource().getID(); 575 TreeSet seqSet = (TreeSet) sources.get(sequence); 576 if (seqSet.size() > 1) { 577 seqSet.remove(newFeature); 578 sources.put(sequence, seqSet); 579 } else { 580 // only Feature on Sequence, so remove entire key 581 sources.remove(sequence); 582 } 583 } 584 } 585 586 /** 587 * Returns the number of Features contained in the Track. If 588 * Features exactly overlap, they will be still be counted 589 * separately. 590 */ 591 public int numFeatures() { 592 if (features == null) { 593 return 0; 594 } else { 595 return features.size(); 596 } 597 } 598 599 /** Returns the number of Sources spanned by the Track. */ 600 public int numSources() { 601 if (numFeatures() == 0) { 602 return 0; 603 } else { 604 return sources.size(); 605 } 606 } 607 608 /** Returns an Iterator over 'features'. */ 609 public Iterator featureIterator() { 610 if (numFeatures() == 0) { 611 return null; 612 } else { 613 return features.iterator(); 614 } 615 } 616 617 /** Get the set of source Sequence objects. 
*/ 618 public Set getSourceSet() { 619 if (sources == null) { 620 return null; 621 } else { 622 return sources.keySet(); 623 } 624 } 625 626 /** 627 * Get the set of Features based on the Sequence object. 628 */ 629 public TreeSet featuresBySource(Sequence sequence) { 630 return featuresBySource(sequence.getID()); 631 } 632 633 /** 634 * Get the set of Features based on the Sequence ID. 635 */ 636 public TreeSet featuresBySource(String sequence) { 637 if (sources == null) { 638 return null; 639 } else { 640 return (TreeSet) sources.get(sequence); 641 } 642 } 643 644 /** 645 * Returns the sequence data. Sequence data that occurs on 646 * different contigs or is non-contiguous with separate items in 647 * the ArrayList. 648 * @XXX Should return sets of Sequences. A new set for each 649 * sequence and within each Sequence set, a new set for each 650 * non-contiguous Feature. However, if 2 Sequences have same data 651 * (ie to contigs are a repeat), then using Sets won't work. 652 */ 653 public ArrayList getData() { 654 if (sources == null) return null; 655 656 ArrayList output = new ArrayList(); 657 658 // loop through all sources 659 for (Iterator seqs = sources.keySet().iterator(); seqs.hasNext();) { 660 String seqID = (String) seqs.next(); 661 Sequence seq = (Sequence) ObjectHandles.sequencePool.get(seqID); 662 TreeSet track = (TreeSet) sources.get(seqID); 663 664 // loop through the Set of features based on the source. 665 // If sequences overlap or are contiguous, then merge 666 // them. 
667 Iterator features = track.iterator(); 668 Feature feature = (Feature) features.next(); 669 int start = feature.getMin(); 670 int stop = feature.getMax(); 671 672 while (features.hasNext()) { 673 feature = (Feature) features.next(); 674 675 // if find a gap then start next sequence string 676 if (feature.getMin() > (stop + 1)) { 677 output.add(seq.getDataBounded(start, stop)); 678 start = feature.getMin(); 679 } 680 681 if (stop < feature.getMax()) stop = feature.getMax(); 682 } 683 684 output.add(seq.getDataBounded(start, stop)); 685 } 686 return output; 687 } 688 689 /** 690 * Returns the sequence data formatted with "\n" inserted every 691 * Sequence.FORMAT_WIDTH characters and blank lines inserted 692 * between sequences. 693 */ 694 public String getDataFormatted() { 695 if (sources == null) return ""; 696 697 ArrayList dataArray = getData(); 698 699 StringBuffer out = new StringBuffer(""); 700 701 int i = 0; 702 int total = 0; 703 for (Iterator iDA = dataArray.iterator(); iDA.hasNext();) { 704 String data = (String) iDA.next(); 705 706 // don't insert blank line if first sequence. 707 if (total != 0) out.append("\n"); 708 709 i = Sequence.FORMAT_WIDTH; 710 total = data.length(); 711 712 while (i < total) { 713 out.append(data.substring(i - Sequence.FORMAT_WIDTH, i) + "\n"); 714 i += Sequence.FORMAT_WIDTH; 715 } 716 717 // have less than Sequence.FORMAT_WIDTH chars remaining in sequence 718 if (i >= total) out.append(data.substring(i - Sequence.FORMAT_WIDTH, total) + "\n"); 719 } 720 721 return out.toString(); 722 } 723 724 /** 725 * Returns the sequence data formatted as a multi-sequence FASTA 726 * file. New lines ("\n") are inserted every 727 * Sequence.FORMAT_WIDTH characters and a blank line is inserted 728 * between sequences. 
* @return one header line and one formatted data block per Feature,
     * or an empty String if the Track has no sources
     */
    public String getDataFASTA() {
        if (sources == null) return "";

        StringBuffer out = new StringBuffer("");

        // loop through all sources
        for (Iterator seqs = sources.keySet().iterator(); seqs.hasNext();) {
            String seqID = (String) seqs.next();
            TreeSet track = (TreeSet) sources.get(seqID);

            // loop through the Set of features based on the source.
            for (Iterator features = track.iterator(); features.hasNext();) {
                Feature feature = (Feature) features.next();

                // XXX should include more formatting
                String header = ">" + seqID + " ";

                // create a copy of the attributes so we can remove
                // objects from the HashMap as we process them below
                // NOTE(review): this relies on getAttributesMap()
                // handing back a copy; if it returns the Feature's own
                // map the removals below alter the Feature -- confirm.
                HashMap attribs = feature.getAttributesMap();

                // start with the ID attribute, if not present, then
                // use the Track's ID.
                if (attribs.containsKey("ID")) {
                    header += attribs.get("ID");
                    attribs.remove("ID");
                } else {
                    header += id;
                }

                // if contains 'descriptors' then remove label
                if (attribs.containsKey("descriptors")) {
                    header += " " + attribs.get("descriptors");
                    attribs.remove("descriptors");
                }

                // if "gene_boundaries" already exists, then we
                // probably didn't process this header and so we
                // should just leave it alone
                if (! attribs.containsKey("gene_boundaries")) {
                    String gb = "gene_boundaries:(" + feature.getSource().getID() + ":";
                    gb += feature.getStart() + ".." + feature.getStop();

                    // if 'source' already handled, then remove from
                    // attribs map
                    if (attribs.containsKey("source")) {
                        String value = (String) attribs.get("source");
                        // if (value.equalsIgnoreCase(feature.getSource().getID())) {
                        if (value.equals(feature.getSource().getID())) {
                            attribs.remove("source");
                        }
                    }

                    // if 'boundaries' exists then remove from attribs
                    // map, because this should be equivalent to the
                    // Feature's start/stop
                    attribs.remove("boundaries");

                    if (attribs.containsKey("strand")) {
                        gb += "[" + (String) attribs.get("strand") + "]";
                        attribs.remove("strand"); // don't need anymore
                    }
                    gb += ")";
                    header += " " + gb;
                }

                // if contains 'dbxref' then remove label and enclose
                // in '()'.  The entries are appended back to back,
                // without any separator between them.
                if (attribs.containsKey("dbxref")) {
                    header += " (";
                    Set dbxref = (Set) attribs.get("dbxref");
                    for (Iterator dI = dbxref.iterator(); dI.hasNext();) {
                        header += dI.next();
                    }
                    header += ")";
                    attribs.remove("dbxref");
                }

                // add remaining attributes to the header, each as
                // " key:value"
                for (Iterator l = (attribs.keySet()).iterator(); l.hasNext();) {
                    String key = (String) l.next();
                    header += " " + key + ":" + attribs.get(key);
                }

                out.append(header + "\n");
                out.append(feature.getDataFormatted() + "\n");
            }
        }

        return out.toString();
    }

    /**
     * Returns 'true' if the Track does not contain gaps between
     * Features.  If the Features occur on different sequences, then
     * this will return 'false'.  An empty Track also yields 'false'.
     */
    public boolean isContiguous() {
        // return false if more than one source or no features
        if ((numFeatures() == 0) || (sources.size() > 1)) return false;
        return FeatureUtils.isContiguous(features);
    }

    /**
     * This will merge all overlapping Features in the Track,
     * creating new Feature objects as necessary.
836 */ 837 public void mergeContiguous() { 838 setFeatures(FeatureUtils.mergeContiguous(this)); 839 } 840 841 /** 842 * This will merge all Features in the Track that are within 843 * maxSpace of each other. New Features will be created to span 844 * the entire cluster. Threshold sets the minimum number of 845 * Features necessary to be considered a cluster and thus included 846 * in the output set. A new Track will be returned containing the 847 * clusters. This will return 'null' if there is no match. 848 * @deprecated replaced with cluster.py 849 * @param id the name of the new Track 850 * @param maxSpace the maximum allowed space between Features in a 851 * cluster 852 * @param threshold the minimum number of Features needed in a 853 * cluster, for the cluster to be included in the output 854 */ 855 public Track cluster(String id, int maxSpace, int threshold) { 856 TreeSet features = FeatureUtils.cluster(this, maxSpace, threshold); 857 858 // return null if no match 859 if (features.size() == 0) return null; 860 861 if (id.length() == 0) { 862 // use the parent Track's ID as the base for a random ID. 863 // The clone isn't added to the trackPool but still make 864 // sure it has a valid ID. 865 id = randomID("_" + this.id + "_"); 866 } 867 Track track = new Track(id); 868 track.attributes = this.attributes; 869 track.setFeatures(features); 870 return track; 871 } 872 873 /** 874 * This will return a copy of the track without any duplicate 875 * features (based on start/stop values). 876 */ 877 public Track noRepeats() { 878 boolean saveFlag = GloDBUtils.ignoreAttributes(); 879 GloDBUtils.setIgnoreAttributes(true); 880 Track newTrack = (Track) clone(); 881 GloDBUtils.setIgnoreAttributes(saveFlag); 882 return newTrack; 883 } 884 885 /** 886 * This will remove duplicate features, based on start/stop 887 * values. Attributes for the first of the duplicates will be the 888 * primary attributes used. 
NOTE that this is not relevant if 889 * GloDBUtils.IGNORE_ATTRIBUTES is set to 'true'. 890 */ 891 /* 892 public void removeRepeats() { 893 if (sources == null) return; 894 895 // loop through all sources 896 for (Iterator s = sources.keySet().iterator(); s.hasNext();) { 897 String source = (String) s.next(); 898 899 // loop through the Set of features based on the source. 900 TreeSet sFeatures = (TreeSet) sources.get(source); 901 Iterator f = sFeatures.iterator(); 902 if (! f.hasNext()) return; 903 904 // get first feature 905 Feature featureLast = (Feature) f.next(); 906 int lastStart = featureLast.getStart(); 907 int lastStop = featureLast.getStop(); 908 909 while (f.hasNext()) { 910 Feature feature = (Feature) f.next(); 911 912 // XXX not sure why ".equals()" doesn't work 913 // if (featureLast.equals(feature)) { 914 if ((lastStart == feature.getStart()) && (lastStop == feature.getStop())) { 915 // merge attributes, keeping 916 // featureLast.attributes as the default 917 HashMap attribs = feature.getAttributesMap(); 918 attribs.putAll(featureLast.getAttributesMap()); 919 featureLast.setAttributes(attribs); 920 921 // remove current feature from collection 922 features.remove(feature); 923 f.remove(); // remove Feature from sources HashMap 924 } else { 925 featureLast = feature; 926 lastStart = featureLast.getStart(); 927 lastStop = featureLast.getStop(); 928 } 929 } 930 931 // if no more Features on this source, then remove the source 932 if (sFeatures.size() == 0) s.remove(); 933 } 934 } 935 */ 936 937 /** 938 * Returns 'true' if the Features contained in the Track all refer 939 * to the same sequence. This is similar to {@link #isContiguous() 940 * isContiguous()} but allows for gaps between Features. 
941 */ 942 public boolean isSingleSource() { 943 try { 944 if (sources.size() > 1) { return false; } 945 else { return true; } 946 } catch (NullPointerException e) { 947 GloDBUtils.printError("No features."); 948 return false; 949 } 950 } 951 952 /** 953 * Inverts the positions of each feature in the Track. For 954 * example, if a feature had a start position of 10 and a stop 955 * position of 20 on a contig that was 100 positions long, then 956 * flipping the feature would result in a new Feature object 957 * with a start position of 80 and a stop position of 90. 958 * Flipping a Track will result in the creation of new Feature 959 * objects for each feature in the Track. 960 * @return Returns a new Track object in which the positions of 961 * all features are flipped. 962 * @XXX Not yet implemented. 963 */ 964 public Track flip() { return null; } 965 966 /** 967 * Returns the minimum start position in the Track. Will return 968 * '-1' if there are no features. This will return '-1' if the 969 * Track contains features on different contigs (ie {@link 970 * #isSingleSource() isSingleSource()} returns 'false'). 971 */ 972 public int getMin() { 973 if (numFeatures() == 0) return -1; 974 975 return ((Feature) features.first()).getMin(); 976 } 977 978 /** 979 * Returns the maximum stop position in the Track. Will return 980 * '-1' if there are no Features. Note that the Features are 981 * sorted by min values, so it's not clear what the max Feature 982 * value is, except by testing each Feature. This will return '-1' 983 * if the Track contains Features on different contigs (ie {@link 984 * #isSingleSource() isSingleSource()} returns 'false'). 
985 */ 986 public int getMax() { 987 if (numFeatures() == 0) return -1; 988 989 Iterator i = features.iterator(); 990 Feature feature = (Feature) i.next(); 991 int max = feature.getMax(); 992 993 while (i.hasNext()) { 994 feature = (Feature) i.next(); 995 if (feature.getMax() > max) { max = feature.getMax(); } 996 } 997 return max; 998 } 999 1000 /** 1001 * Returns the number of positions contained in the Track. 1002 * Overlapping positions will only be counted once. 1003 */ 1004 public int length() { 1005 // if no Features then 0 length 1006 if (numFeatures() == 0) return -1; 1007 1008 int length = 0; 1009 1010 // loop through all features. If sequences overlap or are 1011 // contiguous, then merge them. 1012 Iterator i = features.iterator(); 1013 Feature feature = (Feature) i.next(); 1014 int start = feature.getMin(); 1015 int stop = feature.getMax(); 1016 1017 while (i.hasNext()) { 1018 feature = (Feature) i.next(); 1019 1020 // skip over gaps 1021 if (feature.getMin() > (stop + 1)) { 1022 length += (stop - start) + 1; 1023 start = feature.getMin(); 1024 } 1025 1026 if (stop < feature.getMax()) stop = feature.getMax(); 1027 } 1028 1029 length += (stop - start) + 1; 1030 1031 return length; 1032 } 1033 1034 /** 1035 * Compares this object with the specified object for order. 1036 * Returns a negative integer, zero, or a positive integer as this 1037 * object is less than, equal to, or greater than the specified 1038 * object. 1039 * @XXX This is necessary for 'Comparable'. 1040 * @XXX Not yet implemented. 1041 */ 1042 public int compareTo(Object o) { 1043 // convert object to Track 1044 Track track = (Track) o; 1045 1046 // who ever has min is less. 1047 if (this.getMin() < track.getMin()) { 1048 return -1; 1049 } else if (this.getMin() > track.getMin()) { 1050 return 1; 1051 } 1052 1053 // min are equal, so whoever ends first is less. 
1054 if (this.getMax() < track.getMax()) { 1055 return -1; 1056 } else if (this.getMax() > track.getMax()) { 1057 return 1; 1058 } 1059 1060 // min and max are equal 1061 return 0; 1062 } 1063 1064 /** 1065 * Returns '-1' if this Track exists after the integer 'pos', 1066 * returns '0' if 'pos' is contained in this Track, and '1' if 1067 * 'pos' occurs after this Track. 1068 * @XXX This assumes 'pos' is positive within this Track's 1069 * Sequence boundaries. 1070 * @XXX Not clear how to deal with Sequences in Tracks. 1071 * @XXX For Tracks, this should test contains() for each 1072 * Feature within the Track. 1073 * @XXX Not yet implemented. 1074 */ 1075 public int contains(int pos) { return 0; } 1076 1077 /** 1078 * Returns 'true' if 'feature' exists in this Track. 1079 */ 1080 public boolean contains(Feature feature) { 1081 return features.contains(feature); 1082 } 1083 1084 /** 1085 * Returns 'true' if this Track contains any Features on 'source'. 1086 */ 1087 public boolean contains(String source) { 1088 if (sources == null) return false; 1089 Set keys = sources.keySet(); 1090 return keys.contains(source); 1091 } 1092 1093 /** 1094 * Returns 'true' if the Feature 'featureB' overlaps at least one 1095 * Feature in this Track. 
1096 * @XXX Should use Sequences to limit the searches 1097 */ 1098 public boolean overlaps(Feature featureB) { 1099 if ((numFeatures() == 0) || (featureB == null)) return false; 1100 1101 Feature featureA; 1102 // only check Features that have the same source 1103 String source = featureB.getSourceID(); 1104 Iterator iA = featuresBySource(source).iterator(); 1105 1106 // get initial Features 1107 if (iA.hasNext()) { 1108 featureA = (Feature) iA.next(); 1109 } else { 1110 return false; 1111 } 1112 int minA = featureA.getMin(); 1113 int maxA = featureA.getMax(); 1114 int minB = featureB.getMin(); 1115 int maxB = featureB.getMax(); 1116 1117 while (true) { 1118 if (minB <= maxA) { 1119 if (minA <= maxB) { 1120 // B.min <= A.max and A.min <= B.max 1121 return true; 1122 } else { 1123 // featureB didn't match 1124 return false; 1125 } 1126 } else { 1127 if (iA.hasNext()) { 1128 // featureA is less than featureB, so increment featureA 1129 featureA = (Feature) iA.next(); 1130 minA = featureA.getMin(); 1131 maxA = featureA.getMax(); 1132 continue; 1133 } else { 1134 // have run out of Features in A 1135 return false; 1136 } 1137 } 1138 } 1139 } 1140 1141 /** 1142 * Returns 'true' if a Feature in trackB overlaps at least one 1143 * Feature in this Track. 1144 * @XXX Should use Sequences to limit the searches 1145 */ 1146 public boolean overlaps(Track trackB) { 1147 if ((numFeatures() == 0) || (trackB == null)) return false; 1148 1149 for (Iterator sourceIt = sources.keySet().iterator(); sourceIt.hasNext();) { 1150 String source = (String) sourceIt.next(); 1151 1152 // get Features for this source 1153 TreeSet featuresA = (TreeSet) sources.get(source); 1154 1155 // if sourcesB doesn't include 'source' then add all of 1156 // the Features on the current Sequence and continue. 1157 if (! 
trackB.contains(source)) continue; 1158 1159 // get Features for this source 1160 TreeSet featuresB = (TreeSet) trackB.featuresBySource(source); 1161 1162 Feature featureA, featureB; 1163 Iterator iA = featuresA.iterator(); 1164 Iterator iB = featuresB.iterator(); 1165 1166 // get initial Features 1167 if (iA.hasNext() && iB.hasNext()) { 1168 featureA = (Feature) iA.next(); 1169 featureB = (Feature) iB.next(); 1170 } else { 1171 return false; 1172 } 1173 int minA = featureA.getMin(); 1174 int maxA = featureA.getMax(); 1175 int minB = featureB.getMin(); 1176 int maxB = featureB.getMax(); 1177 1178 while (true) { 1179 if (minB <= maxA) { 1180 if (minA <= maxB) { 1181 // B.min <= A.max and A.min <= B.max 1182 return true; 1183 } else { 1184 if (iB.hasNext()) { 1185 // featureB is less than featureA, so increment featureB 1186 featureB = (Feature) iB.next(); 1187 minB = featureB.getMin(); 1188 maxB = featureB.getMax(); 1189 continue; 1190 } else { 1191 // have run out of Features in B 1192 return false; 1193 } 1194 } 1195 } else { 1196 if (iA.hasNext()) { 1197 // featureA is less than featureB, so increment featureA 1198 featureA = (Feature) iA.next(); 1199 minA = featureA.getMin(); 1200 maxA = featureA.getMax(); 1201 continue; 1202 } else { 1203 // have run out of Features in A 1204 return false; 1205 } 1206 } 1207 } 1208 } 1209 return false; 1210 } 1211 1212 /** 1213 * Create a shallow clone of the existing object (clone the 1214 * structure but not the Objects). This differs from clone() in 1215 * that the clone will have the Features merged. 1216 * @XXX Although public, this is not meant for use by the end user 1217 * and does will not add the Track to the ObjectHandles Track 1218 * pool. 1219 */ 1220 public Track cloneMerged() { 1221 Track track = new Track(false); 1222 // use the parent Track's ID as the base for a random ID. 1223 // The clone isn't added to the trackPool but still make 1224 // sure it has a valid ID. 
1225 track.id = randomID("_" + this.id + "_"); 1226 track.attributes = this.attributes; 1227 track.setFeatures(FeatureUtils.mergeContiguous(this)); 1228 return track; 1229 } 1230 1231 /** 1232 * Create a shallow clone of the existing object (clone the 1233 * structure but not the Objects). This clone will be added to 1234 * ObjectHandles.trackPool. 1235 */ 1236 public Object clone() { 1237 return cloneTrack(true); 1238 } 1239 1240 /** 1241 * Create a shallow clone of the existing object (clone the 1242 * structure but not the Objects). 1243 * @XXX This could probably be done in a much more efficient way 1244 * by cloning each field of a Track, rather than rebuilding the 1245 * features. However, rebuilding the features allows us to use 1246 * IGNORE_ATTRIBUTES to remove repeats. 1247 */ 1248 public Object cloneTrack(boolean addToPool) { 1249 Track track = new Track(addToPool); 1250 // use the parent Track's ID as the base for a random ID. The 1251 // clone isn't added to the trackPool but still make sure it 1252 // has a valid ID. 1253 track.id = randomID("_" + this.id + "_"); 1254 track.attributes = this.attributes; 1255 track.setFeatures(this.features); 1256 return track; 1257 } 1258 1259 /** 1260 * Erases all Track information, except for the ID. 1261 */ 1262 public void erase() { 1263 setAttributes(null); 1264 setFeatures(null); 1265 } 1266 1267 /** 1268 * Uses 'base' to create a random ID string that doesn't already 1269 * exist in the trackPool. 1270 */ 1271 public static String randomID(String base) { 1272 String id = base + Long.toString(Math.abs(random.nextLong())); 1273 while (ObjectHandles.trackPool.containsKey(id)) { 1274 id = base + Long.toString(Math.abs(random.nextLong())); 1275 } 1276 return id; 1277 } 1278 1279 /** Only returns Feature start/stop position information. 
*/ 1280 public String toString() { 1281 if (sources == null) return ""; 1282 1283 String out = ""; 1284 for (Iterator i = (sources.keySet()).iterator(); i.hasNext();) { 1285 String sequence = (String) i.next(); 1286 TreeSet features = featuresBySource(sequence); 1287 out += "\n Source (" + sequence + "): " + features.size() + " features"; 1288 // out += "\n " + features.size() + " features on source \"" + sequence + "\"\n"; 1289 } 1290 out += "\n"; 1291 return out; 1292 } 1293 1294 /** Only returns Feature start/stop position information. */ 1295 public String toStringMore() { 1296 if (sources == null) return ""; 1297 1298 String out = ""; 1299 for (Iterator i = (sources.keySet()).iterator(); i.hasNext();) { 1300 String sequence = (String) i.next(); 1301 TreeSet features = featuresBySource(sequence); 1302 out += "\n Source (" + sequence + "):"; 1303 for (Iterator j = features.iterator(); j.hasNext();) { 1304 out += " " + ((Feature) j.next()).toStringMin(); 1305 } 1306 } 1307 out += "\n"; 1308 return out; 1309 } 1310 1311 /** Returns all description and Feature information. 
*/ 1312 public String toStringFull() { 1313 String out = ""; 1314 1315 out += "\nID: " + id + "\n"; 1316 1317 if (attributes == null) { 1318 out += "Attributes: none"; 1319 } else { 1320 out += "Attributes:\n " + attributes; // will convert itself to a string 1321 } 1322 1323 /* 1324 out += "\nSources: "; 1325 for (Iterator i = (sources.keySet()).iterator(); i.hasNext();) { 1326 out += " " + ((Sequence) i.next()).getID(); 1327 } 1328 // if (sources.isEmpty()) { out += "0"; } 1329 // else { out += sources.size(); } 1330 */ 1331 1332 out += "\nFeatures:\n"; 1333 if (numFeatures() == 0) { 1334 out += " null"; 1335 } else { 1336 for (Iterator i = features.iterator(); i.hasNext();) { 1337 out += " " + ((Feature) i.next()).toString(); 1338 } 1339 } 1340 out += "\n"; 1341 1342 return out; 1343 } 1344 1345 /* 1346 // allow the use of jython's built-in "+" function to add tracks 1347 public Track __add__(Track other) { 1348 Operation op = new Operation(other); 1349 Track left = (Track) clone(); 1350 return Operator.fxn_AND(left, op); 1351 } 1352 1353 // allow the use of jython's built-in "is" function to add tracks 1354 public Track is_(Track other) { 1355 Operation op = new Operation(other); 1356 Track left = (Track) clone(); 1357 return Operator.fxn_sAND(left, op); 1358 } 1359 1360 // allow the use of jython's built-in "-" function to add tracks 1361 public Track __sub__(Track other) { 1362 Operation op = new Operation(other); 1363 Track left = (Track) clone(); 1364 return Operator.fxn_MINUS(left, op); 1365 } 1366 */ 1367 1368 private static class FeatureMaxComparator implements Comparator { 1369 /** 1370 * Compares this object with the specified object for order. 1371 * Returns a negative integer, zero, or a positive integer as 1372 * this object is less than, equal to, or greater than the 1373 * specified object. If can't cast argument as an Feature, 1374 * then throws a java.lang.ClassCastException. If different 1375 * sources, then sorts on the source ID. 
1376 */ 1377 public int compare(Object o1, Object o2) throws ClassCastException { 1378 // convert objects to Features 1379 Feature featureA; 1380 Feature featureB; 1381 try { 1382 featureA = (Feature) o1; 1383 featureB = (Feature) o2; 1384 } catch (ClassCastException e) { 1385 throw new ClassCastException("FeatureMaxComparator.compare() requires arguments of type Feature."); 1386 } 1387 1388 int source = featureA.getSourceID().compareTo(featureB.getSourceID()); 1389 if (source == 0) { // same source 1390 // who ever has max is less. 1391 if (featureA.getMax() < featureB.getMax()) { 1392 return -1; 1393 } else if (featureA.getMax() > featureB.getMax()) { 1394 return 1; 1395 } 1396 1397 // max are equal. 1398 if (featureA.getMin() < featureB.getMin()) { 1399 return -1; 1400 } else if (featureA.getMin() > featureB.getMin()) { 1401 return 1; 1402 } 1403 1404 if (GloDBUtils.ignoreAttributes()) { 1405 // don't use attributes to compare Features, so at 1406 // this point the Features are the same 1407 return 0; 1408 } else { 1409 // min and max are equal, so return the comparison of 1410 // the hashCodes for each of the attributes. If we 1411 // don't then when 2 Features overlap only one will be 1412 // included in the Track. 1413 Integer hashA = new Integer((featureA.getAttributes()).hashCode()); 1414 Integer hashB = new Integer((featureB.getAttributes()).hashCode()); 1415 return hashA.compareTo(hashB); 1416 } 1417 } else { 1418 // different sources so sort by source 1419 return source; 1420 } 1421 } 1422 } 1423 1424 } // Track.java 1425