001    /*
002     * CRIMSON
003     * Copyright (c) 2006, Stephen Fisher, Susan Davidson, and Junhyong Kim, 
004     * University of Pennsylvania.
005     *
006     * This program is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU General Public License as
008     * published by the Free Software Foundation; either version 2 of the
009     * License, or (at your option) any later version.
010     *
011     * This program is distributed in the hope that it will be useful, but
012     * WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019     * 02110-1301 USA.
020     *
021     * @(#)Tree.java
022     */
023    
024    package edu.upenn.crimson;
025    
026    import edu.upenn.crimson.io.*;
027    import java.util.HashSet;
028    import java.util.HashMap;
029    import java.util.ArrayList;
030    import java.util.TreeSet;
031    import java.util.Queue;
032    import java.util.LinkedList;
033    import java.util.Iterator;
034    import java.sql.SQLException;
035    
036    /**
037     * A rooted tree that is composed of Species objects.
038     *
039     * Species must be added to the tree using addSpecies() and similarly
040     * removed from the tree with removeSpecies().
041     *
042     * @XXX Should encapulate Species.add/removeChildren() in order to
043     * maintain a HshSet of all leaves/species.  This would optimize
044     * numSpecies/numLeaves and would allow for the temporary storage of
045     * min/max values (ie level and temporal depth).  These values could be
046     * stored when first computed and zeroed out when a species is
047     * added/removed from the tree.  Similarly, min/max Stem Lengths
048     * could be pre-computed.  Would need to also encapsulate setting of
049     * stem length and parent.
050     *
051     * @author  Stephen Fisher
052     * @version $Id: Tree.java,v 1.61 2009/07/21 01:19:12 fisher Exp $
053     */
054    
055    public class Tree implements Cloneable {
056             /** 
057              * If true, then whenever a tree is sampled, the list of species
058              * selected will be output to the file 'species.list'.
059              */
060             final static boolean DEBUG_OUTPUT = false;     
061    
062             /**
063              * Used in isUltrametric() to deal with machine error.  If
064              * (maxTempDepth() - minTempDepth() < MaxMinDIFF), consider it to
065              * be an ultrametric tree.
066              */
067             //      final static double MaxMinDIFF = 0.00001;      
068    
069             /** 
070              * This is a unique name for the Tree object. We don't allow the
071              * ID to change after a tree is created.
072              */
073             private String id = "";
074    
075             /** Model specific to this Tree object. */
076             private String modelID = "";
077    
078             /** Notes specific to this Tree object. */
079             private String notes = "";
080    
081             /** The root Species of the tree. */
082             private Species root = null;
083    
084             /** Array containing references to all species in the tree. */
085             private ArrayList speciesList = new ArrayList();
086    
087             /** 
088              * The number of species is equivalent to speciesList.size().  It
089              * is stored separately, in case the tree has not been built from
090              * the newick string.
091              */
092             private int numSpecies = -1;
093    
094             /** 
095              * The number of leaves is stored, in case the tree has not been
096              * built from the newick string.
097              */
098             private int numLeaves = -1;
099    
100             /** True when all inner nodes have exactly 2 children. */
101             private boolean binary = false;
102    
103             /** 
104              * True when all leaves have the same temporal depth:
105              * (minTempDepth = maxTempDepth)
106              */
107             private boolean ultrametric = false;
108    
109             /** 
110              * The minimum number of species between the leaves and root.
111              * This is inclusive of the root.  Thus a leaf that is a child of
112              * the root would have a level of 1.
113              */
114             private int minLevel = -1;
115    
116             /** 
117              * The maximum number of species between the leaves and root.
118              * This is inclusive of the root.  Thus a leaf that is a child of
119              * the root would have a level of 1.
120              */
121             private int maxLevel = -1;
122    
123             /** The minimum distance between the species and their parents. */
124             private double minStemLength = -1.0;
125    
126             /** The maximum distance between the species and their parents. */
127             private double maxStemLength = -1.0;
128    
129             /** The minimum distance between the leaves and root. */
130             private double minTempDepth = -1.0;
131    
132             /** The maximum distance between the leaves and root. */
133             private double maxTempDepth = -1.0;
134    
135             /** This contains the partitions loaded for this tree. */
136             private HashSet partitions = new HashSet();
137    
138             public Tree(String id) throws InvalidIDException {
139                      this(id, true);
140             }
141    
142             public Tree(String id, boolean addToPool) throws InvalidIDException {
143                      // if no ID, then error
144                      if (CrimsonUtils.isEmpty(id)) {
145                                    CrimsonUtils.printError("Invalid tree ID.");
146                                    throw new InvalidIDException("Tree ID can not be empty.");
147                      } else if (ObjectHandles.containsTree(id)) {
148                                    CrimsonUtils.printError("Tree ID already exists, much specify a different ID:" + id);
149                                    throw new InvalidIDException("Tree ID already exists.");
150                      }
151                      this.id = id.trim().toUpperCase();
152    
153                      // add the tree to the relevant lists
154                      if (addToPool) ObjectHandles.addTree(this);
155             }
156    
157             /** 
158              * Trees should not be instantiated by the user, using this
159              * routine.  It is used to load trees from the database.
160              */
161             public Tree(String id, Species root, Model model, String notes, int numSpecies, int numLeaves,
162                                             boolean binary, boolean ultrametric, int minLevel, int maxLevel, 
163                                             double minTempDepth, double maxTempDepth, double minStemLength, double maxStemLength) 
164                      throws InvalidIDException {
165                      // if no ID, then error
166                      if (CrimsonUtils.isEmpty(id)) {
167                                    CrimsonUtils.printError("Invalid tree ID.");
168                                    throw new InvalidIDException("Tree ID can not be empty.");
169                      } else if (ObjectHandles.containsTree(id)) {
170                                    CrimsonUtils.printError("Tree ID already exists, much specify a different ID:" + id);
171                                    throw new InvalidIDException("Tree ID already exists.");
172                      }
173    
174                      this.id = id.trim().toUpperCase();
175                      this.root = root;
176                      this.notes = notes;
177    
178                      // the user should not be setting these values.  These are set
179                      // by computeStats(). This routine exists so that Trees can
180                      // set this when the tree is loaded from the database.
181                      this.numSpecies = numSpecies;
182                      this.numLeaves = numLeaves;
183                      this.binary = binary;
184                      this.ultrametric = ultrametric;
185                      this.minLevel = minLevel;
186                      this.maxLevel = maxLevel;
187                      this.minTempDepth = minTempDepth;
188                      this.maxTempDepth = maxTempDepth;
189                      this.minStemLength = minStemLength;
190                      this.maxStemLength = maxStemLength;
191    
192                      // we require a model object rather than an ID, to make sure
193                      // that the model really exists.
194                      if (model != null) this.modelID = model.getID();
195    
196                      // add the tree to the relevant lists
197                      ObjectHandles.addTree(this);
198             }
199    
200        //--------------------------------------------------------------------------
201        // Setters and Getters
202    
203        /** Get the ID. */
204        public String getID() { return id; }
205    
206             /** Returns the ID for the model underlying this tree. */
207             public String getModelID() { return modelID; }
208                      
209             /** 
210              * Sets the ID for the model underlying this tree. We require a
211              * model object rather than an ID, to make sure that the model
212              * really exists.
213              */
214             public void setModelID(Model model) { 
215                      if (model == null) return;
216    
217                      this.modelID = model.getID();
218    
219                      // update the TREES database
220                      if (! Trees.dbContains(id)) {
221                                    CrimsonUtils.printError("Tree (" + id + ") not in database, tree model ID not saved to database.");
222                                    return;
223                      }
224                                    
225                      // UPDATE trees SET model_id = modelID WHERE id = id
226                      String sql = "UPDATE trees SET model_id = '" + modelID.toUpperCase() + "' WHERE id = '" + id.toUpperCase() + "'";
227                      if (! Database.execUpdate(sql))
228                                    CrimsonUtils.printError("Error updating model_id in tree " + id + ".");
229             }
230             
231             public String getNotes() { return this.notes; }
232    
233             /** Updates the local notes field and the TREES table. */
234             public void setNotes(String notes) { 
235                      this.notes = notes;
236    
237                      if (! Trees.dbContains(id)) {
238                                    CrimsonUtils.printError("Tree (" + id + ") not in database, notes not saved to database.");
239                                    return;
240                      }
241                      
242                      // UPDATE trees SET notes = notes WHERE id = id
243                      String sql = "UPDATE trees SET notes = '" + notes + "' WHERE id = '" + id.toUpperCase() + "'";
244                      if (! Database.execUpdate(sql))
245                                    CrimsonUtils.printError("Error updating notes in " + id + ".");
246             }
247    
248             public Species getRoot() { return root; }
249    
250             /** 
251              * The root's label will be set to "_Root" if there is currently
252              * no label.  The root will also be added to the species list.
253              */
254             public void setRoot(Species root) { 
255                      if ((root != null) && CrimsonUtils.isEmpty(root.getID())) root.setID("_Root");
256                      this.root = root; 
257             }
258    
259             /** Returns the list of all species in the tree. */
260             public ArrayList getSpeciesList() { return speciesList; }
261    
262             /** This will return the set of partitions. */
263             public HashSet getPartitions() { return partitions; }
264    
265             /** This will return the number of species in the tree. */
266             public int getNumSpecies() { return numSpecies; }
267    
268             /** This will return the number of leaves in the tree. */
269             public int getNumLeaves() { return numLeaves; }
270    
271             public int getMinLevel() { return minLevel; }
272    
273             public boolean isBinary() { return binary; }
274    
275             public boolean isUltrametric() { return ultrametric; }
276    
277             public int getMaxLevel() { return maxLevel; }
278    
279             public double getMinStemLength() { return minStemLength; }
280    
281             public double getMaxStemLength() { return maxStemLength; }
282    
283             public double getMinTempDepth() { return minTempDepth; }
284    
285             public double getMaxTempDepth() { return maxTempDepth; }
286    
287        //--------------------------------------------------------------------------
288        // Miscellaneous Methods
289    
290             /** 
291              * Returns the newick string for this tree, as stored in the
292              * database.  This is retrieved from the database as needed to
293              * conserve RAM, instead of storing it as a string in the Tree
294              * object.  
295              */
296             public String getNewick() { 
297                      String sql = "SELECT newick FROM TREES WHERE id = '" + id.toUpperCase() + "'";
298                      ArrayList out = Database.sqlToArray(sql);
299    
300                      // if the query failed then exit
301                      if (CrimsonUtils.isEmpty(out)) {
302                              // tree not in database, so try and compute newick
303                              String newick = toNewick();
304                              if (CrimsonUtils.isEmpty(newick)) {
305                                      CrimsonUtils.printError("Tree (" + id + ") not in database and unable to compute newick string.");
306                                      return "";
307                              } else {
308                                      return newick;
309                              }
310                      }
311                                    
312                      try {
313                                    String newick = Database.readClob(out.get(0));
314                                    if (CrimsonUtils.isEmpty(newick)) {
315                                             // no newick string in the database, so try to compute
316                                             // it from the tree
317                                             newick = toNewick();
318                                             
319                                             if (CrimsonUtils.isEmpty(newick)) {
320                                                      CrimsonUtils.printError("No newick string stored in database and unable to compute newick string.");
321                                                      return "";
322                                             }
323                                    }
324                                    return newick;
325                      } catch (SQLException e) { 
326                                    CrimsonUtils.printError("SQL error reading Newick CLOB from TREES.");
327                                    CrimsonUtils.printError(e.getMessage());
328                                    return "";
329                      }
330             }
331    
332             /** This will update the newick string stored in the database. */
333             public void setNewick(String newick) { 
334                      if (! Trees.dbContains(id)) {
335                                    CrimsonUtils.printError("Tree (" + id + ") not in database, newick not updated.");
336                                    return;
337                      }
338                      
339                      // UPDATE trees SET newick = newick WHERE id = id
340                      String sql = "UPDATE trees SET newic = '" + newick + "' WHERE id = '" + id.toUpperCase() + "'";
341                      if (! Database.execUpdate(sql))
342                                    CrimsonUtils.printError("Error updating newick in " + id + ".");
343             }
344    
345             /** Returns true if the tree has been built from the newick string. */
346             public boolean isBuilt() {
347                      if (root != null) return true;
348                      else return false;
349             }
350    
351             /** This will build the tree from the nexick string. */
352             public boolean buildTree() { 
353                      if (isBuilt()) return true;
354    
355                      CrimsonUtils.printMsg("Building tree.");
356                      NexusFile.parseNewick(this); 
357    
358                      CrimsonUtils.printMsg("Parsed Newick.");
359    
360                      // if root not set, then can't build tree
361                      if (root == null) return false;
362    
363                      // set the temporal depth and level for the tree.
364                      Species species;
365                      Queue queue = new LinkedList();
366    
367                      // start with the root
368                      queue.offer(root);
369    
370                      CrimsonUtils.printMsg("Computing tree stats.");
371    
372                      while ((species = (Species) queue.poll()) != null) {
373                                    species.computeLevel();
374                                    species.computeTempDepth();
375    
376                                    // add children to the queue
377                                    queue = addChildrenToQueue(queue, species.getChildren());
378                      }
379    
380                      return true;
381             }
382    
383             /** This will add the children to the queue. */
384             private Queue addChildrenToQueue(Queue queue, Species[] children) {
385                      if (children != null) {
386                                    for (int i = 0, n = children.length; i < n; i++) {
387                                             queue.offer(children[i]);
388                                    }
389                      }
390                      return queue;
391             }
392    
393             /** 
394              * This will release the tree structure from memory.  This can be
395              * used to free up memory after large trees are built but the
396              * built structure is no longer needed.  
397              */
398             public void clearStructure() {
399                      // remove links to the Species objects created
400                      root = null;
401                      speciesList.clear();
402                      
403                      // run the garbage collector to remove the Species objects
404                      CrimsonMain.runGC();
405             }
406    
407             public void addSpecies(Species species) { 
408                      speciesList.add(species); 
409             }
410    
411             public void removeSpecies(Species species) { 
412                      speciesList.remove(species); 
413             }
414    
415             /** 
416              * This will take a list of species IDs and return a list of
417              * species objects. 
418              */
419             public ArrayList getSpeciesByID(ArrayList speciesIn) {
420                      ArrayList speciesOut = new ArrayList();
421                      if (! CrimsonUtils.isEmpty(speciesIn)) {
422                                    for (int i = 0; i < speciesIn.size(); i++) {
423                                             Species species = getSpeciesByID((String) speciesIn.get(i));
424                                             if (species == null) {
425                                                      CrimsonUtils.printWarning("Ignoring invalid species ID: " + speciesIn.get(i));
426                                             } else {
427                                                      speciesOut.add(species);
428                                             }
429                                    }
430                      }
431    
432                      return speciesOut;
433             }
434    
435             /** 
436              * This steps through every species in the tree and compares the
437              * species IDs until it finds the species requested. 
438              */
439             public Species getSpeciesByID(String id) {
440                      if (! CrimsonUtils.isEmpty(id)) {
441                                    for (int i = 0, n = speciesList.size(); i < n; i++) {
442                                             Species species = (Species) speciesList.get(i);
443                                             if (species.getID().equals(id)) return species;
444                                    }
445                      }
446    
447                      return null;
448             }
449    
450             /** This will compute the various tree statistics. */
451             public void computeStats() { 
452                      // if root has a value then we've parsed the newick string
453                      if (! buildTree()) {
454                                    CrimsonUtils.printError("Can't computer stats because no valid tree.");
455                                    return;
456                      }
457                      
458                      numSpecies = speciesList.size();
459                      numLeaves = 0;
460    
461                      minLevel = Integer.MAX_VALUE;
462                      maxLevel = 0;
463                      minStemLength = Double.MAX_VALUE;
464                      maxStemLength = 0.0;
465                      minTempDepth = Double.MAX_VALUE;
466                      maxTempDepth = 0.0;
467                      double minNonZeroSL = Double.MAX_VALUE;
468                      for (int i = 0, n = numSpecies; i < n; i++) {
469                                    Species species = (Species) speciesList.get(i);
470                                    if (species.isLeaf()) {
471                                             numLeaves++;
472                                             if (species.getLevel() < minLevel) minLevel = species.getLevel();
473                                             if (species.getLevel() > maxLevel) maxLevel = species.getLevel();
474                                             if (species.getStemLength() < minStemLength) minStemLength = species.getStemLength();
475                                             if (species.getStemLength() > maxStemLength) maxStemLength = species.getStemLength();
476                                             if (species.getTempDepth() < minTempDepth) minTempDepth = species.getTempDepth();
477                                             if (species.getTempDepth() > maxTempDepth) maxTempDepth = species.getTempDepth();
478                                             if (species.getStemLength() < minNonZeroSL) {
479                                                      if (species.getStemLength() > 0.0) minNonZeroSL = species.getStemLength();
480                                             }
481                                    }
482                      }
483    
484                      // compute binary
485                      binary = computeBinary();
486    
487                      // to compute ultrametric, we use the minimum non-zero stem
488                      //length multiplied by 0.1 as the determination of the number
489                      //of decimal places for comparison of the doubles.
490                      ultrametric = (maxTempDepth - minTempDepth < (minNonZeroSL * 0.1));
491                      // ultrametric = (maxTempDepth - minTempDepth < MaxMinDIFF);
492             }
493    
494             /** Returns true if the tree is binary. */
495             public boolean computeBinary() { 
496                      if (! buildTree()) return false;
497    
498                      Species species;
499                      Queue queue = new LinkedList();
500    
501                      // start with the root
502                      queue.offer(root);
503    
504                      while ((species = (Species) queue.poll()) != null) {
505                                    if (species.isBinary()) {
506                                             // we are binary, so add children to the queue
507                                             queue = addChildrenToQueue(queue, species.getChildren());
508                                    } else {
509                                             // we have either 1 or more than 2 children, so not binary
510                                             return false;
511                                    }
512                      }
513    
514                      return true;
515             } 
516    
517             /** Returns the set of all leaves in the tree. */
518             public ArrayList getLeaves() { 
519                      if (! buildTree()) return null;
520    
521                      ArrayList leaves = new ArrayList();
522                      for (int i = 0, n = speciesList.size(); i < n; i++) {
523                                    Species species = (Species) speciesList.get(i);
524                                    if (species.isLeaf()) leaves.add(species);
525                      }
526    
527                      return leaves;
528             }
529    
530             /** This will duplicate the tree and all species. */
531             public Object clone() {
532                      Tree newTree = new Tree(id + "_" + Long.toString(Math.abs(CrimsonUtils.random.nextLong())), false);
533    
534                      Species species;
535                      Species newSpecies;
536                      Queue queue = new LinkedList();
537    
538                      // start with the root
539                      queue.offer(root);
540                      
541                      while ((species = (Species) queue.poll()) != null) {
542                                    newSpecies = species.copy(newTree);
543                                    if (newTree.getRoot() == null) newTree.setRoot(newSpecies);
544                                    
545                                    // add children to the queue
546                                    Species[] children = species.getChildren();
547                                    if (children != null) {
548                                             for (int i = 0; i < children.length; i++) {
549                                                      newSpecies.addChild(children[i]);
550                                                      queue.offer(children[i]);
551                                             }
552                                    }
553                      }
554    
555                      newTree.computeStats();
556                      return newTree;
557             }
558    
559            /** 
560             * This will print out this species and all children as a newick
561             * formatted tree. This is meant to be used for trees which are
562             * created by the application.  When trees are loaded into the
563             * system from a NEXUS file, their newick structure is stored in
564             * the database and the method "newick()" can be used to reall the
565             * structure.  Trees that are created by a query do not have
566             * newick structures and thus this routine can be used to create
567             * that structure. The inner sequence IDs will be included and the
568             * tree will be rooted.
569             * @return returns newick string
570             */
571            public String toNewick() { return toNewick(true, true); }
572    
573            /** 
574             * This will print out this species and all children as a newick
575             * formatted tree.  The tree will be rooted. This is meant to be
576             * used for trees which are created by the application.  When
577             * trees are loaded into the system from a NEXUS file, their
578             * newick structure is stored in the database and the method
579             * "newick()" can be used to recall the structure.  Trees that are
580             * created by a query do not have newick structures and thus this
581             * routine can be used to create that structure. 
582             * @param incSequence when true, inner sequence IDs will be
583             * included in the newick output
584             * @return returns newick string
585             */
586            public String toNewick(boolean incSequence) { return toNewick(incSequence, true); }
587    
588            /** 
589             * This will output this species and all children as a newick
590             * formatted tree. This is meant to be used for trees which are
591             * created by the application.  When trees are loaded into the
592             * system from a NEXUS file, their newick structure is stored in
593             * the database and the method "newick()" can be used to recall
594             * the structure.  Trees that are created by a query do not have
595             * newick structures and thus this routine can be used to create
596             * that structure. 
597             * @param incSequence when true, inner sequence IDs will be
598             * included in the newick output
599             * @param rooted when true, the tree will rooted
600             * @return returns newick string
601             */
602            public String toNewick(boolean incSequence, boolean rooted) { 
603                    // if root has a value then we've parsed the newick string
604                    if (! buildTree()) {
605                            CrimsonUtils.printError("Can't print newick because no valid tree.");
606                            return "";
607                    }
608                    
609                    // reset all children counters
610                    for (int i = 0; i < numSpecies; i++) {
611                            ((Species) speciesList.get(i)).resetChildIndex();
612                    }
613                    
614                    Species species = root;
615                    
616                    // use a StringBuffer sized to approximately fit the entire
617                    // string to facilitate appending to the string.
618                    StringBuffer out = new StringBuffer(numSpecies*10);
619                    
620                    int fdbkVal = numSpecies / 10;
621                    if (fdbkVal == 0) fdbkVal = 1;
622                    int fdbkInc = 1;
623                    CrimsonUtils.printMsg("Processing newick: .", false);
624                    while (species != null) {
625                            if ((fdbkInc++ % fdbkVal) == 0) CrimsonUtils.printMsg(".", false);
626                            
627                            if (species.hasNextChild()) {
628                                    // process next child
629                                    if (species.firstChild()) out.append("(");
630                                    else out.append(",");
631                                    species = species.nextChild();
632                                    
633                            } else {
634                                    // no more children so add species and then move up
635                                    // the tree
636                                    if (species.isLeaf()) out.append(species.getID() + ":" + species.getStemLength());
637                                    else {
638                                            if (incSequence) out.append(")" + species.getID() + ":" + species.getStemLength());
639                                            // else out.append("):" + species.getStemLength());
640                                            else {
641                                                    if (rooted) {
642                                                            out.append("):" + species.getStemLength());
643                                                    } else {
644                                                            // not rooted so only include branch
645                                                            // length if species has a parent
646                                                            if (species.getParent() != null) out.append("):" + species.getStemLength());
647                                                            else out.append(")");
648                                                    }
649                                            }
650                                    }
651                                    species = species.getParent();
652                            }
653                    }
654                    CrimsonUtils.printMsg("");
655                    return out.toString();
656            }
657            
658             /** 
659              * This will print out the tree, one species per line, indenting
660              * each level.  This is meant to be used as a debugging tool and
661              * is not meant to be used to print out large trees.
662              * @XXX This will give a stack overflow for deep trees.
663              */
664             public String printTree() { 
665                      // if root has a value then we've parsed the newick string
666                      if (! buildTree()) {
667                                    CrimsonUtils.printError("Can't print tree because no valid tree.");
668                                    return "";
669                      }
670                      
671                      return root.printTree(""); 
672             }
673    
674             /** This will print lots of specs about the tree.  */
675             public String toString() {
676                      String out = "";
677    
678                      out += "Tree ID: " + id + "\n";
679                      out += "Num Leaves: " + numLeaves + "\n";
680                      out += "Num Species: " + numSpecies + "\n";
681                      out += "Ultrametric: " + isUltrametric() + "\n";
682                      out += "Binary: " + isBinary() + "\n";
683                      out += "Level: [" + minLevel + ", " + maxLevel + "]\n";
684                      out += "Stem Length: [" + minStemLength + ", " + maxStemLength + "]\n";
685                      out += "Temporal Depth: [" + minTempDepth + ", " + maxTempDepth + "]\n";
686    
687                      if (partitions.size() > 0) {
688                                    out += "Partitions (" + partitions.size() + "): ";
689                                    for (Iterator i = partitions.iterator(); i.hasNext();) {
690                                             out += "\n\t" + (Partition) i.next();
691                                    }
692                      } else {
693                                    out += "Partitions: None\n";
694                      }
695    
696                      return out;
697             }
698    
699        //--------------------------------------------------------------------------
700        // Partition Related Methods
701    
702             /** The number of partitions loaded for this tree. */
703             public int getNumPartitions() { return partitions.size(); }
704    
705             /** This is the sum of the length of all partitions. */
706             public int getLength() { 
707                      int len = 0;
708                      for (Iterator i = partitions.iterator(); i.hasNext();) {
709                                    len += ((Partition) i.next()).getLength();
710                      }
711                      return len; 
712             }
713    
714             public void addPartition(Partition partition) {
715                      partitions.add(partition);
716             }
717    
718             public void removePartition(Partition partition) {
719                      partitions.remove(partition);
720             }
721    
722             /**
723              * This will return an ArrayList containing all of the partitions
724              * in the current tree that are based on the specified model.
725              */
726             public ArrayList getPartitionsByModel(String modelID) {
727                      ArrayList out = new ArrayList();
728    
729                      for (Iterator i = partitions.iterator(); i.hasNext();) {
730                                    Partition partition = (Partition) i.next();
731                                    if (partition.getModelID().equalsIgnoreCase(modelID)) {
732                                             out.add(partition);
733                                    }
734                      }
735    
736                      return out;
737             }
738    
739        //--------------------------------------------------------------------------
740        // These methods will modify the tree or return subtrees
741    
742             /** 
743              * This will remove all internal species that have only one child,
744              * linking the child to species' parent and adjusting the child's
745              * stem lengths accordingly.  
746              */
747             public void collapse() { 
748                      for (Iterator i = speciesList.iterator(); i.hasNext();) {
749                                    // if collapsed then remove the species from speciesList
750                                    if (((Species) i.next()).collapse()) i.remove();
751                      }
752             }
753    
754             /**
755              * This will return a list of leaf counts for each subtree
756              * specified by the subroots.  This can be used in conjunction
757              * with getTreesByTempDepth() and getTreesByLevel() to get the
758              * leaf counts for subtrees below a threshold value.
759              */
760             public ArrayList getLeafCounts(ArrayList subroots) {
761                      if ((subroots == null) || (subroots.size() == 0)) return subroots;
762    
763                      ArrayList counts = new ArrayList();
764    
765                      for (Iterator i = subroots.iterator(); i.hasNext();) {
766                                    // get the subtree for the subroot
767                                    Tree subtree = getSubtree((Species) i.next());
768    
769                                    // add count to output
770                                    counts.add(new Integer(subtree.getNumLeaves()));
771                      }
772    
773                      return counts;
774             }
775    
776             /** 
777              * Returns the subtree given the specified root. This will not
778              * create a unique Tree but instead will contain the species
779              * objects from the original tree.  The trees returned will not be
780              * added to the tree pool and are meant to be used as temporary
781              * trees.  If a unique trees are needed, then use Tree.clone().
782              */
783             public Tree getSubtree(Species subroot) {
784                      if (subroot == null) return null;
785                      
786                      // these trees are not added to the treePool
787                      Tree newTree = new Tree(id + "_" + subroot.getID(), false);
788    
789                      // set the root for the new tree
790                      newTree.setRoot(subroot);
791                                    
792                      Species species;
793                      Queue queue = new LinkedList();
794    
795                      // start with this root
796                      queue.offer(subroot);
797    
798                      while ((species = (Species) queue.poll()) != null) {
799                                    // register species with new tree
800                                    newTree.addSpecies(species);
801                                    
802                                    // add children to the queue
803                                    addChildrenToQueue(queue, species.getChildren());
804                      }
805    
806                      // the subtree is already built - it inherits this from the
807                      // parent. 
808                      newTree.computeStats();
809                      return newTree;
810             }
811    
812             /** 
813              * Returns the set of root species for any subtrees that are below
814              * a specified level.
815              */
816             public ArrayList getTreesByLevel(int level) {
817                      // list of roots of the subtrees
818                      ArrayList roots = new ArrayList();
819    
820                      // no trees
821                      if (level > maxLevel) return roots;
822    
823                      Species species;
824                      Queue queue = new LinkedList();
825    
826                      // start with the root
827                      queue.offer(root);
828    
829                      while ((species = (Species) queue.poll()) != null) {
830                                    if (species.getLevel() >= level) {
831                                             // add species as root.  don't add it's children to
832                                             // the queue, if it's an inner node.
833                                             roots.add(species);
834                                    } else {
835                                             // the species isn't deep enought, so add any children
836                                             // to queue
837                                             queue = addChildrenToQueue(queue, species.getChildren());
838                                    }
839                      }
840    
841                      return roots;
842             }
843    
844             /** 
845              * Returns the random, uniform sampling of species from each
846              * subtree demarkated by the level value. The number of species
847              * included from each subtree will be 'num' or the total number of
848              * leaves in each subtree if a subtree has less than 'num' leaves.
849              */
850             public Tree uniformSampleByLevel(int level, int num) {
851                      if (! buildTree()) {
852                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
853                                    return null;
854                      }
855                      
856                      // get the list of roots for each subtree that matches
857                      ArrayList subroots = getTreesByLevel(level);
858    
859                      if (subroots.size() == 0) {
860                                    CrimsonUtils.printError("Sampling returned no valid trees.");
861                                    return null;
862                      }
863    
864                      // this will be the list of leaves that have been selected
865                      // from the various subtrees
866                      ArrayList leaves = new ArrayList();
867    
868                      // compute the number of leaves to select, per subtree
869                      int perSubtree = num / subroots.size();
870                      int remainder = num % subroots.size();
871    
872                      // pick leaves
873                      for (int i = 0, n = subroots.size(); i < n; i++) {
874                                    // randomly select a subtree (ie subroot) from the array
875                                    int index = CrimsonUtils.random.nextInt(subroots.size());
876    
877                                    // using the subroot, get the subtree
878                                    Tree subtree = getSubtree((Species) subroots.get(index));
879    
880                                    // remove the subtree from the array so we don't pick it again
881                                    subroots.remove(index);
882    
883                                    int numLeaves = subtree.getNumLeaves();
884    
885                                    // if less leaves in subtree than being sampled, then we
886                                    // need to add the entire subtree, otherwise, we need to
887                                    // randomly sample the subtree.
888                                    if (numLeaves > perSubtree) {
889                                             int leavesNeeded;
890                                             if (remainder > 0) {
891                                                      // spread remainder out over subtrees. if
892                                                      // subtree.numLeaves is less than perSubtree + 1,
893                                                      // then the remainder will be skipped. this will
894                                                      // result in 1 less leaf selected.
895                                                      leavesNeeded = perSubtree + 1;
896                                                      remainder--;
897                                             } else {
898                                                      // if more subtrees than 'num', we won't be
899                                                      // sampling from every subtree
900                                                      if (perSubtree > 0) {
901                                                                    leavesNeeded = perSubtree;
902                                                      } else {
903                                                                    // if we get to this point then we'd fulfilled
904                                                                    // the sampling requirements, even though we
905                                                                    // haven't sampled from all the subtrees.
906                                                                    break;
907                                                      }
908                                             }
909                                             ArrayList subLeaves = subtree.getLeaves();
910                                             for (int l = 0; l < leavesNeeded; l++) {
911                                                      // randomly select a leaf from the array
912                                                      int iLeaf = CrimsonUtils.random.nextInt(subLeaves.size());
913    
914                                                      // add leaf to output array
915                                                      leaves.add(subLeaves.get(iLeaf));
916    
917                                                      // remove the subtree from the array so we don't pick it again
918                                                      subLeaves.remove(iLeaf);
919                                             }
920                                    } else {
921                                             // add all leaves
922                                             leaves.addAll(subtree.getLeaves());
923                                    }
924                      }
925    
926                      return manualSampleByID(leaves);
927             }
928    
929    
930             /** 
931              * Returns the random, weighted sampling of species from each
932              * subtree demarkated by the level value.  The number of leaves
933              * included from each subtree is a function of the total number of
934              * leaves requested and the relative number of leaves in each
935              * subtree.  This is computed by pooling all leaves from all
936              * subtrees and then randomly selecting leaves from the pool.
937              */
938             public Tree weightedSampleByLevel(int level, int num) {
939                      if (! buildTree()) {
940                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
941                                    return null;
942                      }
943                      
944                      ArrayList subtrees = getTreesByLevel(level);
945    
946                      if (subtrees.size() == 0) {
947                                    CrimsonUtils.printError("Sampling returned no valid trees.");
948                                    return null;
949                      }
950    
951                      // create leaf pool, the total set of all leaves from all
952                      // subtrees
953                      ArrayList leafPool = new ArrayList();
954                      for (Iterator i = subtrees.iterator(); i.hasNext();) {
955                                    // get the subtree for the subroot
956                                    Tree subtree = getSubtree((Species) i.next());
957    
958                                    // add leaves to pool
959                                    leafPool.addAll(subtree.getLeaves());
960                      }
961    
962                      // randomly sample from the leaf pool, creating the list of
963                      // selected leaves
964                      ArrayList leaves = new ArrayList();
965                      if (num >= leafPool.size()) {
966                                    // add all leaves
967                                    for (Iterator i = leafPool.iterator(); i.hasNext();) {
968                                             // add leaf IDs
969                                             leaves.add(i.next());
970                                    }
971                      } else {
972                                    for (int i = 0; i < num; i++) {
973                                             // randomly select a species from the leaf pool
974                                             int index = CrimsonUtils.random.nextInt(leafPool.size());
975                                             Species species = (Species) leafPool.get(index);
976                                             
977                                             // add the ID of the random species to the list
978                                             leaves.add(species);
979                                             
980                                             // remove the random selection from the pool
981                                             leafPool.remove(index);
982                                    }
983                      }
984                      return manualSampleByID(leaves);
985             }
986    
987             /** 
988              * Returns the set of root species for any subtrees that are below
989              * a specified level.
990              */
991             public ArrayList getTreesByTempDepth(double tempDepth) {
992                      // list of roots of the subtrees
993                      ArrayList roots = new ArrayList();
994    
995                      // no trees
996                      if (tempDepth > maxTempDepth) return roots;
997    
998                      Species species;
999                      Queue queue = new LinkedList();
1000    
1001                      // start with the root
1002                      queue.offer(root);
1003    
1004                      while ((species = (Species) queue.poll()) != null) {
1005                                    if (species.getTempDepth() >= tempDepth) {
1006                                             // add species as root.  don't add it's children to
1007                                             // the queue, if it's an inner node.
1008                                             roots.add(species);
1009                                    } else {
1010                                             // the species isn't deep enough, so add any children
1011                                             // to queue
1012                                             queue = addChildrenToQueue(queue, species.getChildren());
1013                                    }
1014                      }
1015    
1016                      return roots;
1017             }
1018    
1019             /** 
1020              * Returns the random, uniform sampling of species from each
1021              * subtree demarkated by the temporal depth value. The number of
1022              * species included from each subtree will be 'num' or the total
1023              * number of leaves in each subtree if a subtree has less than
1024              * 'num' leaves.
1025              */
1026             public Tree uniformSampleByTempDepth(double tempDepth, int num) {
1027                      if (! buildTree()) {
1028                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
1029                                    return null;
1030                      }
1031                      
1032                      // get the list of roots for each subtree that matches
1033                      ArrayList subroots = getTreesByTempDepth(tempDepth);
1034    
1035                      if (subroots.size() == 0) {
1036                                    CrimsonUtils.printError("Sampling returned no valid trees.");
1037                                    return null;
1038                      }
1039    
1040                      // this will be the list of leaves that have been selected
1041                      // from the various subtrees
1042                      ArrayList leaves = new ArrayList();
1043    
1044                      // compute the number of leaves to select, per subtree
1045                      int perSubtree = num / subroots.size();
1046                      int remainder = num % subroots.size();
1047    
1048                      // pick leaves
1049                      for (int i = 0, n = subroots.size(); i < n; i++) {
1050                                    // randomly select a subtree (ie subroot) from the array
1051                                    int index = CrimsonUtils.random.nextInt(subroots.size());
1052    
1053                                    // using the subroot, get the subtree
1054                                    Tree subtree = getSubtree((Species) subroots.get(index));
1055    
1056                                    // remove the subroot from the array so we don't pick it again
1057                                    subroots.remove(index);
1058    
1059                                    int numLeaves = subtree.getNumLeaves();
1060    
1061                                    // if less leaves in subtree than being sampled, then we
1062                                    // need to add the entire subtree, otherwise, we need to
1063                                    // randomly sample the subtree.
1064                                    if (numLeaves > perSubtree) {
1065                                             int leavesNeeded;
1066                                             if (remainder > 0) {
1067                                                      // spread remainder out over subtrees. if
1068                                                      // subtree.numLeaves is less than perSubtree + 1,
1069                                                      // then the remainder will be skipped. this will
1070                                                      // result in 1 less leaf selected.
1071                                                      leavesNeeded = perSubtree + 1;
1072                                                      remainder--;
1073                                             } else {
1074                                                      // if more subtrees than 'num', we won't be
1075                                                      // sampling from every subtree
1076                                                      if (perSubtree > 0) { 
1077                                                                    leavesNeeded = perSubtree;
1078                                                      } else {
1079                                                                    // if we get to this point then we'd fulfilled
1080                                                                    // the sampling requirements, even though we
1081                                                                    // haven't sampled from all the subtrees.
1082                                                                    break;
1083                                                      }
1084    
1085                                             }
1086                                             ArrayList subLeaves = subtree.getLeaves();
1087                                             for (int l = 0; l < leavesNeeded; l++) {
1088                                                      // randomly select a leaf from the array
1089                                                      int iLeaf = CrimsonUtils.random.nextInt(subLeaves.size());
1090    
1091                                                      // add leaf to output array
1092                                                      leaves.add(subLeaves.get(iLeaf));
1093    
1094                                                      // remove the subtree from the array so we don't pick it again
1095                                                      subLeaves.remove(iLeaf);
1096                                             }
1097                                    } else {
1098                                             // add all leaves
1099                                             leaves.addAll(subtree.getLeaves());
1100                                    }
1101                      }
1102    
1103                      return manualSampleByID(leaves);
1104             }
1105    
1106    
1107             /** 
1108              * Returns the random, weighted sampling of species from each
1109              * subtree demarkated by the temporal depth value.  The number of
1110              * leaves included from each subtree is a function of the total
1111              * number of leaves requested and the relative number of leaves in
1112              * each subtree.  This is computed by pooling all leaves from all
1113              * subtrees and then randomly selecting leaves from the pool.
1114              */
1115             public Tree weightedSampleByTempDepth(double tempDepth, int num) {
1116                      if (! buildTree()) {
1117                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
1118                                    return null;
1119                      }
1120                      
1121                      ArrayList subtrees = getTreesByTempDepth(tempDepth);
1122    
1123                      if (subtrees.size() == 0) {
1124                                    CrimsonUtils.printError("Sampling returned no valid trees.");
1125                                    return null;
1126                      }
1127    
1128                      // create leaf pool, the total set of all leaves from all
1129                      // subtrees
1130                      ArrayList leafPool = new ArrayList();
1131                      for (Iterator i = subtrees.iterator(); i.hasNext();) {
1132                                    // get the subtree for the subroot
1133                                    Tree subtree = getSubtree((Species) i.next());
1134    
1135                                    // add leaves to pool
1136                                    leafPool.addAll(subtree.getLeaves());
1137                      }
1138    
1139                      // randomly sample from the leaf pool, creating the list of
1140                      // selected leaves
1141                      ArrayList leaves = new ArrayList();
1142                      if (num >= leafPool.size()) {
1143                                    // add all leaves
1144                                    for (Iterator i = leafPool.iterator(); i.hasNext();) {
1145                                             // add leaf IDs
1146                                             leaves.add(i.next());
1147                                    }
1148                      } else {
1149                                    for (int i = 0; i < num; i++) {
1150                                             // randomly select a species from the leaf pool
1151                                             int index = CrimsonUtils.random.nextInt(leafPool.size());
1152                                             Species species = (Species) leafPool.get(index);
1153                                             
1154                                             // add the ID of the random species to the list
1155                                             leaves.add(species);
1156                                             
1157                                             // remove the random selection from the pool
1158                                             leafPool.remove(index);
1159                                    }
1160                      }
1161    
1162                      return manualSampleByID(leaves);
1163             }
1164    
1165             /** 
1166              * This will return a subtree that contains the specified leaves.
1167              * The tree returned will contain species that are copies of those
1168              * in the original tree.  The input for this method is an
1169              * ArrayList containing species IDs or species objects.  It takes
1170              * a significant amount of time to convert from species IDs to
1171              * species objects.
1172              */
1173             public Tree manualSampleByID(ArrayList oldLeaves) {
1174                      if (! buildTree()) {
1175                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
1176                                    return null;
1177                      }
1178                      
1179                      if ((oldLeaves == null) || (oldLeaves.size() == 0)) return null;
1180    
1181                      // if we have a list of species IDs, then we need to convert
1182                      // the IDs to objects before we begin.  This is a slow
1183                      // process.
1184                      Object objTest = oldLeaves.get(0);
1185                      if (objTest instanceof String) oldLeaves = getSpeciesByID(oldLeaves);
1186    
1187                      // give user some feedback
1188                      CrimsonUtils.printMsg("ManualSampleByID: .", false);
1189    
1190                      // this will output a list of all leaves to the file
1191                      // 'species.list'
1192                      if (DEBUG_OUTPUT) FileIO.printSpecies(new TreeSet(oldLeaves), true);
1193                      //              if (DEBUG_OUTPUT) FileIO.printLeaves(new TreeSet(oldLeaves));
1194    
1195                      Tree newTree = new Tree(id + "_" + Long.toString(Math.abs(CrimsonUtils.random.nextLong())), false);
1196    
1197                      // since the new tree contains copies of the species in the
1198                      // old tree, we need to store a reference to original species
1199                      // that is now the root of the new tree.
1200                      Species oldRoot = null;
1201    
1202                      // we maintain a map of the species IDs and references to the
1203                      // cloned species contained in the new tree.  These will
1204                      // facilitate adding new species.
1205                      HashMap newSpeciesMap = new HashMap();
1206                      HashSet newSpeciesSet = new HashSet();
1207    
1208                      // PROCESS FIRST SELECTION
1209    
1210                      // randomly select a leaf from the list 
1211                      Species oldLeaf = (Species) oldLeaves.get(0); 
1212    
1213                      if (oldLeaf == null) {
1214                                    CrimsonUtils.printError("Invalid species ID: " + oldLeaf.getID());
1215                                    return null;
1216                      }
1217    
1218                      // we don't want to muck with the old leaf but instead,
1219                      // will add a copy of the old leaf to the new tree.  The
1220                      // copy will have no children, parent, or level.  It will
1221                      // retain the original id and stem length.
1222                      Species newLeaf = (Species) oldLeaf.copy(newTree);
1223    
1224                      // maintain a mapping between spcies IDs and references to the
1225                      // species in the new tree (ie the clones). The contains()
1226                      // method is much slower with a Map than it is with a Set, so
1227                      // we maintain both a Map and a Set.  The memory cost is less
1228                      // significant than the temporal cost.
1229    
1230                      newSpeciesMap.put(newLeaf.getID(), newLeaf);
1231                      newSpeciesSet.add(newLeaf);
1232    
1233                      // this is the first species, so add it as root
1234                      newTree.setRoot(newLeaf);
1235    
1236                      // keep a reference to the original species within the
1237                      // original tree.
1238                      oldRoot = oldLeaf;
1239                      
1240                      // for simplicity, store the number of leaves to be included
1241                      int num = oldLeaves.size();
1242    
1243                      // use this to figure out when to print a "."
1244                      int inc = (num > 10) ? num / 10 : 1;
1245    
1246                      // PROCESS REMAINING SELECTIONS. Loop until all leaves have
1247                      // been sampled
1248                      for (int index = 1; index < num; index++) {
1249                                    // add "." every 10% completed
1250                                    if ((index % inc) == 0) CrimsonUtils.printMsg(".", false);
1251    
1252                                    // get next leaf from the list 
1253                                    oldLeaf = (Species) oldLeaves.get(index);
1254    
1255                                    if (oldLeaf == null) {
1256                                             CrimsonUtils.printError("Invalid species ID: " + oldLeaf.getID());
1257                                             return null;
1258                                    }
1259    
1260                                    // we don't want to muck with the old leaf but instead,
1261                                    // will add a copy of the old leaf to the new tree.  The
1262                                    // copy will have no children, parent, or level.  It will
1263                                    // retain the original id and stem length.
1264                                    newLeaf = (Species) oldLeaf.copy(newTree);
1265                                    // maintain a mapping between spcies IDs and references to
1266                                    // the species in the new tree (ie the clones)
1267                                    newSpeciesMap.put(newLeaf.getID(), newLeaf);
1268                                    newSpeciesSet.add(newLeaf);
1269    
1270                                    // since not the first leaf, we need to find the LCA
1271                                    // between the root of the new tree and the new leaf
1272                                    Species oldLCA = getLCA(oldRoot, oldLeaf);
1273                                    
1274                                    if (oldLCA == oldRoot) {
1275                                             // loop until we've propogated up the tree from the
1276                                             // leaf to the LCA.
1277                                             while (true) {
1278                                                      // get the parent 
1279                                                      Species oldParent = oldLeaf.getParent();
1280    
1281                                                      if (newSpeciesSet.contains(oldParent)) {
1282                                                                    // we found the LCA, so now we need to add the
1283                                                                    // new child as a child of the LCA
1284                                                                    
1285                                                                    // get a reference to the LCA in the new tree,
1286                                                                    // using the species ID
1287                                                                    Species newLCA = (Species) newSpeciesMap.get(oldParent.getID());
1288    
1289                                                                    // add the new child to the new LCA
1290                                                                    newLCA.addChild(newLeaf);
1291                                                                    
1292                                                                    break;
1293                                                      } else {
1294                                                                    // the inner node doesn't exist in the new tree,
1295                                                                    // so we will need to add it to the tree
1296    
1297                                                                    // create a copy of the parent to be added to
1298                                                                    // the new tree
1299                                                                    Species newParent = (Species) oldParent.copy(newTree);
1300    
1301                                                                    // add the cloned child to the cloned parent
1302                                                                    newParent.addChild(newLeaf);
1303    
1304                                                                    // now we need to update the parents
1305                                                                    newLeaf = newParent;
1306                                                                    oldLeaf = oldParent;
1307    
1308                                                                    // add the new parent to the newTree map
1309                                                                    newSpeciesMap.put(newParent.getID(), newParent);
1310                                                                    newSpeciesSet.add(newParent);
1311                                                      }
1312                                             }
1313    
1314                                    } else {
1315                                             // lca != nRoot: extend oldRoot and newLeaf to lca.
1316    
1317                                             // need to extend the root of the new tree to the LCA,
1318                                             // via it's parents in the original tree.  loop until
1319                                             // we've propogated up the tree from the NEW ROOT TO
1320                                             // THE LCA.
1321                                             while (true) {
1322                                                      // get the parent of the new root, from the
1323                                                      // original tree
1324                                                      Species oldParent = oldRoot.getParent();
1325    
1326                                                      // create a copy of the parent to be added to
1327                                                      // the new tree
1328                                                      Species newParent = (Species) oldParent.copy(newTree);
1329                                                      
1330                                                      // add the cloned child to the cloned parent
1331                                                      newParent.addChild(newTree.getRoot());
1332                                                      
1333                                                      // update root
1334                                                      oldRoot = oldParent;
1335                                                      newTree.setRoot(newParent);
1336                                                      
1337                                                      // since we added a new node to the tree, we need
1338                                                      // to update the newTree's HashMap
1339                                                      newSpeciesMap.put(newParent.getID(), newParent);
1340                                                      newSpeciesSet.add(newParent);
1341    
1342                                                      // if we found the LCA, then we're done, else
1343                                                      // continue adding parents
1344                                                      if (oldLCA == oldParent) break;
1345                                             }
1346    
1347                                             // need to extend the new leaf via it's parents up to
1348                                             // the LCA.  loop until we've propogated up the tree
1349                                             // from the NEW LEAF TO THE LCA.
1350                                             while (true) {
1351                                                      // get the parent 
1352                                                      Species oldParent = oldLeaf.getParent();
1353    
1354                                                      if (newSpeciesSet.contains(oldParent)) {
1355                                                                    // we found the LCA, so now we need to add the
1356                                                                    // new child as a child of the LCA
1357                                                                    
1358                                                                    // get a reference to the LCA in the new tree,
1359                                                                    // using the species ID
1360                                                                    Species newLCA = (Species) newSpeciesMap.get(oldParent.getID());
1361    
1362                                                                    // add the new child to the new LCA
1363                                                                    newLCA.addChild(newLeaf);
1364                                                                    
1365                                                                    break;
1366                                                      } else {
1367                                                                    // the inner node doesn't exist in the new tree,
1368                                                                    // so we will need to add it to the tree
1369    
1370                                                                    // create a copy of the parent to be added to
1371                                                                    // the new tree
1372                                                                    Species newParent = (Species) oldParent.copy(newTree);
1373    
1374                                                                    // add the cloned child to the cloned parent
1375                                                                    newParent.addChild(newLeaf);
1376    
1377                                                                    // now we can just need to deal with the 
1378                                                                    // parents
1379                                                                    newLeaf = newParent;
1380                                                                    oldLeaf = oldParent;
1381    
1382                                                                    // since we've now effectively added a new
1383                                                                    // node (newParent) to newTree, we need to
1384                                                                    // update the newTree's HashMap
1385                                                                    newSpeciesMap.put(newParent.getID(), newParent);
1386                                                                    newSpeciesSet.add(newParent);
1387                                                      }
1388                                             }
1389                                    }
1390                      }
1391    
1392                      // add CR-LF to end of "."
1393                      CrimsonUtils.printMsg("");
1394    
1395                      newTree.collapse();
1396                      newTree.computeStats();
1397                      return newTree;
1398             }
1399    
1400    
1401             /** 
1402              * This will randomly sample the tree, returning a subtree that
1403              * contains 'num' leaves.  The tree returned will contain species
1404              * that are copies of those in the original tree.
1405              */
1406             public Tree randomSample(int num) {
1407                      if (! buildTree()) {
1408                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
1409                                    return null;
1410                      }
1411                      
1412                      if (num < 1) return null;
1413    
1414                      // need the leaves as a list so we can more easily (randomly)
1415                      // select leaves
1416                      ArrayList oldLeaves = getLeaves();
1417    
1418                      // make sure enough leaves to fullfil request
1419                      if (oldLeaves.size() < num) {
1420                                    String msg = "Less leaves (" + String.valueOf(oldLeaves.size()) + ") ";
1421                                    msg += "in tree, than requested in tree sample (" + String.valueOf(num) + ")\n";
1422                                    CrimsonUtils.printWarning(msg);
1423    
1424                                    // since we're returning all leaves, we just need to clone the tree
1425                                    return (Tree) this.clone();
1426                      }
1427    
1428                      // give user some feedback
1429                      CrimsonUtils.printMsg("RandomSample: .", false);
1430    
1431                      //              Tree newTree = new Tree(id + "_" + String.valueOf(num));
1432                      Tree newTree = new Tree(id + "_" + Long.toString(Math.abs(CrimsonUtils.random.nextLong())), false);
1433    
1434                      // since the new tree contains copies of the species in the
1435                      // old tree, we need to store a reference to original species
1436                      // that is now the root of the new tree.
1437                      Species oldRoot = null;
1438    
1439                      // we maintain a map of the species IDs and references to the
1440                      // cloned species contained in the new tree.  These will
1441                      // facilitate adding new species.
1442                      HashMap newSpeciesMap = new HashMap();
1443                      HashSet newSpeciesSet = new HashSet();
1444    
1445                      // PROCESS FIRST SELECTION
1446    
1447                      // randomly select a leaf from the list 
1448                      int index = CrimsonUtils.random.nextInt(oldLeaves.size());
1449                      Species oldLeaf = (Species) oldLeaves.get(index);
1450    
1451                      // remove the leaf from the list of leaves.  This way we
1452                      // don't have to worry about selecting it again
1453                      oldLeaves.remove(index);
1454    
1455                      // we don't want to muck with the old leaf but instead,
1456                      // will add a copy of the old leaf to the new tree.  The
1457                      // copy will have no children, parent, or level.  It will
1458                      // retain the original id and stem length.
1459                      Species newLeaf = (Species) oldLeaf.copy(newTree);
1460    
1461                      // maintain a mapping between spcies IDs and references to the
1462                      // species in the new tree (ie the clones). The contains()
1463                      // method is much slower with a Map than it is with a Set, so
1464                      // we maintain both a Map and a Set.  The memory cost is less
1465                      // significant than the temporal cost.
1466    
1467                      newSpeciesMap.put(newLeaf.getID(), newLeaf);
1468                      newSpeciesSet.add(newLeaf);
1469    
1470                      // this is the first species, so add it as root
1471                      newTree.setRoot(newLeaf);
1472    
1473                      // keep a reference to the original species within the
1474                      // original tree.
1475                      oldRoot = oldLeaf;
1476                      
1477                      // use this to figure out when to print a "."
1478                      int inc = (num > 10) ? num / 10 : 1;
1479    
1480                      // PROCESS REMAINING SELECTIONS. Loop until all leaves have
1481                      // been sampled
1482                      for (int i = 1; i < num; i++) {
1483                                    // add "." every 10% completed
1484                                    if ((i % inc) == 0) CrimsonUtils.printMsg(".", false);
1485    
1486                                    // randomly select a leaf from the list 
1487                                    index = CrimsonUtils.random.nextInt(oldLeaves.size());
1488                                    oldLeaf = (Species) oldLeaves.get(index);
1489    
1490                                    // remove the leaf from the list of leaves.  This way we
1491                                    // don't have to worry about selecting it again
1492                                    oldLeaves.remove(index);
1493    
1494                                    // we don't want to muck with the old leaf but instead,
1495                                    // will add a copy of the old leaf to the new tree.  The
1496                                    // copy will have no children, parent, or level.  It will
1497                                    // retain the original id and stem length.
1498                                    newLeaf = (Species) oldLeaf.copy(newTree);
1499                                    // maintain a mapping between spcies IDs and references to
1500                                    // the species in the new tree (ie the clones)
1501                                    newSpeciesMap.put(newLeaf.getID(), newLeaf);
1502                                    newSpeciesSet.add(newLeaf);
1503    
1504                                    // since not the first leaf, we need to find the LCA
1505                                    // between the root of the new tree and the new leaf
1506                                    Species oldLCA = getLCA(oldRoot, oldLeaf);
1507                                    
1508                                    if (oldLCA == oldRoot) {
1509                                             // loop until we've propogated up the tree from the
1510                                             // leaf to the LCA.
1511                                             while (true) {
1512                                                      // get the parent 
1513                                                      Species oldParent = oldLeaf.getParent();
1514    
1515                                                      if (newSpeciesSet.contains(oldParent)) {
1516                                                                    // we found the LCA, so now we need to add the
1517                                                                    // new child as a child of the LCA
1518                                                                    
1519                                                                    // get a reference to the LCA in the new tree,
1520                                                                    // using the species ID
1521                                                                    Species newLCA = (Species) newSpeciesMap.get(oldParent.getID());
1522    
1523                                                                    // add the new child to the new LCA
1524                                                                    newLCA.addChild(newLeaf);
1525                                                                    
1526                                                                    break;
1527                                                      } else {
1528                                                                    // the inner node doesn't exist in the new tree,
1529                                                                    // so we will need to add it to the tree
1530    
1531                                                                    // create a copy of the parent to be added to
1532                                                                    // the new tree
1533                                                                    Species newParent = (Species) oldParent.copy(newTree);
1534    
1535                                                                    // add the cloned child to the cloned parent
1536                                                                    newParent.addChild(newLeaf);
1537    
1538                                                                    // now we need to update the parents
1539                                                                    newLeaf = newParent;
1540                                                                    oldLeaf = oldParent;
1541    
1542                                                                    // add the new parent to the newTree map
1543                                                                    newSpeciesMap.put(newParent.getID(), newParent);
1544                                                                    newSpeciesSet.add(newParent);
1545                                                      }
1546                                             }
1547    
1548                                    } else {
1549                                             // lca != nRoot: extend oldRoot and newLeaf to lca.
1550    
1551                                             // need to extend the root of the new tree to the LCA,
1552                                             // via it's parents in the original tree.  loop until
1553                                             // we've propogated up the tree from the NEW ROOT TO
1554                                             // THE LCA.
1555                                             while (true) {
1556                                                      // get the parent of the new root, from the
1557                                                      // original tree
1558                                                      Species oldParent = oldRoot.getParent();
1559    
1560                                                      // create a copy of the parent to be added to
1561                                                      // the new tree
1562                                                      Species newParent = (Species) oldParent.copy(newTree);
1563                                                      
1564                                                      // add the cloned child to the cloned parent
1565                                                      newParent.addChild(newTree.getRoot());
1566                                                      
1567                                                      // update root
1568                                                      oldRoot = oldParent;
1569                                                      newTree.setRoot(newParent);
1570                                                      
1571                                                      // since we added a new node to the tree, we need
1572                                                      // to update the newTree's HashMap
1573                                                      newSpeciesMap.put(newParent.getID(), newParent);
1574                                                      newSpeciesSet.add(newParent);
1575    
1576                                                      // if we found the LCA, then we're done, else
1577                                                      // continue adding parents
1578                                                      if (oldLCA == oldParent) break;
1579                                             }
1580    
1581                                             // need to extend the new leaf via it's parents up to
1582                                             // the LCA.  loop until we've propogated up the tree
1583                                             // from the NEW LEAF TO THE LCA.
1584                                             while (true) {
1585                                                      // get the parent 
1586                                                      Species oldParent = oldLeaf.getParent();
1587    
1588                                                      if (newSpeciesSet.contains(oldParent)) {
1589                                                                    // we found the LCA, so now we need to add the
1590                                                                    // new child as a child of the LCA
1591                                                                    
1592                                                                    // get a reference to the LCA in the new tree,
1593                                                                    // using the species ID
1594                                                                    Species newLCA = (Species) newSpeciesMap.get(oldParent.getID());
1595    
1596                                                                    // add the new child to the new LCA
1597                                                                    newLCA.addChild(newLeaf);
1598                                                                    
1599                                                                    break;
1600                                                      } else {
1601                                                                    // the inner node doesn't exist in the new tree,
1602                                                                    // so we will need to add it to the tree
1603    
1604                                                                    // create a copy of the parent to be added to
1605                                                                    // the new tree
1606                                                                    Species newParent = (Species) oldParent.copy(newTree);
1607    
1608                                                                    // add the cloned child to the cloned parent
1609                                                                    newParent.addChild(newLeaf);
1610    
1611                                                                    // now we can just need to deal with the 
1612                                                                    // parents
1613                                                                    newLeaf = newParent;
1614                                                                    oldLeaf = oldParent;
1615    
1616                                                                    // since we've now effectively added a new
1617                                                                    // node (newParent) to newTree, we need to
1618                                                                    // update the newTree's HashMap
1619                                                                    newSpeciesMap.put(newParent.getID(), newParent);
1620                                                                    newSpeciesSet.add(newParent);
1621                                                      }
1622                                             }
1623                                    }
1624                      }
1625    
1626                      // add CR-LF to end of "."
1627                      CrimsonUtils.printMsg("");
1628    
1629                      newTree.collapse();
1630                      newTree.computeStats();
1631    
1632                      // this will output a list of all leaves to the file
1633                      // 'leaf.list'
1634                      //              if (DEBUG_OUTPUT) FileIO.printSpecies(new TreeSet(newSpeciesSet), true);
1635                      if (DEBUG_OUTPUT) FileIO.printSpecies(newTree.getSpeciesList(), false);
1636                      if (DEBUG_OUTPUT) FileIO.printSpecies(newSpeciesSet, true);
1637    
1638                      return newTree;
1639             }
1640    
1641             /** 
1642              * This will return the LCA for all of the leaves in the input
1643              * set.
1644              */
1645             /*
1646             public Species getRootLCA(ArrayList leaves) {
1647                      if (! buildTree()) {
1648                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
1649                                    return null;
1650                      }
1651                      
1652                      if ((leaves == null) || (leaves.size() == 0)) return null;
1653    
1654                      // this will output a list of all leaves to the file
1655                      // 'species.list'
1656                      if (DEBUG_OUTPUT) FileIO.printSpecies(new TreeSet(newSpeciesSet), true);
1657                      //              if (DEBUG_OUTPUT) FileIO.printLeaves(new TreeSet(leaves));
1658    
1659                      // keep track of inner nodes processed
1660                      HashSet speciesSet = new HashSet();
1661    
1662                      // PROCESS FIRST SELECTION
1663    
1664                      // randomly select a leaf from the list 
1665                      String speciesID = (String) leaves.get(0); 
1666                      Species leaf = getSpeciesByID(speciesID);
1667                      if (leaf == null) {
1668                                    CrimsonUtils.printError("Invalid species ID: " + speciesID);
1669                                    return null;
1670                      }
1671                      
1672                      // this is the first species, so add it as root. since it's a
1673                      // leaf (and not an inner node) we don't need to add it to
1674                      // speciesSet.
1675                      Species root = leaf;
1676    
1677                      // for simplicity, store the number of leaves to be included
1678                      int num = leaves.size();
1679    
1680                      // use this to figure out when to print a "."
1681                      int inc = (num > 10) ? num / 10 : 1;
1682    
1683                      // give user some feedback
1684                      CrimsonUtils.printMsg("GetRootLCA: .", false);
1685    
1686                      // PROCESS REMAINING SELECTIONS. Loop until all leaves have
1687                      // been sampled
1688                      for (int index = 1; index < num; index++) {
1689                                    // add "." every 10% completed
1690                                    if ((index % inc) == 0) CrimsonUtils.printMsg(".", false);
1691    
1692                                    // get next leaf from the list 
1693                                    speciesID = (String) leaves.get(index); 
1694                                    leaf = getSpeciesByID(speciesID);
1695                                    if (leaf == null) {
1696                                             CrimsonUtils.printError("Invalid species ID: " + speciesID);
1697                                             return null;
1698                                    }
1699    
1700                                    // since not the first leaf, we need to find the LCA
1701                                    // between the root of the new tree and the new leaf
1702                                    Species lca = getLCA(root, leaf);
1703                                    
1704                                    if (lca == root) {
1705                                             // loop until we've propogated up the tree from the
1706                                             // leaf to the LCA.
1707                                             while (true) {
1708                                                      // get the parent 
1709                                                      Species parent = leaf.getParent();
1710    
1711                                                      if (speciesSet.contains(parent)) {
1712                                                                    break;  // found lca
1713                                                      } else {
1714                                                                    // this is a new inner node, so add it to set
1715                                                                    speciesSet.add(parent);
1716                                                      }
1717                                             }
1718                                    } else {
1719                                             // lca != root: extend root and leaf to lca.
1720    
1721                                             // need to extend the root to the LCA, via it's
1722                                             // parents in the tree.  loop until we've propogated
1723                                             // up the tree from the NEW ROOT TO THE LCA.
1724                                             while (true) {
1725                                                      // get the parent of the new root, from the
1726                                                      // original tree
1727                                                      Species parent = root.getParent();
1728                                                      
1729                                                      // update root
1730                                                      root = parent;
1731                                                      
1732                                                      // this is a new inner node, so add it to set
1733                                                      speciesSet.add(parent);
1734    
1735                                                      // if we found the LCA, then we're done, else
1736                                                      // continue adding parents
1737                                                      if (lca == parent) break;
1738                                             }
1739    
1740                                             // need to extend the new leaf via it's parents up to
1741                                             // the LCA.  loop until we've propogated up the tree
1742                                             // from the NEW LEAF TO THE LCA.
1743                                             while (true) {
1744                                                      // get the parent 
1745                                                      Species parent = leaf.getParent();
1746    
1747                                                      if (speciesSet.contains(parent)) {
1748                                                                    break;  // found lca
1749                                                      } else {
1750                                                                    // new inner node so add it to the set
1751                                                                    speciesSet.add(parent);
1752                                                      }
1753                                             }
1754                                    }
1755                      }
1756    
1757                      // add CR-LF to end of "."
1758                      CrimsonUtils.printMsg("");
1759                      return root;
1760             }
1761             */
1762    
1763             /** 
1764              * This will return the LCA for a random sampling of leaves from
1765              * the tree.
1766              */
1767             public Species getRootLCA(int num) {
1768                      if (! buildTree()) {
1769                                    CrimsonUtils.printError("Can't sample tree because no valid tree.");
1770                                    return null;
1771                      }
1772                      
1773                      if (num < 1) return null;
1774    
1775                      // need the leaves as a list so we can more easily (randomly)
1776                      // select leaves
1777                      ArrayList leaves = getLeaves();
1778    
1779                      // make sure enough leaves to fullfil request
1780                      if (leaves.size() < num) {
1781                                    String msg = "Less leaves (" + String.valueOf(leaves.size()) + ") ";
1782                                    msg += "in tree, than requested in tree sample (" + String.valueOf(num) + ")\n";
1783                                    CrimsonUtils.printWarning(msg);
1784    
1785                                    // since we're returning all leaves, we just need to clone the tree
1786                                    return null;
1787                      }
1788    
1789                      // keep track of inner nodes proccessed
1790                      HashSet speciesSet = new HashSet();
1791    
1792                      // PROCESS FIRST SELECTION
1793    
1794                      // randomly select a leaf from the list 
1795                      int index = CrimsonUtils.random.nextInt(leaves.size());
1796                      Species leaf = (Species) leaves.get(index);
1797    
1798                      // this is the first species, so add it as root. since it's a
1799                      // leaf (and not an inner node) we don't need to add it to
1800                      // speciesSet.
1801                      Species root = leaf;
1802    
1803                      // remove the leaf from the list of leaves.  This way we
1804                      // don't have to worry about selecting it again
1805                      leaves.remove(index);
1806    
1807                      // use this to figure out when to print a "."
1808                      int inc = (num > 10) ? num / 10 : 1;
1809    
1810                      // give user some feedback
1811                      CrimsonUtils.printMsg("GetRootLCA: .", false);
1812    
1813                      // PROCESS REMAINING SELECTIONS. Loop until all leaves have
1814                      // been sampled
1815                      for (int i = 1; i < num; i++) {
1816                                    // add "." every 10% completed
1817                                    if ((i % inc) == 0) CrimsonUtils.printMsg(".", false);
1818    
1819                                    // randomly select a leaf from the list 
1820                                    index = CrimsonUtils.random.nextInt(leaves.size());
1821                                    leaf = (Species) leaves.get(index);
1822    
1823                                    // remove the leaf from the list of leaves.  This way we
1824                                    // don't have to worry about selecting it again
1825                                    leaves.remove(index);
1826    
1827                                    // this is a new inner node, so we will need to add it to
1828                                    // our set
1829                                    speciesSet.add(leaf);
1830    
1831                                    // since not the first leaf, we need to find the LCA
1832                                    // between the root of the new tree and the new leaf
1833                                    Species lca = getLCA(root, leaf);
1834                                    
1835                                    if (lca == root) {
1836                                             // loop until we've propogated up the tree from the
1837                                             // leaf to the LCA.
1838                                             while (true) {
1839                                                      // get the parent 
1840                                                      Species parent = leaf.getParent();
1841    
1842                                                      if (speciesSet.contains(parent)) {
1843                                                                    break;  // we found the LCA
1844                                                      } else {
1845                                                                    // this is a new inner node, so add it to set
1846                                                                    speciesSet.add(parent);
1847                                                      }
1848                                             }
1849    
1850                                    } else {
1851                                             // lca != nRoot: extend root and newLeaf to lca.
1852    
1853                                             // need to extend the root of the new tree to the LCA,
1854                                             // via it's parents in the original tree.  loop until
1855                                             // we've propogated up the tree from the NEW ROOT TO
1856                                             // THE LCA.
1857                                             while (true) {
1858                                                      // get the parent of the new root, from the
1859                                                      // original tree
1860                                                      Species parent = root.getParent();
1861    
1862                                                      // update root
1863                                                      root = parent;
1864                                                      
1865                                                      // this is a new inner node, so add it to set
1866                                                      speciesSet.add(parent);
1867    
1868                                                      // if we found the LCA, then we're done, else
1869                                                      // continue adding parents
1870                                                      if (lca == parent) break;
1871                                             }
1872    
1873                                             // need to extend the new leaf via it's parents up to
1874                                             // the LCA.  loop until we've propogated up the tree
1875                                             // from the NEW LEAF TO THE LCA.
1876                                             while (true) {
1877                                                      // get the parent 
1878                                                      Species parent = leaf.getParent();
1879    
1880                                                      if (speciesSet.contains(parent)) {
1881                                                                    break;  // found lca
1882                                                      } else {
1883                                                                    // new inner node so add it to the set
1884                                                                    speciesSet.add(parent);
1885                                                      }
1886                                             }
1887                                    }
1888                      }
1889    
1890                      // add CR-LF to end of "."
1891                      CrimsonUtils.printMsg("");
1892    
1893                      // this will output a list of all leaves to the file
1894                      // 'leaf.list'
1895                      if (DEBUG_OUTPUT) FileIO.printSpecies(new TreeSet(speciesSet), true);
1896    
1897                      return root;
1898             }
1899    
1900        //--------------------------------------------------------------------------
1901        // Non-specific Tree Related Functions.
1902    
1903             /** 
1904              * Returns the LCA for the tree, assuming this species is the root
1905              * of the tree. This requires that the level values have been set.
1906              */
1907             public static Species getLCA(Species s1, Species s2) {
1908                      if (s1 == s2) return s1;
1909                      if (s1.isRoot()) return s1;
1910                      if (s2.isRoot()) return s2;
1911    
1912                      Species lca = null;
1913                      Species des = null;
1914                      if (s1.getLevel() < s2.getLevel()) {
1915                                    lca = s1;
1916                                    des = s2;
1917                      } else if  (s1.getLevel() > s2.getLevel()) {
1918                                    lca = s2;
1919                                    des = s1;
1920                      } else {
1921                                    lca = s1.getParent();
1922                                    des = s2;
1923                      }
1924    
1925                      while (des.getLevel() - lca.getLevel() > 1) {
1926                                    des = des.getParent();
1927                      }
1928    
1929                      while ((! lca.isRoot()) && (des.getParent() != lca)) {
1930                                    //                while ((! lca.isRoot()) && (! (des.getParent()).equals(lca))) {
1931                                    des = des.getParent();
1932                                    if (des.getLevel() == lca.getLevel()) {
1933                                             lca = lca.getParent();
1934                                    }
1935                      }
1936    
1937                      return lca;
1938             }
1939    
1940    } // Tree.java
1941