001    /*
002     * CRIMSON
003     * Copyright (c) 2006, Stephen Fisher, Susan Davidson, and Junhyong Kim, 
004     * University of Pennsylvania.
005     *
006     * This program is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU General Public License as
008     * published by the Free Software Foundation; either version 2 of the
009     * License, or (at your option) any later version.
010     *
011     * This program is distributed in the hope that it will be useful, but
012     * WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019     * 02110-1301 USA.
020     *
021     * @(#)Queries.java
022     */
023    
024    package edu.upenn.crimson.io;
025    
026    import edu.upenn.crimson.*;
027    import java.util.HashSet;
028    import java.util.ArrayList;
029    import java.util.TreeSet;
030    import java.util.Iterator;
031    import java.util.Scanner;
032    import java.io.*;
033    import java.sql.Clob;
034    import java.sql.SQLException;
035    
036    /**
037     * Functions related to QUERIES table.
038     *
039     * @XXX Should see if can optimize db access by maintaining one
040     * Statement that is created every time a database connection is made.
041     * 
042     * @author  Stephen Fisher
043     * @version $Id: Queries.java,v 1.66 2009/07/21 01:19:12 fisher Exp $
044     */
045    
046    public class Queries {
047            /** 
048             * If true, then whenever a (bp or codon) position selection query
049             * is run, the list of positions selected will be output to the
050             * file 'positions.list'.
051             */
052            final static boolean DEBUG_OUTPUT = true;       
053            
054            /**
055             * When writing data to a NEXUS file ('run()'), this is the number
056             * of species queried per SQL statement.  If more than 'SPECIES_CHUNK'
057             * number of species to query, than multiple queries will be
058             * performed, each one with 'SPECIES_CHUNK' number of species.
059             */
060            final static int SPECIES_CHUNK = 50;    
061            
062            
063        //--------------------------------------------------------------------------
064        // Miscellaneous Methods
065            
066            /** 
067             * This will load the specified query from the QUERIES table in
068             * the current database. If no query is specified then all queries
069             * will be loaded.
070             */
071            public static void load(String id) {
072                    // make sure a database is open. We don't need to check for
073                    // the existance of the QUERIES database because this was done
074                    // when the database was opened.
075                    if (! Database.isOpen()) {
076                            CrimsonUtils.printError("Can't load query, no open database.");
077                            return;
078                    }
079                    
080                    CrimsonUtils.printMsg("Loading query info...");
081    
082                    // get the list of queries in the QUERIES table. 
083                    /*
084                    String sql = "SELECT id, tree_id, notes, leaf_select, num_leaves, ";
085                    sql += "temp_depth_thresh, level_thresh, seq_select, num_pos, seed ";
086                    sql += " FROM QUERIES";
087                    */
088                    String sql = "SELECT id, tree_id, notes, leaf_select, num_leaves, ";
089                    sql += "temp_depth_thresh, level_thresh, seq_select, num_pos, seed, ";
090                    sql += "leaves, partitions, positions FROM QUERIES";
091                    // if ID exists, then only load that query
092                    if (! CrimsonUtils.isEmpty(id)) sql += " WHERE id = '" + id.toUpperCase() + "'";
093                    sql = sql.toUpperCase();
094    
095                    if (Database.isProxy()) {
096                            StringBuilder results = Database.ImportQuery(sql);
097                            
098                            // if the query failed then exit
099                            if (results == null) {
100                                    CrimsonUtils.printError("Unable to build queries lists, error accessing QUERIES table.");
101                                    return;
102                            }
103                            
104                            Query.dbLoadProxy(results);
105                    } else {
106                            ArrayList results = Database.sqlToArray(sql);
107                            
108                            // if the query failed then exit
109                            if (results == null) {
110                                    CrimsonUtils.printError("Unable to build queries lists, error accessing QUERIES table.");
111                                    return;
112                            }
113                            
114                            // convert each row in the results to a Query
115                            for (Iterator rows = results.iterator(); rows.hasNext();) {
116                                    Query.dbLoad((ArrayList) rows.next());
117                            }
118                    }
119    
120                    /*
121                    try {
122                            // convert each row in the results to a Query
123                            for (Iterator rows = results.iterator(); rows.hasNext();) {
124                                    ArrayList row = (ArrayList) rows.next();
125                                                            
126                                    Query query = new Query(String.valueOf(row.get(1)), String.valueOf(row.get(0)));
127                                    query.setNotes(String.valueOf(row.get(2)));
128                                    query.setLeafSelection(Integer.parseInt(String.valueOf(row.get(3))));
129                                    query.setNumLeaves(Integer.parseInt(String.valueOf(row.get(4))));
130                                    query.setTempDepthThresh(Double.parseDouble(String.valueOf(row.get(5))));
131                                    query.setLevelThresh(Integer.parseInt(String.valueOf(row.get(6))));
132                                    query.setSequenceSelection(Integer.parseInt(String.valueOf(row.get(7))));
133                                    query.setNumPositions(Integer.parseInt(String.valueOf(row.get(8))));
134                                    query.setSeed(Long.parseLong(String.valueOf(row.get(9))));
135                                    
136                                    // leaves, partitions, and postitions are stored as
137                                    // ';' delimited strings in the table so they need to
138                                    // be parsed when extracted from the table
139                                    // ******************************************************
140                                    // These MUST be uncommented when any query contains a CLOB
141                                    // ******************************************************
142                                    //                              query.setLeaves(Database.readClobSet(row.get(10)));
143                                    //                              query.setPartitions(Database.readClobSet(row.get(11)));
144                                    //                              query.setPositions(Database.readClobSet(row.get(12)));
145                                    
146                                    query.setSaved(true); // saved since we just got it from the db
147                            }
148                            // UNCOMMENT THIS WHEN CLOBS ARE RESTORED
149                            //              } catch (SQLException e) { 
150                    } catch (Exception e) { 
151                            CrimsonUtils.printError("SQL error loading CLOB in QUERIES.");
152                            CrimsonUtils.printError(e.getMessage());
153                            return;
154                    }
155                    */
156            }
157            
158            /** 
159             * This will publish the specified query in the QUERIES table in the
160             * current database.  It will overwrite any query already in the
161             * table with the same name.
162             */
163            public static void publish(Query query) {
164                    if (query == null) {
165                            CrimsonUtils.printError("Unable to publish 'null' query .");
166                            return;
167                    }
168                    
169                    publish(query, query.getID(), true);
170            }
171            
172            /** 
173             * This will publish the specified query in the QUERIES table in
174             * the current database.  If 'overwrite' is true then this will
175             * replace any query object with the same name, already in the
176             * table.  If 'id' is not empty, then it will be used, instead of
177             * query.id, as the name of the query to be published
178             * (ie. QUERIES.id).
179             */
180            public static void publish(Query query, String id, boolean overwrite) {
181                    if (query == null) {
182                            CrimsonUtils.printError("Unable to publish 'null' query .");
183                            return;
184                    }
185                    
186                    // make sure a database is open. We don't need to check for
187                    // the existance of the QUERIES database because this was done
188                    // when the database was opened.
189                    if (! Database.isOpen()) {
190                            CrimsonUtils.printError("Can't publish query, no open database.");
191                            return;
192                    }
193                    
194                    // use 'id' if present, otherwise query.id
195                    if (CrimsonUtils.isEmpty(id)) id = query.getID();
196                    
197                    String sql;
198                    // if query already exists then update, else insert
199                    if (dbContains(id)) {
200                            // if 'overwrite' is false then exit on error
201                            if (! overwrite) {
202                                    CrimsonUtils.printError("Query already exists in database.");
203                                    return;
204                            }
205                            
206                            // update existing query
207                            sql = "UPDATE QUERIES SET";
208                            sql += " notes = '" + query.getNotes() + "'";
209                            sql += " , tree_id = '" + query.getTreeID() + "'";
210                            sql += " , leaf_select = " + query.getLeafSelection();
211                            sql += " , num_leaves = " + query.getNumLeaves();
212                            sql += " , temp_depth_thresh = " + query.getTempDepthThresh();
213                            sql += " , level_thresh = " + query.getLevelThresh();
214                            sql += " , seq_select = " + query.getSequenceSelection();
215                            sql += " , num_pos = " + query.getNumPositions();
216                            sql += " , seed = " + query.getSeed();
217                            sql += " WHERE id = '" + id.toUpperCase() + "' ";
218                            
219                    } else { // insert new query
220                            sql = "INSERT INTO QUERIES ";
221                            sql += "(id, tree_id, notes, leaf_select, num_leaves, temp_depth_thresh, level_thresh";
222                            sql += ", seq_select, num_pos, seed) VALUES ('" + id + "'";
223                            sql += ", '" + query.getTreeID() + "'";
224                            sql += ", '" + query.getNotes() + "'";
225                            sql += ", " + query.getLeafSelection();
226                            sql += ", " + query.getNumLeaves();
227                            sql += ", " + query.getTempDepthThresh();
228                            sql += ", " + query.getLevelThresh();
229                            sql += ", " + query.getSequenceSelection();
230                            sql += ", " + query.getNumPositions();
231                            sql += ", " + query.getSeed() + ")";
232                    }
233                    
234                    // UPDATE the VARCHAR2 and NUMBER fields
235                    if (! Database.execUpdate(sql)) {
236                            CrimsonUtils.printError("Query NOT published.");
237                            return;
238                    }
239                    
240                    // UPDATE the CLOBs
241                    // leaves, partitions, and postitions are stored as ';'
242                    // delimited strings in the table so they need to be
243                    // parsed when added to the table
244                    if (! Database.writeClobSet("QUERIES", id, "leaves", query.getLeaves())) {
245                            CrimsonUtils.printError("Can't update leaves. Query NOT published.");
246                            return;
247                    }
248                    if (! Database.writeClobSet("QUERIES", id, "partitions", query.getPartitions())) {
249                            CrimsonUtils.printError("Can't update partitions. Query NOT published.");
250                            return;
251                    }
252                    if (! Database.writeClobSet("QUERIES", id, "positions", query.getPositions())) {
253                            CrimsonUtils.printError("Can't update positions. Query NOT published.");
254                            return;
255                    }
256                    
257                    query.setSaved(true); // saved since we just publised it to the db
258                    
259                    // if we get this far then we didn't error anywhere
260                    CrimsonUtils.printMsg("Query published.");
261            }
262            
263            /** 
264             * This will return an ArrayList with id's for all queries in the
265             * QUERIES table. 
266             */
267            public static ArrayList dbList() {
268                    if (! Database.isOpen()) {
269                            CrimsonUtils.printError("No database open.");
270                            return null;
271                    }
272                    
273                    String sql = "SELECT id FROM QUERIES";
274                    return Database.sqlToArray(sql);
275            }
276            
277            /** 
278             * This will return true if a query exists in QUERIES with an id
279             * equal to 'id'. 
280             */
281            public static boolean dbContains(String id) {
282                    ArrayList out = Database.sqlToArray("SELECT id FROM QUERIES WHERE id = '" + id.toUpperCase() + "'");
283                    if ((out == null) || (out.size() == 0)) return false;
284                    else return true;
285            }
286            
287            /** Removes a query from the database. */
288            public static boolean delete(String id) { 
289                    boolean val = Database.delete("QUERIES", id); 
290                    
291                    // remove the query from the relevant lists
292                    ObjectHandles.deleteQuery(id);
293                    
294                    return val;
295            }
296            
297            /** 
298             * This will repeatedly run a query, generating a NEXUS file for
299             * each run. The output will be a NEXUS file that includes the
300             * Crimson and tree blocks. Query notes will not be included.
301             * @param queryID string ID for query to be run
302             * @param filename filename for NEXUS output
303             * @param incSequence when true, inner sequence IDs will be
304             * included in the NEXUS file
305             * @param numRuns number of runs
306             */
307            public static void run(String queryID, String filename, boolean incSequence, int numRuns) {
308                    if (CrimsonUtils.isEmpty(filename)) {
309                            CrimsonUtils.printError("No NEXUS file specified.");
310                            return;
311                    }
312                    
313                    if (numRuns > 1) {
314                            Query query = ObjectHandles.getQuery(queryID);
315                            if (query == null) {
316                                    CrimsonUtils.printError("Query not found.");
317                                    return;
318                            }
319                            if (query.getSeed() > -1) {
320                                    CrimsonUtils.printError("Can not run query repetitions when query seed is set.");
321                                    return;
322                            }
323                            
324                            for (int i = 0; i < numRuns; i++) {
325                                    run(queryID, filename + "_" + i, incSequence, true, 1, false);
326                            }
327                    } else {
328                            run(queryID, filename, incSequence, true, 1, false);
329                    }
330            }
331            
332            /*
333            // This will perform a query of a tree. *** REPLACED with expanded version below
334            public static void run(String queryID, String filename, boolean incSequence) {
335                    if (! Database.isOpen()) {
336                            CrimsonUtils.printError("Must open a database before running a query.");
337                            return;
338                    }
339                    
340                    if (CrimsonUtils.isEmpty(filename)) {
341                            CrimsonUtils.printError("No NEXUS file specified.");
342                            return;
343                    }
344                    File file = new File(filename);
345                    // if the file already exists and not supposed to overwrite
346                    // it, then return on error.
347    
348                    //              if (file.exists()) {
349                    //                      CrimsonUtils.printError("File \"" + filename + "\" already exists.");
350                    //                      return;
351                    //              }
352                    
353                    Query query = ObjectHandles.getQuery(queryID);
354                    if (query == null) {
355                            CrimsonUtils.printError("Query not found.");
356                            return;
357                    }
358                    
359                    if (CrimsonUtils.isEmpty(query.getTreeID())) {
360                            CrimsonUtils.printError("Tree not specified within query.");
361                            return;
362                    }
363                    
364                    Tree treeOrig = ObjectHandles.getTree(query.getTreeID());
365                    if (treeOrig == null) {
366                            CrimsonUtils.printError("Tree does not exist: " + query.getTreeID());
367                            return;
368                    }
369                    
370                    // set seed prior to sampling tree
371                    if (query.getSeed() > -1) CrimsonUtils.setRandomSeed(query.getSeed());
372                    
373                    // use Tree() routines to process the leaf selection.  With
374                    // the new tree we can get the new newick string and species.
375                    Tree treeNew;
376                    switch (query.getLeafSelection()) {
377                    case 1:  // random (numLeaves)
378                            treeNew = treeOrig.randomSample(query.getNumLeaves());
379                            if (treeNew == null) return;
380                            treeNew.computeStats();
381                            break;
382                    case 2:  // depth distributed (numLeaves, tempDepthThresh)
383                            treeNew = treeOrig.uniformSampleByTempDepth(query.getTempDepthThresh(), query.getNumLeaves());
384                            if (treeNew == null) return;
385                            treeNew.computeStats();
386                            break;
387                    case 3:  // depth weighted (numLeaves, tempDepthThresh)
388                            treeNew = treeOrig.weightedSampleByTempDepth(query.getTempDepthThresh(), query.getNumLeaves());
389                            if (treeNew == null) return;
390                            treeNew.computeStats();
391                            break;
392                    case 4:  // level distributed (numLeaves, levelThresh)
393                            treeNew = treeOrig.uniformSampleByLevel(query.getLevelThresh(), query.getNumLeaves());
394                            if (treeNew == null) return;
395                            treeNew.computeStats();
396                            break;
397                    case 5:  // level weighted (numLeaves, levelThresh)
398                            treeNew = treeOrig.weightedSampleByLevel(query.getLevelThresh(), query.getNumLeaves());
399                            if (treeNew == null) return;
400                            treeNew.computeStats();
401                            break;
402                    case 6:  // manual (leaves)
403                            treeNew = treeOrig.manualSampleByID(new ArrayList(query.getLeaves()));
404                            if (treeNew == null) return;
405                            treeNew.computeStats();
406                            break;
407                    default:
408                            // this is equivalent to "case 0".  if tree not already
409                            // built, then build it now
410                            if (! treeOrig.isBuilt()) treeOrig.computeStats();
411                            treeNew = treeOrig;
412                    }
413                    
414                    // make sure the tree is valid
415                    if (treeNew.getNumSpecies() < 1) {
416                            CrimsonUtils.printError("Sampled tree has no leaves.");
417                            return;
418                    }
419                    
420                    String nchar;
421                    ArrayList partitions;  // this is an array to preserve order
422                    
423                    // used by seq selection methods 1-4, this will contain a list
424                    // of bp positions to be included in the output.  these are
425                    // arrays so that they can differ per partition, in case
426                    // partitions very in size.
427                    ArrayList sampledPositions = new ArrayList();
428                    
429                    // this will contain the partition data printed at the end of
430                    // multi-partition NEXUS files
431                    StringBuffer charPartition = new StringBuffer();
432                    
433                    /////////////
434                    // get the partitions to be included and the values for nchar,
435                    // sampledPositions, and CharPartition
436                    if ((query.getSequenceSelection() == 5) || (query.isOnlyStruct())) {
437                            // only structure, so no partitions to be included
438                            partitions = new ArrayList();
439                            nchar = "0";
440                    } else {
441                            // figure out partition data to include
442                            partitions = new ArrayList(query.getPartitions());
443                            if ((partitions == null) || (partitions.size() == 0)) {
444                                    // no partitions specificed, so assume 'ALL'.
445                                    partitions = Database.sqlToArray("SELECT id FROM PARTITIONS WHERE tree_id = '" + treeOrig.getID() + "'");
446                                    if (partitions == null) partitions = new ArrayList();
447                            }
448                            
449                            TreeSet expandedPositions = null;
450                            if ((query.getSequenceSelection() == 3) || (query.getSequenceSelection() == 4)) {
451                                    // for manual selections, need to expand position
452                                    // string to figure.  this is independent of the
453                                    // partition
454                                    expandedPositions = expandPositions(query.getPositions(), (query.getSequenceSelection() == 3));
455                                    // if not able to expand then stop
456                                    if (expandedPositions == null) return;
457                            }                                        
458                            
459                            // used to count num positions across partitions
460                            int iLength = 0;
461                            
462                            // set up the array such that each partition will have
463                            // it's own set of sampled positions
464                            sampledPositions = new ArrayList();
465                            
466                            // make sure all partitions are valid and compute relevant
467                            // sampling positions
468                            for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
469                                    String partitionID = (String) partitions.get(iPartition);
470                                    
471                                    // shouldn't need to test both database and partitionPool, but
472                                    // it's better to be careful
473                                    if ((! Partitions.dbContains(partitionID)) || (! ObjectHandles.containsPartition(partitionID))) {
474                                            CrimsonUtils.printError("Partition doesn't exists: " + partitionID);
475                                            return;
476                                    }
477                                    
478                                    // get partition length
479                                    int pLength = (ObjectHandles.getPartition(partitionID)).getLength();
480                                    if (pLength <= 0) {
481                                            CrimsonUtils.printError("Partition (" + partitionID + ") has invalid length: " + pLength);
482                                            return;
483                                    }
484                                    
485                                    if ((query.getSequenceSelection() == 3) || (query.getSequenceSelection() == 4)) {
486                                            // manual selection
487                                            if (partitions.size() > 1) {
488                                                    // manual selection is only valid for a single
489                                                    // data partition.
490                                                    CrimsonUtils.printError("Can't manually select positions when using more than one data partition.");
491                                                    return;
492                                            }
493                                            
494                                            // add valid positions to sampledPositions.  Do
495                                            // this for every partition since partitions will
496                                            // have different lengths.  Thus a valid position
497                                            // for one partition may be invalid for
498                                            // another. HOWEVER, only allow one partition to
499                                            // be selected, so doing this here should not be
500                                            // necessary.
501                                            TreeSet sampPos = new TreeSet();
502                                            for (Iterator iExpPos = expandedPositions.iterator(); iExpPos.hasNext();) {
503                                                    Integer indx = (Integer) iExpPos.next();
504                                                    if (indx.intValue() >= pLength) {
505                                                            CrimsonUtils.printWarning("Ignoring out of bounds position:" + indx);
506                                                    } else {
507                                                            sampPos.add(indx);
508                                                    }
509                                            }
510                                            
511                                            // with manual selection, nchar is number of positions
512                                            iLength = sampPos.size();
513                                            
514                                            sampledPositions.add(sampPos);
515                                            
516                                    } else {
517                                            // compute random positions to be sampled. We do
518                                            // this for each partition because the partition
519                                            // lengths may be different.
520                                            if (query.getSequenceSelection() == 1) {  // random codon sampling
521                                                    sampledPositions.add(getRandomPositions(pLength, query.getNumPositions(), true));
522                                            } else if (query.getSequenceSelection() == 2) {  // random bp sampling
523                                                    sampledPositions.add(getRandomPositions(pLength, query.getNumPositions(), false));
524                                            }
525                                            
526                                            // this is the case for seq selection 1 and 2
527                                            int numPositions = query.getNumPositions();
528                                            // seq selection 0 is select all
529                                            if (query.getSequenceSelection() == 0) numPositions = pLength;
530                                            
531                                            // count num of positions to include per partition
532                                            if (pLength < numPositions) {
533                                                    // warn user that partition length is smaller
534                                                    // than requested number of positions
535                                                    String msg = "Partition '" + partitionID + "' does not contain '"; 
536                                                    msg += numPositions + "' positions as requested.";
537                                                    CrimsonUtils.printWarning(msg);
538                                                    
539                                                    // setup charPartition string for printing at
540                                                    // end of NEXUS file
541                                                    charPartition.append("    " + NexusFile.quote(partitionID) + ":");
542                                                    charPartition.append((iLength + 1) + "-" + (iLength + pLength) + "\n");
543                                                    charPartition.append((iPartition + 1 < partitions.size()) ? "," : ";");
544                                                    
545                                                    iLength += pLength;
546                                            } else {
547                                                    // setup charPartition string for printing at
548                                                    // end of NEXUS file
549                                                    charPartition.append("    " + NexusFile.quote(partitionID) + ":");
550                                                    charPartition.append((iLength + 1) + "-" + (iLength + numPositions) + "\n");
551                                                    charPartition.append((iPartition + 1 < partitions.size()) ? "," : ";");
552                                                    
553                                                    iLength += numPositions;
554                                            }
555                                    }
556                            }
557                            
558                            // convert integer nchar value to string which will be
559                            // used below
560                            nchar = String.valueOf(iLength);
561                    }
562                    
563                    ////////////////////////////////
564                    // begin writting the NEXUS file
565                    ////////////////////////////////
566                    try {
567                            FileWriter fWriter = new FileWriter(file);
568                            BufferedWriter bWriter = new BufferedWriter(fWriter);
569                            
570                            bWriter.write("#NEXUS\n\n");
571                            
572                            // include query as comments at top of NEXUS file
573                            bWriter.write("[\nCRIMSON QUERY:\n");
574                            bWriter.write(query.toString());
575                            bWriter.write("]\n\n");
576                            
577                            ArrayList species =  incSequence ? treeNew.getSpeciesList() : treeNew.getLeaves(); 
578                            int ntax = species.size();
579                            
580                            // if no partitions then no data
581                            if (partitions.size() > 0) {
582                                    // print character data header
583                                    bWriter.write("BEGIN DATA;\n");
584                                    bWriter.write("  DIMENSIONS NTAX=" + ntax + " NCHAR=" + nchar + ";\n");
585                                    if (partitions.size() > 1) bWriter.write("  FORMAT DATATYPE=rna GAP=- INTERLEAVE;\n");
586                                    else bWriter.write("  FORMAT DATATYPE=rna GAP=-;\n");
587                                    bWriter.write("  MATRIX\n");
588                                    
589                                    // process sequence data
590                                    for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
591                                            String partitionID = (String) partitions.get(iPartition);
592                                            
593                                            // if more than 1 partition, then add a comment
594                                            // containing the name of the partition to help
595                                            // deliniate the interleaved sections
596                                            if (partitions.size() > 1) bWriter.write(" [" + partitionID.toUpperCase() + "]\n");
597                                            
598                                            // process species in chunks of size SPECIES_CHUNK
599                                            for (int iSpecies = 0; iSpecies < ntax; iSpecies += SPECIES_CHUNK) {
600                                                    int endChunk = iSpecies + SPECIES_CHUNK;
601                                                    if (endChunk > ntax) endChunk = ntax;
602                                                    
603                                                    // build query
604                                                    String sql = "SELECT species_id, sequence FROM PART_DATA WHERE partition_id = '" + partitionID + "'";
605                                                    sql += " and (species_id = '" + ((Species) species.get(iSpecies)).getID() + "'";
606                                                    for (int iChunk = iSpecies + 1; iChunk < endChunk; iChunk++) {
607                                                            sql += " or species_id = '" + ((Species) species.get(iChunk)).getID() + "'";
608                                                    }
609                                                    sql += ")";
610                                                    
611                                                    ArrayList results = Database.sqlToArray(sql);
612                                                    
613                                                    if (results == null) {
614                                                            CrimsonUtils.printError("NEXUS file incomplete. Unable to get partition data: " + sql);
615                                                            bWriter.flush();
616                                                            bWriter.close();
617                                                            fWriter.close();
618                                                            return;
619                                                    }
620                                                    
621                                                    for (Iterator rows = results.iterator(); rows.hasNext();) {
622                                                            ArrayList row = (ArrayList) rows.next();
623                                                            String data = Database.readClob(row.get(1));
624                                                            
625                                                            if ((sampledPositions.size() > 0) && (sampledPositions.get(iPartition) != null)) {
626                                                                    // only include positions in sampledPositions[]
627                                                                    StringBuffer sampledData = new StringBuffer();
628                                                                    TreeSet sampPos = (TreeSet) sampledPositions.get(iPartition);
629                                                                    for (Iterator i = sampPos.iterator(); i.hasNext();) {
630                                                                            sampledData.append(data.charAt(((Integer) i.next()).intValue()));
631                                                                    }
632                                                                    data = sampledData.toString();
633                                                            }
634                                                            bWriter.write("  " + String.valueOf(row.get(0)) + "   " + data + "\n");
635                                                    }
636                                            }
637                                            
638                                            // put in a space between partitions
639                                            bWriter.write("\n");
640                                    }
641                                    bWriter.write(";\nEND;\n\n");
642                                    
643                                    // print crimson data header
644                                    bWriter.write("BEGIN CRIMSON;\n");
645                                    bWriter.write("  MATRIX\n");
646                                    
647                                    // process structure data
648                                    for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
649                                            String partitionID = (String) partitions.get(iPartition);
650                                            
651                                            // if more than 1 partition, then add a comment
652                                            // containing the name of the partition to help
653                                            // deliniate the interleaved sections
654                                            if (partitions.size() > 1) bWriter.write(" [" + partitionID.toUpperCase() + "]\n");
655                                            
656                                            // process species in chunks of size SPECIES_CHUNK
657                                            for (int iSpecies = 0; iSpecies < ntax; iSpecies += SPECIES_CHUNK) {
658                                                    int endChunk = iSpecies + SPECIES_CHUNK;
659                                                    if (endChunk > ntax) endChunk = ntax;
660                                                    
661                                                    // build query
662                                                    String sql = "SELECT species_id, structure FROM PART_DATA WHERE partition_id = '" + partitionID + "'";
663                                                    sql += " and (species_id = '" + ((Species) species.get(iSpecies)).getID() + "'";
664                                                    for (int iChunk = iSpecies + 1; iChunk < endChunk; iChunk++) {
665                                                            sql += " or species_id = '" + ((Species) species.get(iChunk)).getID() + "'";
666                                                    }
667                                                    sql += ")";
668                                                    
669                                                    ArrayList results = Database.sqlToArray(sql);
670                                                    
671                                                    if (results == null) {
672                                                            CrimsonUtils.printError("NEXUS file incomplete. Unable to get partition data: " + sql);
673                                                            bWriter.flush();
674                                                            bWriter.close();
675                                                            fWriter.close();
676                                                            return;
677                                                    }
678                                                    
679                                                    for (Iterator rows = results.iterator(); rows.hasNext();) {
680                                                            ArrayList row = (ArrayList) rows.next();
681                                                            String data = Database.readClob(row.get(1));
682                                                            
683                                                            if ((sampledPositions.size() > 0) && (sampledPositions.get(iPartition) != null)) {
684                                                                    // only include positions in sampledPositions[]
685                                                                    StringBuffer sampledData = new StringBuffer();
686                                                                    TreeSet sampPos = (TreeSet) sampledPositions.get(iPartition);
687                                                                    for (Iterator i = sampPos.iterator(); i.hasNext();) {
688                                                                            sampledData.append(data.charAt(((Integer) i.next()).intValue()));
689                                                                    }
690                                                                    data = sampledData.toString();
691                                                            }
692                                                            bWriter.write("  " + String.valueOf(row.get(0)) + "   " + data + "\n");
693                                                    }
694                                            }
695                                            
696                                            // put in a space between partitions
697                                            bWriter.write("\n");
698                                    }
699                                    bWriter.write(";\nEND;\n\n");
700                                    
701                                    if (partitions.size() > 1) {
702                                            bWriter.write("BEGIN SETS;\n");
703                                            bWriter.write("  CharPartition partition = \n");
704                                            bWriter.write(charPartition.toString());
705                                            bWriter.write("END;\n");
706                                    }
707                            } else {
708                                    // the taxa are currently being loaded from the DATA
709                                    // block. if there is no DATA block then we need to
710                                    // add the TAXA block so that the TREES block will bel
711                                    // properly loaded.  We currently only include the
712                                    // TAXA block if no DATA block, because we can't
713                                    // easily control the order of the species in the DATA
714                                    // block and this order needs to be the same in the
715                                    // DATA and TAXA blocks.
716                                    
717                                    // insert TAXA block
718                                    bWriter.write("BEGIN TAXA;\n");
719                                    bWriter.write("  DIMENSIONS NTAX=" + ntax + ";\n");
720                                    bWriter.write("  TAXLABELS\n");
721                                    for (int iSpecies = 0; iSpecies < ntax; iSpecies++) {
722                                            bWriter.write("    " + ((Species) species.get(iSpecies)).getID() + "\n");
723                                    }
724                                    bWriter.write(";\nEND;\n\n");
725                            }
726                            
727                            // insert TREES block.  This is placed after the data so
728                            // that the taxa can be loaded from the DATA block.
729                            bWriter.write("BEGIN TREES;\n");
730                            // make sure queryID is properly quoted
731                            bWriter.write("  TREE " + NexusFile.quote(queryID) + " = " + treeNew.toNewick(incSequence) + ";\n");
732                            bWriter.write("END;\n\n");
733                            
734                            bWriter.flush();
735                            bWriter.close();
736                            fWriter.close();
737                    } catch (SQLException e) { 
738                            CrimsonUtils.printError("SQL error processing CLOB.");
739                            CrimsonUtils.printError(e.getMessage());
740                            return;
741                    } catch (FileNotFoundException e) { // problem with FileOutputStream
742                            CrimsonUtils.printError("File \"" + filename + "\" can not be opened.");
743                            CrimsonUtils.printError(e.getMessage());
744                            return;
745                    } catch (IOException e) { // problem with ObjectOutputStream.  
746                            // XXX do we need to close bWriter()?
747                            CrimsonUtils.printError("Error writting output file \"" + filename + "\".");
748                            CrimsonUtils.printError(e.getMessage());
749                            return;
750                    }
751                    
752                    CrimsonUtils.printMsg("Query finished.");
753            }
754        */
755            
756            /**
757             * This will perform a tree query, generating a NEXUS file. If
758             * Query.onlyStruct is true then incCrimson and incTree will be
759             * ignored.
760             * @param queryID string ID for query to be run
761             * @param filename filename for NEXUS output
762             * @param incSequence when true, inner sequence IDs will be
763             * included in the NEXUS file
764             * @param incCrimson when true, the CRIMSON block will be included
765             * in the NEXUS file
766             * @param incTree There are multiple options dictated by incTree: 0 = no
767             * tree will be output; 1 = tree is included in NEXUS file; 2 =
768             * tree ouptut to second NEXUS file ("*.tree"); 3 = rooted tree
769             * ouptut to a newick file ("*.newick"); 4 = unrooted tree ouptut
770             * to a newick file ("*.newick")
771             * @param incNotes When true, the Query.notes will be appended
772             * to the end of the NEXUS file. The notes can, for example,
773             * contain a PAUP block. It is assumed that the notes are properly
774             * formatted for the NEXUS file.
775             * @return The found tree object is returned (null when errors).
776             */
777            public static Tree run(String queryID, String filename, boolean incSequence, boolean incCrimson, 
778                                                       int incTree, boolean incNotes) {
779                    if (! Database.isOpen()) {
780                            CrimsonUtils.printError("Must open a database before running a query.");
781                            return null;
782                    }
783                    
784                    if (CrimsonUtils.isEmpty(filename)) {
785                            CrimsonUtils.printError("No NEXUS file specified.");
786                            return null;
787                    }
788                    File file = new File(filename);
789                    
790                    Query query = ObjectHandles.getQuery(queryID);
791                    if (query == null) {
792                            CrimsonUtils.printError("Query not found.");
793                            return null;
794                    }
795                    
796                    if (CrimsonUtils.isEmpty(query.getTreeID())) {
797                            CrimsonUtils.printError("Tree not specified within query.");
798                            return null;
799                    }
800                    
801                    Tree treeOrig = ObjectHandles.getTree(query.getTreeID());
802                    if (treeOrig == null) {
803                            CrimsonUtils.printError("Tree does not exist: " + query.getTreeID());
804                            return null;
805                    }
806                    
807                    // set seed prior to sampling tree
808                    if (query.getSeed() > -1) CrimsonUtils.setRandomSeed(query.getSeed());
809                    
810                    // use Tree() routines to process the leaf selection.  With
811                    // the new tree we can get the new newick string and species.
812                    Tree treeNew;
813                    switch (query.getLeafSelection()) {
814                    case 1:  // random (numLeaves)
815                            treeNew = treeOrig.randomSample(query.getNumLeaves());
816                            if (treeNew == null) return null;
817                            treeNew.computeStats();
818                            break;
819                    case 2:  // depth distributed (numLeaves, tempDepthThresh)
820                            treeNew = treeOrig.uniformSampleByTempDepth(query.getTempDepthThresh(), query.getNumLeaves());
821                            if (treeNew == null) return null;
822                            treeNew.computeStats();
823                            break;
824                    case 3:  // depth weighted (numLeaves, tempDepthThresh)
825                            treeNew = treeOrig.weightedSampleByTempDepth(query.getTempDepthThresh(), query.getNumLeaves());
826                            if (treeNew == null) return null;
827                            treeNew.computeStats();
828                            break;
829                    case 4:  // level distributed (numLeaves, levelThresh)
830                            treeNew = treeOrig.uniformSampleByLevel(query.getLevelThresh(), query.getNumLeaves());
831                            if (treeNew == null) return null;
832                            treeNew.computeStats();
833                            break;
834                    case 5:  // level weighted (numLeaves, levelThresh)
835                            treeNew = treeOrig.weightedSampleByLevel(query.getLevelThresh(), query.getNumLeaves());
836                            if (treeNew == null) return null;
837                            treeNew.computeStats();
838                            break;
839                    case 6:  // manual (leaves)
840                            treeNew = treeOrig.manualSampleByID(new ArrayList(query.getLeaves()));
841                            if (treeNew == null) return null;
842                            treeNew.computeStats();
843                            break;
844                    default:
845                            // this is equivalent to "case 0".  if tree not already
846                            // built, then build it now
847                            if (! treeOrig.isBuilt()) treeOrig.computeStats();
848                            treeNew = treeOrig;
849                    }
850                    
851                    // make sure the tree is valid
852                    if (treeNew.getNumSpecies() < 1) {
853                            CrimsonUtils.printError("Sampled tree has no leaves.");
854                            return null;
855                    }
856                    
857                    String nchar;
858                    ArrayList partitions;  // this is an array to preserve order
859                    
860                    // used by seq selection methods 1-4, this will contain a list
861                    // of bp positions to be included in the output.  these are
862                    // arrays so that they can differ per partition, in case
863                    // partitions very in size.
864                    ArrayList sampledPositions = new ArrayList();
865                    
866                    // this will contain the partition data printed at the end of
867                    // multi-partition NEXUS files
868                    StringBuffer charPartition = new StringBuffer();
869                    
870                    /////////////
871                    // get the partitions to be included and the values for nchar,
872                    // sampledPositions, and CharPartition
873                    if ((query.getSequenceSelection() == 5) || (query.isOnlyStruct())) {
874                            // only structure, so no partitions to be included
875                            partitions = new ArrayList();
876                            nchar = "0";
877                    } else {
878                            // figure out partition data to include
879                            partitions = new ArrayList(query.getPartitions());
880                            if ((partitions == null) || (partitions.size() == 0)) {
881                                    // no partitions specificed, so assume 'ALL'.
882                                    partitions = Database.sqlToArray("SELECT id FROM PARTITIONS WHERE tree_id = '" + treeOrig.getID() + "'");
883                                    if (partitions == null) partitions = new ArrayList();
884                            }
885                            
886                            TreeSet expandedPositions = null;
887                            if ((query.getSequenceSelection() == 3) || (query.getSequenceSelection() == 4)) {
888                                    // for manual selections, need to expand position
889                                    // string to figure.  this is independent of the
890                                    // partition
891                                    expandedPositions = expandPositions(query.getPositions(), (query.getSequenceSelection() == 3));
892                                    // if not able to expand then stop
893                                    if (expandedPositions == null) return null;
894                            }                                        
895                            
896                            // used to count num positions across partitions
897                            int iLength = 0;
898                            
899                            // set up the array such that each partition will have
900                            // it's own set of sampled positions
901                            sampledPositions = new ArrayList();
902                            
903                            // make sure all partitions are valid and compute relevant
904                            // sampling positions
905                            for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
906                                    String partitionID = (String) partitions.get(iPartition);
907                                    
908                                    // shouldn't need to test both database and partitionPool, but
909                                    // it's better to be careful
910                                    if ((! Partitions.dbContains(partitionID)) || (! ObjectHandles.containsPartition(partitionID))) {
911                                            CrimsonUtils.printError("Partition doesn't exists: " + partitionID);
912                                            return null;
913                                    }
914                                    
915                                    // get partition length
916                                    int pLength = (ObjectHandles.getPartition(partitionID)).getLength();
917                                    if (pLength <= 0) {
918                                            CrimsonUtils.printError("Partition (" + partitionID + ") has invalid length: " + pLength);
919                                            return null;
920                                    }
921                                    
922                                    if ((query.getSequenceSelection() == 3) || (query.getSequenceSelection() == 4)) {
923                                            // manual selection
924                                            if (partitions.size() > 1) {
925                                                    // manual selection is only valid for a single
926                                                    // data partition.
927                                                    CrimsonUtils.printError("Can't manually select positions when using more than one data partition.");
928                                                    return null;
929                                            }
930                                            
931                                            // add valid positions to sampledPositions.  Do
932                                            // this for every partition since partitions will
933                                            // have different lengths.  Thus a valid position
934                                            // for one partition may be invalid for
935                                            // another. HOWEVER, only allow one partition to
936                                            // be selected, so doing this here should not be
937                                            // necessary.
938                                            TreeSet sampPos = new TreeSet();
939                                            for (Iterator iExpPos = expandedPositions.iterator(); iExpPos.hasNext();) {
940                                                    Integer indx = (Integer) iExpPos.next();
941                                                    if (indx.intValue() >= pLength) {
942                                                            CrimsonUtils.printWarning("Ignoring out of bounds position:" + indx);
943                                                    } else {
944                                                            sampPos.add(indx);
945                                                    }
946                                            }
947                                            
948                                            // with manual selection, nchar is number of positions
949                                            iLength = sampPos.size();
950                                            
951                                            sampledPositions.add(sampPos);
952                                            
953                                    } else {
954                                            // compute random positions to be sampled. We do
955                                            // this for each partition because the partition
956                                            // lengths may be different.
957                                            if (query.getSequenceSelection() == 1) {  // random codon sampling
958                                                    sampledPositions.add(getRandomPositions(pLength, query.getNumPositions(), true));
959                                            } else if (query.getSequenceSelection() == 2) {  // random bp sampling
960                                                    sampledPositions.add(getRandomPositions(pLength, query.getNumPositions(), false));
961                                            }
962                                            
963                                            // this is the case for seq selection 1 and 2
964                                            int numPositions = query.getNumPositions();
965                                            // seq selection 0 is select all
966                                            if (query.getSequenceSelection() == 0) numPositions = pLength;
967                                            
968                                            // count num of positions to include per partition
969                                            if (pLength < numPositions) {
970                                                    // warn user that partition length is smaller
971                                                    // than requested number of positions
972                                                    String msg = "Partition '" + partitionID + "' does not contain '"; 
973                                                    msg += numPositions + "' positions as requested.";
974                                                    CrimsonUtils.printWarning(msg);
975                                                    
976                                                    // setup charPartition string for printing at
977                                                    // end of NEXUS file
978                                                    charPartition.append("    " + NexusFile.quote(partitionID) + ":");
979                                                    charPartition.append((iLength + 1) + "-" + (iLength + pLength) + "\n");
980                                                    charPartition.append((iPartition + 1 < partitions.size()) ? "," : ";");
981                                                    
982                                                    iLength += pLength;
983                                            } else {
984                                                    // setup charPartition string for printing at
985                                                    // end of NEXUS file
986                                                    charPartition.append("    " + NexusFile.quote(partitionID) + ":");
987                                                    charPartition.append((iLength + 1) + "-" + (iLength + numPositions) + "\n");
988                                                    charPartition.append((iPartition + 1 < partitions.size()) ? "," : ";");
989                                                    
990                                                    iLength += numPositions;
991                                            }
992                                    }
993                            }
994                            
995                            // convert integer nchar value to string which will be
996                            // used below
997                            nchar = String.valueOf(iLength);
998                    }
999                    
1000                    ////////////////////////////////
1001                    // begin writting the NEXUS file
1002                    ////////////////////////////////
1003                    try {
1004                            FileWriter fWriter = new FileWriter(file);
1005                            BufferedWriter bWriter = new BufferedWriter(fWriter);
1006                            
1007                            bWriter.write("#NEXUS\n\n");
1008                            
1009                            // include query as comments at top of NEXUS file
1010                            bWriter.write("[\nCRIMSON QUERY:\n");
1011                            bWriter.write(query.toString());
1012                            bWriter.write("]\n\n");
1013                            
1014                            ArrayList species =  incSequence ? treeNew.getSpeciesList() : treeNew.getLeaves(); 
1015                            int ntax = species.size();
1016    
1017                            // if no partitions then no data
1018                            if (partitions.size() > 0) {
1019                                    // print character data header
1020                                    bWriter.write("BEGIN DATA;\n");
1021                                    bWriter.write("  DIMENSIONS NTAX=" + ntax + " NCHAR=" + nchar + ";\n");
1022                                    if (partitions.size() > 1) bWriter.write("  FORMAT DATATYPE=rna GAP=- INTERLEAVE;\n");
1023                                    else bWriter.write("  FORMAT DATATYPE=rna GAP=-;\n");
1024                                    bWriter.write("  MATRIX\n");
1025                                    
1026                                    // process sequence data
1027                                    for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
1028                                            String partitionID = (String) partitions.get(iPartition);
1029                                            
1030                                            // if more than 1 partition, then add a comment
1031                                            // containing the name of the partition to help
1032                                            // deliniate the interleaved sections
1033                                            if (partitions.size() > 1) bWriter.write(" [" + partitionID.toUpperCase() + "]\n");
1034                                            
1035                                            // process species in chunks of size SPECIES_CHUNK
1036                                            for (int iSpecies = 0; iSpecies < ntax; iSpecies += SPECIES_CHUNK) {
1037                                                    int endChunk = iSpecies + SPECIES_CHUNK;
1038                                                    if (endChunk > ntax) endChunk = ntax;
1039                                                    
1040                                                    // build query
1041                                                    String sql = "SELECT species_id, sequence FROM PART_DATA WHERE partition_id = '" + partitionID + "'";
1042                                                    sql += " and (species_id = '" + ((Species) species.get(iSpecies)).getID() + "'";
1043                                                    for (int iChunk = iSpecies + 1; iChunk < endChunk; iChunk++) {
1044                                                            sql += " or species_id = '" + ((Species) species.get(iChunk)).getID() + "'";
1045                                                    }
1046                                                    sql += ")";
1047    
1048                                                    // *** DEBUGGING ***
1049                                                    //FileIO.writeString(file+".debug", true, sql + "\n");
1050                                                    // *** DEBUGGING ***
1051    
1052                                                    ArrayList results = Database.sqlToArray(sql);
1053                                                    
1054                                                    if (results == null) {
1055                                                            CrimsonUtils.printError("NEXUS file incomplete. Unable to get partition data: " + sql);
1056                                                            bWriter.flush();
1057                                                            bWriter.close();
1058                                                            fWriter.close();
1059                                                            return null;
1060                                                    }
1061                                                    
1062                                                    for (Iterator rows = results.iterator(); rows.hasNext();) {
1063                                                            ArrayList row = (ArrayList) rows.next();
1064                                                            String data = Database.readClob(row.get(1));
1065                                                            
1066                                                            if ((sampledPositions.size() > 0) && (sampledPositions.get(iPartition) != null)) {
1067                                                                    // only include positions in sampledPositions[]
1068                                                                    StringBuffer sampledData = new StringBuffer();
1069                                                                    TreeSet sampPos = (TreeSet) sampledPositions.get(iPartition);
1070                                                                    for (Iterator i = sampPos.iterator(); i.hasNext();) {
1071                                                                            sampledData.append(data.charAt(((Integer) i.next()).intValue()));
1072                                                                    }
1073                                                                    data = sampledData.toString();
1074                                                            }
1075                                                            bWriter.write("  " + String.valueOf(row.get(0)) + "   " + data + "\n");
1076                                                    }
1077                                            }
1078                                            
1079                                            // put in a space between partitions
1080                                            bWriter.write("\n");
1081                                    }
1082                                    bWriter.write(";\nEND;\n\n");
1083                                    // done printing DATA block
1084                                    
1085                                    if (incCrimson) {
1086                                            // print crimson data header
1087                                            bWriter.write("BEGIN CRIMSON;\n");
1088                                            bWriter.write("  MATRIX\n");
1089                                            
1090                                            // process structure data
1091                                            for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
1092                                                    String partitionID = (String) partitions.get(iPartition);
1093                                                    
1094                                                    // if more than 1 partition, then add a comment
1095                                                    // containing the name of the partition to help
1096                                                    // deliniate the interleaved sections
1097                                                    if (partitions.size() > 1) bWriter.write(" [" + partitionID.toUpperCase() + "]\n");
1098                                                    
1099                                                    // process species in chunks of size SPECIES_CHUNK
1100                                                    for (int iSpecies = 0; iSpecies < ntax; iSpecies += SPECIES_CHUNK) {
1101                                                            int endChunk = iSpecies + SPECIES_CHUNK;
1102                                                            if (endChunk > ntax) endChunk = ntax;
1103                                                            
1104                                                            // build query
1105                                                            String sql = "SELECT species_id, structure FROM PART_DATA WHERE partition_id = '" + partitionID + "'";
1106                                                            sql += " and (species_id = '" + ((Species) species.get(iSpecies)).getID() + "'";
1107                                                            for (int iChunk = iSpecies + 1; iChunk < endChunk; iChunk++) {
1108                                                                    sql += " or species_id = '" + ((Species) species.get(iChunk)).getID() + "'";
1109                                                            }
1110                                                            sql += ")";
1111                                                            
1112                                                            ArrayList results = Database.sqlToArray(sql);
1113                                                            
1114                                                            if (results == null) {
1115                                                                    CrimsonUtils.printError("NEXUS file incomplete. Unable to get partition data: " + sql);
1116                                                                    bWriter.flush();
1117                                                                    bWriter.close();
1118                                                                    fWriter.close();
1119                                                                    return null;
1120                                                            }
1121                                                            
1122                                                            for (Iterator rows = results.iterator(); rows.hasNext();) {
1123                                                                    ArrayList row = (ArrayList) rows.next();
1124                                                                    String data = Database.readClob(row.get(1));
1125                                                                    
1126                                                                    if ((sampledPositions.size() > 0) && (sampledPositions.get(iPartition) != null)) {
1127                                                                            // only include positions in sampledPositions[]
1128                                                                            StringBuffer sampledData = new StringBuffer();
1129                                                                            TreeSet sampPos = (TreeSet) sampledPositions.get(iPartition);
1130                                                                            for (Iterator i = sampPos.iterator(); i.hasNext();) {
1131                                                                                    sampledData.append(data.charAt(((Integer) i.next()).intValue()));
1132                                                                            }
1133                                                                            data = sampledData.toString();
1134                                                                    }
1135                                                                    bWriter.write("  " + String.valueOf(row.get(0)) + "   " + data + "\n");
1136                                                            }
1137                                                    }
1138                                                    
1139                                                    // put in a space between partitions
1140                                                    bWriter.write("\n");
1141                                            }
1142                                            bWriter.write(";\nEND;\n\n");
1143                                    } // done printing CRIMSON block
1144    
1145                                    if (partitions.size() > 1) {
1146                                            bWriter.write("BEGIN SETS;\n");
1147                                            bWriter.write("  CharPartition partition = \n");
1148                                            bWriter.write(charPartition.toString());
1149                                            bWriter.write("END;\n");
1150                                    }
1151                            } else {
1152                                    // the taxa are currently being loaded from the DATA
1153                                    // block. if there is no DATA block then we need to
1154                                    // add the TAXA block so that the TREES block will bel
1155                                    // properly loaded.  We currently only include the
1156                                    // TAXA block if no DATA block, because we can't
1157                                    // easily control the order of the species in the DATA
1158                                    // block and this order needs to be the same in the
1159                                    // DATA and TAXA blocks.
1160                                    
1161                                    // insert TAXA block
1162                                    bWriter.write("BEGIN TAXA;\n");
1163                                    bWriter.write("  DIMENSIONS NTAX=" + ntax + ";\n");
1164                                    bWriter.write("  TAXLABELS\n");
1165                                    for (int iSpecies = 0; iSpecies < ntax; iSpecies++) {
1166                                            bWriter.write("    " + ((Species) species.get(iSpecies)).getID() + "\n");
1167                                    }
1168                                    bWriter.write(";\nEND;\n\n");
1169                            }
1170                            
1171                            switch (incTree) {
1172                            case 1:
1173                                    // insert TREES block.  This is placed after the data
1174                                    // so that the taxa can be loaded from the DATA block.
1175                                    bWriter.write("BEGIN TREES;\n");
1176                                    // make sure queryID is properly quoted
1177                                    bWriter.write("  TREE " + NexusFile.quote(queryID) + " = " + treeNew.toNewick(incSequence) + ";\n");
1178                                    bWriter.write("END;\n\n");
1179                                    break;
1180                            case 2:
1181                                    FileWriter fWriter_t = new FileWriter(file + ".tree");
1182                                    BufferedWriter bWriter_t = new BufferedWriter(fWriter_t);
1183                                    // write newick to NEXUS file
1184                                    bWriter_t.write("#NEXUS;\n\n");
1185                                    bWriter_t.write("BEGIN TREES;\n");
1186                                    // make sure queryID is properly quoted
1187                                    bWriter_t.write("  TREE " + NexusFile.quote(queryID) + " = " + treeNew.toNewick(incSequence) + ";\n");
1188                                    bWriter_t.write("END;\n\n");
1189                                    bWriter_t.flush();
1190                                    bWriter_t.close();
1191                                    fWriter_t.close();
1192                                    break;
1193                            case 3:
1194                            case 4:
1195                                    fWriter_t = new FileWriter(file + ".newick");
1196                                    bWriter_t = new BufferedWriter(fWriter_t);
1197                                    // output newick rooted
1198                                    if (incTree == 3) bWriter_t.write(treeNew.toNewick(incSequence, true) + ";\n");
1199                                    // output newick unrooted
1200                                    else bWriter_t.write(treeNew.toNewick(incSequence, false) + ";\n");
1201                                    bWriter_t.flush();
1202                                    bWriter_t.close();
1203                                    fWriter_t.close();
1204                                    break;
1205                            } // done printing TREES block
1206                            
1207                            if (incNotes) {
1208                                    // insert Query.notes. It is assumed that the notes
1209                                    // are properly formatted for the NEXUS file.
1210                                    bWriter.write(query.getNotes() + "\n");
1211                            } // done printing TREES block
1212                            
1213                            bWriter.flush();
1214                            bWriter.close();
1215                            fWriter.close();
1216                    } catch (SQLException e) { 
1217                            CrimsonUtils.printError("SQL error processing CLOB.");
1218                            CrimsonUtils.printError(e.getMessage());
1219                            return null;
1220                    } catch (FileNotFoundException e) { // problem with FileOutputStream
1221                            CrimsonUtils.printError("File \"" + filename + "\" can not be opened.");
1222                            CrimsonUtils.printError(e.getMessage());
1223                            return null;
1224                    } catch (IOException e) { // problem with ObjectOutputStream.  
1225                            // XXX do we need to close bWriter()?
1226                            CrimsonUtils.printError("Error writting output file \"" + filename + "\".");
1227                            CrimsonUtils.printError(e.getMessage());
1228                            return null;
1229                    }
1230                    
1231                    CrimsonUtils.printMsg("Query finished.");
1232    
1233                    return treeNew;
1234            }
1235            
1236            /**
1237             * This will perform a tree query, generating a phylip formatted
1238             * file. 
1239             * @param queryID string ID for query to be run
1240             * @param filename filename for NEXUS output
1241             * @param incTree There are multiple options dictated by incTree: 0 = no
1242             * tree will be output; 1 = no tree will be output; 2 =
1243             * tree ouptut to second NEXUS file ("*.tree"); 3 = rooted tree
1244             * ouptut to a newick file ("*.newick"); 4 = unrooted tree ouptut
1245             * to a newick file ("*.newick")
1246             * @return The found tree object is returned (null when errors).
1247            */
1248            public static Tree runPhylip(String queryID, String filename, int incTree) {
1249                    if (! Database.isOpen()) {
1250                            CrimsonUtils.printError("Must open a database before running a query.");
1251                            return null;
1252                    }
1253                    
1254                    if (CrimsonUtils.isEmpty(filename)) {
1255                            CrimsonUtils.printError("No PHYLIP file specified.");
1256                            return null;
1257                    }
1258                    File file = new File(filename);
1259                    
1260                    Query query = ObjectHandles.getQuery(queryID);
1261                    if (query == null) {
1262                            CrimsonUtils.printError("Query not found.");
1263                            return null;
1264                    }
1265                    
1266                    if (CrimsonUtils.isEmpty(query.getTreeID())) {
1267                            CrimsonUtils.printError("Tree not specified within query.");
1268                            return null;
1269                    }
1270                    
1271                    Tree treeOrig = ObjectHandles.getTree(query.getTreeID());
1272                    if (treeOrig == null) {
1273                            CrimsonUtils.printError("Tree does not exist: " + query.getTreeID());
1274                            return null;
1275                    }
1276                    
1277                    // set seed prior to sampling tree
1278                    if (query.getSeed() > -1) CrimsonUtils.setRandomSeed(query.getSeed());
1279                    
1280                    // use Tree() routines to process the leaf selection.  With
1281                    // the new tree we can get the new newick string and species.
1282                    Tree treeNew;
1283                    switch (query.getLeafSelection()) {
1284                    case 1:  // random (numLeaves)
1285                            treeNew = treeOrig.randomSample(query.getNumLeaves());
1286                            if (treeNew == null) return null;
1287                            treeNew.computeStats();
1288                            break;
1289                    case 2:  // depth distributed (numLeaves, tempDepthThresh)
1290                            treeNew = treeOrig.uniformSampleByTempDepth(query.getTempDepthThresh(), query.getNumLeaves());
1291                            if (treeNew == null) return null;
1292                            treeNew.computeStats();
1293                            break;
1294                    case 3:  // depth weighted (numLeaves, tempDepthThresh)
1295                            treeNew = treeOrig.weightedSampleByTempDepth(query.getTempDepthThresh(), query.getNumLeaves());
1296                            if (treeNew == null) return null;
1297                            treeNew.computeStats();
1298                            break;
1299                    case 4:  // level distributed (numLeaves, levelThresh)
1300                            treeNew = treeOrig.uniformSampleByLevel(query.getLevelThresh(), query.getNumLeaves());
1301                            if (treeNew == null) return null;
1302                            treeNew.computeStats();
1303                            break;
1304                    case 5:  // level weighted (numLeaves, levelThresh)
1305                            treeNew = treeOrig.weightedSampleByLevel(query.getLevelThresh(), query.getNumLeaves());
1306                            if (treeNew == null) return null;
1307                            treeNew.computeStats();
1308                            break;
1309                    case 6:  // manual (leaves)
1310                            treeNew = treeOrig.manualSampleByID(new ArrayList(query.getLeaves()));
1311                            if (treeNew == null) return null;
1312                            treeNew.computeStats();
1313                            break;
1314                    default:
1315                            // this is equivalent to "case 0".  if tree not already
1316                            // built, then build it now
1317                            if (! treeOrig.isBuilt()) treeOrig.computeStats();
1318                            treeNew = treeOrig;
1319                    }
1320                    
1321                    // make sure the tree is valid
1322                    if (treeNew.getNumSpecies() < 1) {
1323                            CrimsonUtils.printError("Sampled tree has no leaves.");
1324                            return null;
1325                    }
1326                    
1327                    String nchar;
1328                    ArrayList partitions;  // this is an array to preserve order
1329                    
1330                    // used by seq selection methods 1-4, this will contain a list
1331                    // of bp positions to be included in the output.  these are
1332                    // arrays so that they can differ per partition, in case
1333                    // partitions very in size.
1334                    ArrayList sampledPositions = new ArrayList();
1335                    
1336                    // this will contain the partition data printed at the end of
1337                    // multi-partition PHYLIP files
1338                    StringBuffer charPartition = new StringBuffer();
1339                    
1340                    /////////////
1341                    // get the partitions to be included and the values for nchar,
1342                    // sampledPositions, and CharPartition
1343                    if ((query.getSequenceSelection() == 5) || (query.isOnlyStruct())) {
1344                            // only structure, so no partitions to be included
1345                            partitions = new ArrayList();
1346                            nchar = "0";
1347                    } else {
1348                            // figure out partition data to include
1349                            partitions = new ArrayList(query.getPartitions());
1350                            if ((partitions == null) || (partitions.size() == 0)) {
1351                                    // no partitions specificed, so assume 'ALL'.
1352                                    partitions = Database.sqlToArray("SELECT id FROM PARTITIONS WHERE tree_id = '" + treeOrig.getID() + "'");
1353                                    if (partitions == null) partitions = new ArrayList();
1354                            }
1355                            
1356                            TreeSet expandedPositions = null;
1357                            if ((query.getSequenceSelection() == 3) || (query.getSequenceSelection() == 4)) {
1358                                    // for manual selections, need to expand position
1359                                    // string to figure.  this is independent of the
1360                                    // partition
1361                                    expandedPositions = expandPositions(query.getPositions(), (query.getSequenceSelection() == 3));
1362                                    // if not able to expand then stop
1363                                    if (expandedPositions == null) return null;
1364                            }                                        
1365                            
1366                            // used to count num positions across partitions
1367                            int iLength = 0;
1368                            
1369                            // set up the array such that each partition will have
1370                            // it's own set of sampled positions
1371                            sampledPositions = new ArrayList();
1372                            
1373                            // make sure all partitions are valid and compute relevant
1374                            // sampling positions
1375                            for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
1376                                    String partitionID = (String) partitions.get(iPartition);
1377                                    
1378                                    // shouldn't need to test both database and partitionPool, but
1379                                    // it's better to be careful
1380                                    if ((! Partitions.dbContains(partitionID)) || (! ObjectHandles.containsPartition(partitionID))) {
1381                                            CrimsonUtils.printError("Partition doesn't exists: " + partitionID);
1382                                            return null;
1383                                    }
1384                                    
1385                                    // get partition length
1386                                    int pLength = (ObjectHandles.getPartition(partitionID)).getLength();
1387                                    if (pLength <= 0) {
1388                                            CrimsonUtils.printError("Partition (" + partitionID + ") has invalid length: " + pLength);
1389                                            return null;
1390                                    }
1391                                    
1392                                    if ((query.getSequenceSelection() == 3) || (query.getSequenceSelection() == 4)) {
1393                                            // manual selection
1394                                            if (partitions.size() > 1) {
1395                                                    // manual selection is only valid for a single
1396                                                    // data partition.
1397                                                    CrimsonUtils.printError("Can't manually select positions when using more than one data partition.");
1398                                                    return null;
1399                                            }
1400                                            
1401                                            // add valid positions to sampledPositions.  Do
1402                                            // this for every partition since partitions will
1403                                            // have different lengths.  Thus a valid position
1404                                            // for one partition may be invalid for
1405                                            // another. HOWEVER, only allow one partition to
1406                                            // be selected, so doing this here should not be
1407                                            // necessary.
1408                                            TreeSet sampPos = new TreeSet();
1409                                            for (Iterator iExpPos = expandedPositions.iterator(); iExpPos.hasNext();) {
1410                                                    Integer indx = (Integer) iExpPos.next();
1411                                                    if (indx.intValue() >= pLength) {
1412                                                            CrimsonUtils.printWarning("Ignoring out of bounds position:" + indx);
1413                                                    } else {
1414                                                            sampPos.add(indx);
1415                                                    }
1416                                            }
1417                                            
1418                                            // with manual selection, nchar is number of positions
1419                                            iLength = sampPos.size();
1420                                            
1421                                            sampledPositions.add(sampPos);
1422                                            
1423                                    } else {
1424                                            // compute random positions to be sampled. We do
1425                                            // this for each partition because the partition
1426                                            // lengths may be different.
1427                                            if (query.getSequenceSelection() == 1) {  // random codon sampling
1428                                                    sampledPositions.add(getRandomPositions(pLength, query.getNumPositions(), true));
1429                                            } else if (query.getSequenceSelection() == 2) {  // random bp sampling
1430                                                    sampledPositions.add(getRandomPositions(pLength, query.getNumPositions(), false));
1431                                            }
1432                                            
1433                                            // this is the case for seq selection 1 and 2
1434                                            int numPositions = query.getNumPositions();
1435                                            // seq selection 0 is select all
1436                                            if (query.getSequenceSelection() == 0) numPositions = pLength;
1437                                            
1438                                            // count num of positions to include per partition
1439                                            if (pLength < numPositions) {
1440                                                    // warn user that partition length is smaller
1441                                                    // than requested number of positions
1442                                                    String msg = "Partition '" + partitionID + "' does not contain '"; 
1443                                                    msg += numPositions + "' positions as requested.";
1444                                                    CrimsonUtils.printWarning(msg);
1445                                                    
1446                                                    // setup charPartition string for printing at
1447                                                    // end of NEXUS file
1448                                                    charPartition.append("    " + NexusFile.quote(partitionID) + ":");
1449                                                    charPartition.append((iLength + 1) + "-" + (iLength + pLength) + "\n");
1450                                                    charPartition.append((iPartition + 1 < partitions.size()) ? "," : ";");
1451                                                    
1452                                                    iLength += pLength;
1453                                            } else {
1454                                                    // setup charPartition string for printing at
1455                                                    // end of NEXUS file
1456                                                    charPartition.append("    " + NexusFile.quote(partitionID) + ":");
1457                                                    charPartition.append((iLength + 1) + "-" + (iLength + numPositions) + "\n");
1458                                                    charPartition.append((iPartition + 1 < partitions.size()) ? "," : ";");
1459                                                    
1460                                                    iLength += numPositions;
1461                                            }
1462                                    }
1463                            }
1464                            
1465                            // convert integer nchar value to string which will be
1466                            // used below
1467                            nchar = String.valueOf(iLength);
1468                    }
1469                    
1470                    ////////////////////////////////
1471                    // begin writting the PHYLIP file
1472                    ////////////////////////////////
1473                    try {
1474                            FileWriter fWriter = new FileWriter(file);
1475                            BufferedWriter bWriter = new BufferedWriter(fWriter);
1476                            
1477                            ArrayList species =  treeNew.getLeaves(); 
1478                            int ntax = species.size();
1479                            String tmpS = "";
1480                            int padL = 0;
1481                            
1482                            // if no partitions then no data
1483                            if (partitions.size() > 0) {
1484                                    // print character data header
1485                                    bWriter.write(ntax + " " + nchar + "\n");
1486                                    
1487                                    // process sequence data
1488                                    for (int iPartition = 0; iPartition < partitions.size(); iPartition++) {
1489                                            String partitionID = (String) partitions.get(iPartition);
1490                                            
1491                                            // process species in chunks of size SPECIES_CHUNK
1492                                            for (int iSpecies = 0; iSpecies < ntax; iSpecies += SPECIES_CHUNK) {
1493                                                    int endChunk = iSpecies + SPECIES_CHUNK;
1494                                                    if (endChunk > ntax) endChunk = ntax;
1495                                                    
1496                                                    // build query
1497                                                    String sql = "SELECT species_id, sequence FROM PART_DATA WHERE partition_id = '" + partitionID + "'";
1498                                                    sql += " and (species_id = '" + ((Species) species.get(iSpecies)).getID() + "'";
1499                                                    for (int iChunk = iSpecies + 1; iChunk < endChunk; iChunk++) {
1500                                                            sql += " or species_id = '" + ((Species) species.get(iChunk)).getID() + "'";
1501                                                    }
1502                                                    sql += ")";
1503                                                    
1504                                                    ArrayList results = Database.sqlToArray(sql);
1505                                                    
1506                                                    if (results == null) {
1507                                                            CrimsonUtils.printError("PHYLIP file incomplete. Unable to get partition data: " + sql);
1508                                                            bWriter.flush();
1509                                                            bWriter.close();
1510                                                            fWriter.close();
1511                                                            return null;
1512                                                    }
1513                                                    
1514                                                    for (Iterator rows = results.iterator(); rows.hasNext();) {
1515                                                            ArrayList row = (ArrayList) rows.next();
1516                                                            String data = Database.readClob(row.get(1));
1517                                                            
1518                                                            if ((sampledPositions.size() > 0) && (sampledPositions.get(iPartition) != null)) {
1519                                                                    // only include positions in sampledPositions[]
1520                                                                    StringBuffer sampledData = new StringBuffer();
1521                                                                    TreeSet sampPos = (TreeSet) sampledPositions.get(iPartition);
1522                                                                    for (Iterator i = sampPos.iterator(); i.hasNext();) {
1523                                                                            sampledData.append(data.charAt(((Integer) i.next()).intValue()));
1524                                                                    }
1525                                                                    data = sampledData.toString();
1526                                                            }
1527                                                            
1528                                                            tmpS = String.valueOf(row.get(0));
1529                                                            if (tmpS.length() > 10) {
1530                                                                    // can't have species labels larger than 10 char
1531                                                                    CrimsonUtils.printError("Species label (" + row.get(0) + ") to large for PHYLIP format.");
1532                                                                    return null;
1533                                                            }
1534                                                            padL = 10 - tmpS.length();
1535                                                            for (int i = 0; i < padL; i++) tmpS += ' ';
1536                                                            bWriter.write(tmpS + data + "\n");
1537                                                    }
1538                                            }
1539                                            
1540                                            // put in a space between partitions
1541                                            bWriter.write("\n");
1542                                    }
1543                                    
1544                            }
1545                            
1546                            switch (incTree) {
1547                            case 2: 
1548                                    FileWriter fWriter_t = new FileWriter(file + ".tree");
1549                                    BufferedWriter bWriter_t = new BufferedWriter(fWriter_t);
1550                                    // write newick to NEXUS file
1551                                    bWriter_t.write("#NEXUS;\n\n");
1552                                    bWriter_t.write("BEGIN TREES;\n");
1553                                    // make sure queryID is properly quoted
1554                                    bWriter_t.write("  TREE " + NexusFile.quote(queryID) + " = " + treeNew.toNewick(false) + ";\n");
1555                                    bWriter_t.write("END;\n\n");
1556                                    bWriter_t.flush();
1557                                    bWriter_t.close();
1558                                    fWriter_t.close();
1559                                    break;
1560                            case 3:
1561                            case 4:
1562                                    fWriter_t = new FileWriter(file + ".newick");
1563                                    bWriter_t = new BufferedWriter(fWriter_t);
1564                                    // output newick rooted
1565                                    if (incTree == 3) bWriter_t.write(treeNew.toNewick(false, true) + ";\n");
1566                                    // output newick unrooted
1567                                    else bWriter_t.write(treeNew.toNewick(false, false) + ";\n");
1568                                    bWriter_t.flush();
1569                                    bWriter_t.close();
1570                                    fWriter_t.close();
1571                                    break;
1572                            } // done printing TREES block
1573                            
1574                            bWriter.flush();
1575                            bWriter.close();
1576                            fWriter.close();
1577                    } catch (SQLException e) { 
1578                            CrimsonUtils.printError("SQL error processing CLOB.");
1579                            CrimsonUtils.printError(e.getMessage());
1580                            return null;
1581                    } catch (FileNotFoundException e) { // problem with FileOutputStream
1582                            CrimsonUtils.printError("File \"" + filename + "\" can not be opened.");
1583                            CrimsonUtils.printError(e.getMessage());
1584                            return null;
1585                    } catch (IOException e) { // problem with ObjectOutputStream.  
1586                            // XXX do we need to close bWriter()?
1587                            CrimsonUtils.printError("Error writting output file \"" + filename + "\".");
1588                            CrimsonUtils.printError(e.getMessage());
1589                            return null;
1590                    }
1591                    
1592                    CrimsonUtils.printMsg("Query finished.");
1593    
1594                    return treeNew;
1595            }
1596            
1597            private static TreeSet getRandomPositions(int partitionLength, int numSamples, boolean isCodons) {
1598                    // sampledPositions goes from 0 to ParititionLength - 1
1599                    TreeSet sampledPositions = new TreeSet();
1600                    
1601                    // used to store the positions selected so that we can print
1602                    // them out for debugging
1603                    TreeSet posSelected = new TreeSet();
1604                    
1605                    // set up sampledPositions for sequence selection
1606                    // methods 1 and 2
1607                    if (isCodons) { // random codon sampling
1608                            // if more samples than in the partition, then return null
1609                            // which will cause all positions to be included
1610                            if ((numSamples * 3) >= partitionLength) return null;
1611                            
1612                            int numCodons = partitionLength / 3;
1613                            
1614                            // flag if last codon isn't a complete codon
1615                            int remainder = partitionLength % 3;
1616                            if (remainder > 0) numCodons++;
1617                            
1618                            // we need to select random positions,
1619                            // non-repetitively. to make this simple, we use an array
1620                            // and just remove items as they are selected.
1621                            ArrayList tmpList = new ArrayList(numCodons);
1622                            for (int i = 0; i < numCodons; i++) { tmpList.add(new Integer(i)); }
1623                            
1624                            int iEnd = (numSamples * 3);
1625                            while (sampledPositions.size() < iEnd) {
1626                                    int iRand = CrimsonUtils.random.nextInt(tmpList.size());
1627                                    int value = ((Integer) tmpList.get(iRand)).intValue();
1628                                    
1629                                    if (DEBUG_OUTPUT) posSelected.add(new Integer(value + 1));
1630                                    
1631                                    // test is selected last (partial codon)
1632                                    if ((remainder > 0) && (value == numCodons - 1)) {
1633                                            sampledPositions.add(new Integer(value * 3));
1634                                            
1635                                            if (remainder == 2) { 
1636                                                    sampledPositions.add(new Integer(value * 3 + 1));
1637                                                    iEnd -= 1; // 1 fewer position to sample
1638                                            } else {
1639                                                    iEnd -= 2; // 2 fewer positions to sample
1640                                            }
1641                                            
1642                                            remainder = 0; // no longer a remainder
1643                                    } else {                                                                                  
1644                                            sampledPositions.add(new Integer(value * 3));
1645                                            sampledPositions.add(new Integer(value * 3 + 1));
1646                                            sampledPositions.add(new Integer(value * 3 + 2));
1647                                    }
1648                                    
1649                                    tmpList.remove(iRand);
1650                            }
1651                            
1652                    } else { // random sampling
1653                            // if more samples than in the partition, then return null
1654                            // which will cause all positions to be included
1655                            if (numSamples >= partitionLength) return null;
1656                            
1657                            // we need to select random positions,
1658                            // non-repetitively. to make this simple, we use an array
1659                            // and just remove items as they are selected.
1660                            ArrayList tmpList = new ArrayList(partitionLength);
1661                            for (int i = 0; i < partitionLength; i++) { tmpList.add(new Integer(i)); }
1662                            
1663                            for (int i = 0; i < numSamples; i++) {
1664                                    int iRand = CrimsonUtils.random.nextInt(tmpList.size());
1665                                    int value = ((Integer) tmpList.get(iRand)).intValue();
1666                                    
1667                                    if (DEBUG_OUTPUT) posSelected.add(new Integer(value + 1));
1668                                    
1669                                    // sampled positions go from 0 to N-1
1670                                    sampledPositions.add(new Integer(value));
1671                                    
1672                                    tmpList.remove(iRand);
1673                            }
1674                    }
1675                    
1676                    // print the positions to the output file 'positions.list' for
1677                    // debugging
1678                    if (DEBUG_OUTPUT) FileIO.printPositions(posSelected);
1679                    
1680                    return sampledPositions;
1681            }
1682            
1683            /** 
1684             * This will convert a set of bp ranges to bp positions.  If the
1685             * set is codons, the underlying bp positions for the codons we be
1686             * used.  Note that the user enters positions going from (1 to N)
1687             * but when we access the data we go from (0 to N-1).  So we shift
1688             * the user specified positions down by 1.
1689             */
1690            public static TreeSet expandPositions(HashSet positionSet, boolean isCodons) {
1691                    if ((positionSet == null) || (positionSet.size() == 0)) {
1692                            CrimsonUtils.printError("Empty position list.");
1693                            return null;
1694                    }
1695                    
1696                    TreeSet sampledPositions = new TreeSet();
1697                    
1698                    // used for debugging
1699                    TreeSet posSelected = new TreeSet();
1700                    
1701                    for (Iterator i = positionSet.iterator(); i.hasNext();) {
1702                            String position = (String) i.next();
1703                            
1704                            if (position.contains("-")) {
1705                                    String[] range = position.split("-");
1706                                    
1707                                    if (range.length != 2) {   // make sure range is valid
1708                                            CrimsonUtils.printError("Incorrect range in position list: " + position);
1709                                            return null;
1710                                    }
1711                                    
1712                                    int iStart;
1713                                    int iEnd;
1714                                    if (isCodons) {
1715                                            // the range goes from start to end, inclusively
1716                                            iStart = (Integer.parseInt(range[0]) - 1) * 3;
1717                                            iEnd = (Integer.parseInt(range[1]) * 3);
1718                                            
1719                                            // this is added here to simplify things because
1720                                            // we want the codon start positions, not the
1721                                            // positions of each bp within the codons - which
1722                                            // is what is added to the 'postitions' array
1723                                            if (DEBUG_OUTPUT) {
1724                                                    int tmpI = Integer.parseInt(range[1]);
1725                                                    for (int cnt = Integer.parseInt(range[0]); cnt < tmpI; cnt++) {
1726                                                            posSelected.add(new Integer(cnt));
1727                                                    }
1728                                            }
1729                                    } else {
1730                                            // the range goes from start to end, inclusively
1731                                            iStart = Integer.parseInt(range[0]) - 1;
1732                                            iEnd = Integer.parseInt(range[1]);
1733                                            
1734                                            if (DEBUG_OUTPUT) {
1735                                                    int tmpI = Integer.parseInt(range[1]);
1736                                                    for (int cnt = Integer.parseInt(range[0]); cnt < tmpI; cnt++) {
1737                                                            posSelected.add(new Integer(cnt));
1738                                                    }
1739                                            }
1740                                    }
1741                                    
1742                                    if (iStart < 0) {   // make sure range is valid
1743                                            CrimsonUtils.printError("Invalid position list. Positions start at 1.");
1744                                            return null;
1745                                    }
1746                                    
1747                                    for (int cnt = iStart; cnt < iEnd; cnt++) {
1748                                            sampledPositions.add(new Integer(cnt));
1749                                    }
1750                            } else {
1751                                    if (DEBUG_OUTPUT) posSelected.add(new Integer(position));
1752                                    
1753                                    if (isCodons) {
1754                                            int iEnd = (Integer.parseInt(position) * 3) - 1;
1755                                            for (int cnt = iEnd - 2; cnt <= iEnd; cnt++) {
1756                                                    sampledPositions.add(new Integer(cnt));
1757                                            }
1758                                    } else {
1759                                            sampledPositions.add(new Integer(Integer.parseInt(position) - 1));
1760                                    }
1761                            }
1762                    }
1763                    
1764                    // print the positions to the output file 'positions.list' for
1765                    // debugging
1766                    if (DEBUG_OUTPUT) FileIO.printPositions(posSelected);
1767                    
1768                    return sampledPositions;
1769            }
1770            
1771            /** 
1772             * Returns the non-Clob columns in the QUERIES table. Each table
1773             * record will be separated by a '\n' in the output.
1774             */
1775            public String toString() {
1776                    if (! Database.isOpen()) {
1777                            CrimsonUtils.printError("Can list queries, no open database.");
1778                            return "";
1779                    }
1780                    
1781                    // SELECT id, leaf_select, num_leaves, temp_depth_thresh, level_thresh, seq_select, num_pos, seed, notes FROM queries;
1782                    String sql = "SELECT ID, LEAF_SELECT, NUM_LEAVES, TEMP_DEPTH_THRESH, LEVEL_THRESH, SEQ_SELECT, NUM_POS, SEED, NOTES FROM QUERIES";
1783                    ArrayList results = Database.sqlToArray(sql);
1784                    
1785                    if (results == null) {
1786                            CrimsonUtils.printError("Error printing QUERIES table.");
1787                            return "";
1788                    }
1789                    
1790                    StringBuffer out = new StringBuffer("");
1791                    out.append("ID \t LEAF_SELECT \t NUM_LEAVES \t TEMP_DEPTH_THRESH \t LEVEL_THRESH \t SEQ_SELECT \t NUM_POS \t SEED \t NOTES\n");
1792                    for (Iterator rows = results.iterator(); rows.hasNext();) {
1793                            ArrayList row = (ArrayList) rows.next();
1794                            for (int i = 0; i < 7; i++) out.append(String.valueOf(row.get(i)) + " \t ");
1795                            out.append(String.valueOf(row.get(7)) + "\n");
1796                    }
1797                    
1798                    return out.toString().trim();
1799            }
1800            
1801    } // Queries.java