001    /*
002     * CRIMSON
003     * Copyright (c) 2006, Stephen Fisher, Susan Davidson, and Junhyong Kim, 
004     * University of Pennsylvania.
005     *
006     * This program is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU General Public License as
008     * published by the Free Software Foundation; either version 2 of the
009     * License, or (at your option) any later version.
010     *
011     * This program is distributed in the hope that it will be useful, but
012     * WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019     * 02110-1301 USA.
020     *
021     * @(#)NexusFile.java
022     */
023    
024    package edu.upenn.crimson.io;
025    
026    import edu.upenn.crimson.*;
027    import java.util.ArrayList;
028    import java.util.Iterator;
029    import java.util.regex.Pattern;
030    import java.io.*;
031    import java.sql.Clob;
032    import java.sql.SQLException;
033    
034    /**
035     * Functions related to loading NEXUS files.
036     *
037     * @XXX Should see if can optimize db access by maintaining one
038     * Statement that is created every time a database connection is made.
039     *
040     * @author  Stephen Fisher
041     * @version $Id: NexusFile.java,v 1.54 2007/06/14 19:48:42 fisher Exp $
042     */
043    
044    public class NexusFile {
045        //--------------------------------------------------------------------------
046        // Miscellaneous Methods
047    
048             /** 
049              * Loads a tree structure from a Nexus file into memory.  This is
050              * meant to be used for debugging.
051              */
052             public static Tree loadNewick(String filename) {
053                      if (CrimsonUtils.isEmpty(filename)) return null;
054                      File file = new File(filename);
055                      if ((! file.exists()) || (! file.isFile())) {
056                                    CrimsonUtils.printError("Invalid NEXUS file: " + filename);
057                                    return null;
058                      }
059                      
060                      try {
061                                    BufferedReader bReader = new BufferedReader(new FileReader(file));
062                      
063                                    String line;
064                                    Tree tree = null;
065                                    while ((line = bReader.readLine()) != null) {
066                                             // remove any leading white space
067                                             line = line.trim();
068                                             
069                                             // skip comment lines
070                                             if (line.startsWith("tree") || line.startsWith("TREE")) {
071                                                      int start = line.indexOf("=") + 1;
072                                                      line = line.substring(start);
073    
074                                                      // use filename as tree's ID
075                                                      tree = parseNewick(new Tree(file.getName(), false), line);
076                                                      
077                                                      break;
078                                             }
079                                    }
080    
081                                    bReader.close();
082    
083                                    return tree;
084                      } catch (FileNotFoundException e) {
085                                    CrimsonUtils.printError("File not found: " + e.getMessage());
086                                    CrimsonUtils.printError(e.getMessage());
087                                    return null;
088                      } catch (IOException e) {
089                                    CrimsonUtils.printError("Error reading file: " + filename);
090                                    CrimsonUtils.printError(e.getMessage());
091                                    return null;
092                      }
093             }
094    
095             /** Keep incrementing until we find the end of the number or label. */
096             private static int findLabelEnd(String newick, int start) {
097                      int end = start;
098                      int lenNewick = newick.length();
099                      char c = newick.charAt(++end);
100                      while ((c != ',') && (c != ')') && (c != '(') && (c != ' ') && (c != ';') && (c != ':') && (end < lenNewick-1)) {
101                                    c = newick.charAt(++end);
102                      }
103                      return end;
104             }
105    
106             /** 
107              * This might be faster if we use char[] instead of String for
108              * "newick". This will break if there are blank spaces (' ') in
109              * the tree structure.
110              */
111             public static Tree parseNewick(Tree tree) {
112                      return parseNewick(tree, tree.getNewick());
113             }
114    
115             /** 
116              * This might be faster if we use char[] instead of String for
117              * "newick". This will break if there are blank spaces (' ') in
118              * the tree structure.
119              */
120             public static Tree parseNewick(Tree tree, String newick) {
121                      // make sure there isn't already a tree
122                      tree.setRoot(null);
123    
124                      Species curSpecies = null;
125                      Species curParent = null;
126                      int id = 0;
127    
128                      int lenNewick = newick.length() - 1;
129    
130                      /* check if parenthesis is matching */
131                      int cnt = 0;
132                      for (int i = 0; i < lenNewick; i++){
133                                    if (newick.charAt(i) == '(') cnt++;
134                                    else if (newick.charAt(i) == ')') cnt--;
135                      }
136                      if (cnt != 0){
137                                    CrimsonUtils.printError("Unbalanced parenthesis in the input tree string");
138                                    return null;
139                      }
140    
141                      int head = 0;
142                      int headSave = -1;
143                      while (head < lenNewick) {
144                                    char c = newick.charAt(head);
145    
146                                    if (c == ' ') { // skip blank spaces
147                                             head++;
148                                             continue;
149                                    }
150    
151                                    if (c == '(') { // new species
152                                             // ID will be changed below, when we read the ID value
153                                             Species newSpecies = new Species("T-" + String.valueOf(id++), tree);
154    
155                                             if (tree.getRoot() == null) { // first node is root
156                                                      tree.setRoot(newSpecies);
157                                                      curParent = null;
158                                             } else {
159                                                      if (newick.charAt(head - 1) == '(') { // first child
160                                                                    curSpecies.addChild(newSpecies);
161                                                      } else {
162                                                                    curParent.addChild(newSpecies);
163                                                      }
164                                             }
165    
166                                             curSpecies = newSpecies;
167                                             head++;
168                                             continue;
169                                    }
170    
171                                    if (c == ')') { // end of child list
172                                             if (curSpecies == null) {
173                                                      CrimsonUtils.printError("Unbalanced nodes in tree.");
174                                                      return null;
175                                             }
176    
177                                             curSpecies = curSpecies.getParent(); // up one level
178                                             curParent = curSpecies.getParent();
179                                             head++;
180                                             continue;
181                                    }
182    
183                                    if (c == ':') { // get stem length
184                                             headSave = head;
185                                             head = findLabelEnd(newick, head + 1);
186                                             curSpecies.setStemLength(Double.parseDouble(newick.substring(headSave + 1, head)));
187                                             continue;
188                                    }
189    
190                                    if (c == ';') { // end of tree string
191                                             return tree;
192                                    }
193    
194                                    if (c == ',') { // next child
195                                             head++;
196                                             continue;
197                                    }
198    
199                                    // found label
200                                    if ((head == 0) || (newick.charAt(head - 1) != ')')) { // leaf
201                                             headSave = head;
202                                             head = findLabelEnd(newick, head);
203                                             Species newSpecies = new Species(newick.substring(headSave, head), tree);
204    
205                                             if (newick.charAt(headSave - 1) == '(') { // first child
206                                                      curParent = curSpecies;
207                                             }
208                                             curParent.addChild(newSpecies);
209                                             curSpecies = newSpecies;
210    
211                                    } else { // label for an inner node
212                                             headSave = head;
213                                             head = findLabelEnd(newick, head);
214                                             curSpecies.setID(newick.substring(headSave, head));
215                                    }
216                      }
217    
218                      return tree;
219             }
220    
221             /**
222              * Loads a Nexus file.  This will not load the tree if the tree
223              * already exist in the database.
224              */
225             public static Tree load(String filename, String treeID, String partitionID) {
226                      if (CrimsonUtils.isEmpty(filename)) return null;
227                      File file = new File(filename);
228                      if ((file == null) || (! file.exists()) || (! file.isFile())) {
229                                    CrimsonUtils.printError("Invalid NEXUS file: " + filename);
230                                    return null;
231                      }
232                      
233                      if (CrimsonUtils.isEmpty(treeID)) {
234                                    CrimsonUtils.printError("No tree specified.");
235                                    return null;
236                      }
237                      if (CrimsonUtils.isEmpty(partitionID)) {
238                                    CrimsonUtils.printError("No partition specified.");
239                                    return null;
240                      }
241    
242                      if (! Database.isOpen()) {
243                                    CrimsonUtils.printError("Must open a database before loading a tree.");
244                                    return null;
245                      }
246    
247                      // shouldn't need to test both database and treePool, but
248                      // better to be careful
249                      if (Trees.dbContains(treeID) || ObjectHandles.containsTree(treeID)) {
250                                    CrimsonUtils.printError("Tree already exists. Must choose different tree ID: " + treeID);
251                                    return null;
252                      }
253                      if (Partitions.dbContains(partitionID) || ObjectHandles.containsPartition(partitionID)) {
254                                    CrimsonUtils.printError("Partition already exists. Must choose different partition ID:" + partitionID);
255                                    return null;
256                      }
257    
258                      // we need this to be all uppercase and don't want to do this
259                      // conversion every time we access the database
260                      partitionID = partitionID.toUpperCase();
261    
262                      try {
263                                    BufferedReader bReader = new BufferedReader(new FileReader(file));
264                      
265                                    // flag for reading data blocks: 1 = data, 2 = crimson
266                                    int blockType = 0;
267                                    boolean readingData = false;
268    
269                                    // flag whether the partition needs to be created
270                                    boolean createPartition = true;
271    
272                                    // flag whether we're processing the first data block.  If
273                                    // so, then we'll need to add the partition/species info
274                                    // to the PART_DATA table
275                                    boolean firstDataBlock = true;
276                                    
277                                    String line;
278                                    while ((line = bReader.readLine()) != null) {
279                                             // remove any leading white space
280                                             line = line.trim();
281    
282                                             // --- SKIP BLANK LINES ---
283                                             if (CrimsonUtils.isEmpty(line)) continue;
284    
285                                             // --- SKIP COMMENTS ---
286                                             if ((line.startsWith("#")) || (line.startsWith("["))) continue;
287    
288                                             // --- END READING DATA ---
289                                             if (line.startsWith(";")) {
290                                                      // if not reading data then we just finished the
291                                                      // TAXA block
292                                                      if (readingData) {
293                                                                    // no longer reading data
294                                                                    readingData = false;
295                                                                    // no longer processing the first data block
296                                                                    firstDataBlock = false;
297    
298                                                                    if (blockType == 1) CrimsonUtils.printMsg("Finished loading DATA block.");
299                                                                    else if (blockType == 2) CrimsonUtils.printMsg("Finished loading CRIMSON block.");
300                                                      }
301                                                      continue;
302                                             }
303                                            
304                                             // --- LOAD DATA ---
305                                             if (readingData) {
306                                                      // --- PROCESS SPECIES ID ---
307                                                      // expect ' ' to be delimiter between IDs and data
308                                                      int index = line.indexOf(" ");
309                                                      if (index == -1) {
310                                                                    // didn't find a valid delimiter and while not
311                                                                    // supposed to use tabs, lets check just to
312                                                                    // make sure
313                                                                    index = line.indexOf("\t");
314                                                                    if (index == -1) {
315                                                                             // still haven't found delimiter so error
316                                                                             CrimsonUtils.printError("Can't find a valid delimiter (' ') between species ID and data values: " + line);
317                                                                             return null;
318                                                                    }
319                                                      }
320                                                      // get species ID from line
321                                                      String speciesID = line.substring(0, index).toUpperCase();
322                                                      // remove ID from line and extra delimiter characters (ie spaces)
323                                                      line = line.substring(index + 1).trim();
324    
325                                                      // if first data block, then add partition/species
326                                                      // records to PART_DATA
327                                                      if (firstDataBlock) {
328                                                                    // if necessary, insert the partition record into
329                                                                    // the partitions table
330                                                                    if (createPartition) {
331                                                                             // create a new record in the
332                                                                             // PARTITIONS table.  This assumes all
333                                                                             // data lines have the same length.
334                                                                             String sql = "INSERT INTO partitions (id, tree_id, length) ";
335                                                                             sql += "VALUES ('" + partitionID + "', '" + treeID.toUpperCase() + "', " + line.length() + ")";
336                                                                             if (! Database.execUpdate(sql)) {
337                                                                                      CrimsonUtils.printError("Unable to add record to PARTITIONS table.");
338                                                                                      return null;
339                                                                             }
340                                                                             
341                                                                             createPartition = false;
342                                                                             CrimsonUtils.printMsg("Updated PARTITIONS table.");
343                                                                    }
344                                                                    
345                                                                    // create a new record in the PART_DATA table.  Can't add the data 
346                                                                    String sql = "INSERT INTO part_data (partition_id, species_id) VALUES ('" + partitionID + "', '" + speciesID + "')";
347                                                                    if (! Database.execUpdate(sql)) {
348                                                                             CrimsonUtils.printError("Unable to create new record in PART_DATA table.");
349                                                                             return null;
350                                                                    }
351                                                      }                                              
352    
353                                                      // test if reading the last line of data
354                                                      if (line.endsWith(";")) {
355                                                                    // no longer reading data
356                                                                    readingData = false;
357                                                                    // no longer processing the first data block
358                                                                    firstDataBlock = false;
359                                                                    // remove ";" from line before adding line to PART_DATA
360                                                                    line = line.substring(0, line.length() - 1);
361    
362                                                                    if (blockType == 1) CrimsonUtils.printMsg("Finished loading DATA block.");
363                                                                    else if (blockType == 2) CrimsonUtils.printMsg("Finished loading CRIMSON block.");
364                                                      }
365    
366                                                      // add line to PART_DATA
367                                                      switch (blockType) {
368                                                      case 1:   // --- DATA BLOCK ---
369                                                                    // UPDATE the CLOB
370                                                                    if (! Database.writeDataClob(partitionID, speciesID, "sequence", line)) {
371                                                                             CrimsonUtils.printError("Unable to add sequence data to PART_DATA table.");
372                                                                             return null;
373                                                                    }
374                                                                    break;
375                                                      case 2:   // --- CRIMSON BLOCK ---
376                                                                    if (! Database.writeDataClob(partitionID, speciesID, "structure", line)) {
377                                                                             CrimsonUtils.printError("Unable to add structure data to PART_DATA table.");
378                                                                             return null;
379                                                                    }
380                                                                    break;
381    
382                                                      default:
383                                                                    CrimsonUtils.printError("Invalid Nexus file: " + filename);
384                                                                    return null;
385                                                      }
386    
387                                                      continue;
388                                             }
389    
390                                             // --- LOAD TREE (NEWICK) ---
391                                             if (line.toUpperCase().startsWith("TREE")) {
392                                                      // ignore everything before the "="
393                                                      int start = line.indexOf("=") + 1;
394                                                      line = line.substring(start).trim();
395    
396                                                      // remove trailing ';', if present
397                                                      if (line.endsWith(";")) line = line.substring(0, line.length() - 1);
398    
399                                                      // create a new record in the TREES table
400                                                      //                                              String sql = "INSERT INTO trees (id, newick) VALUES ('" + treeID + "', '" + line + "')";
401                                                      String sql = "INSERT INTO trees (id) VALUES ('" + treeID + "')";
402                                                      if (! Database.execUpdate(sql)) {
403                                                                    CrimsonUtils.printError("Unable to create record in TREES table.");
404                                                                    return null;
405                                                      }
406                                                      if (! Database.writeClob("trees", treeID, "newick", line)) {
407                                                                    CrimsonUtils.printError("Unable to update NEWICK field in TREES table.");
408                                                                    return null;
409                                                      }
410                                                      
411                                                      // need to update the tree stats.  to do this we
412                                                      // will build a temporary tree.
413                                                      Tree tree = new Tree(treeID);
414                                                      tree.computeStats();
415                                                      // update the stats in the database
416                                                      sql = "UPDATE trees SET ";
417                                                      sql += "  num_species = " + tree.getNumSpecies();
418                                                      sql += ", num_leaves = " + tree.getNumLeaves();
419                                                      sql += ", is_binary = " + (tree.isBinary() ? "1" : "0");
420                                                      sql += ", is_ultrametric = " + (tree.isUltrametric() ? "1" : "0");
421                                                      sql += ", min_level = " + tree.getMinLevel();
422                                                      sql += ", max_level = "+ tree.getMaxLevel();
423                                                      sql += ", min_stem_length = " + tree.getMinStemLength();
424                                                      sql += ", max_stem_length = " + tree.getMaxStemLength();
425                                                      sql += ", min_temp_depth = " + tree.getMinTempDepth();
426                                                      sql += ", max_temp_depth = " + tree.getMaxTempDepth();
427                                                      sql += "where id = '" + treeID.toUpperCase() + "'";
428                                                      if (! Database.execUpdate(sql)) {
429                                                                    CrimsonUtils.printError("Unable to set tree stats.");
430                                                                    return null;
431                                                      }
432                                                      // clear out the tree structure and run the
433                                                      // garbage collector. If the user needs the tree
434                                                      // structure, they can manually run
435                                                      // tree.buildTree().
436                                                      tree.clearStructure();
437    
438                                                      CrimsonUtils.printMsg("Updated TREES table.");
439                                                      continue;
440                                             }
441                                             
442                                             // --- ENTERING DATA BLOCK ---
443                                             if ((line.toUpperCase().contains("DATA")) || (line.toUpperCase().contains("CHARACTERS"))) {
444                                                      CrimsonUtils.printMsg("Loading DATA block.");
445                                                      blockType = 1;
446                                                      continue;
447                                             }
448                                             
449                                             // --- ENTERING CRIMSON BLOCK ---
450                                             if (line.toUpperCase().contains("CRIMSON")) {
451                                                      CrimsonUtils.printMsg("Loading CRIMSON block.");
452                                                      blockType = 2;
453                                                      continue;
454                                             }
455                                             
456                                             // --- BEGIN READING DATA ---
457                                             if (line.toUpperCase().contains("MATRIX")) {
458                                                      readingData = true;
459                                                      continue;
460                                             }
461                                    }
462    
463                                    bReader.close();
464                      } catch (FileNotFoundException e) {
465                                    CrimsonUtils.printError("File not found: " + e.getMessage());
466                                    CrimsonUtils.printError(e.getMessage());
467                                    return null;
468                      } catch (IOException e) {
469                                    CrimsonUtils.printError("Error reading file: " + filename);
470                                    CrimsonUtils.printError(e.getMessage());
471                                    return null;
472                      }
473    
474                      // if Oracle, then we want to make sure we do a commit
475                      if (SQL.isOracle()) Database.commit();
476    
477                      // rebuild table lists
478                      ObjectHandles.buildLists();
479    
480                      // we could return the tree created above, but this seems to
481                      // be safer
482                      return ObjectHandles.getTree(treeID);
483             }
484    
485             /**
486              * Appends a Nexus file into the current database.  This will not
487              * append the tree if the tree don't exist or any of the data
488              * files are missing.
489              */
490             public static Tree append(String filename, String treeID, String partitionID) {
491                      if (CrimsonUtils.isEmpty(filename)) {
492                                    CrimsonUtils.printError("No NEXUS file specified.");
493                                    return null;
494                      }
495                      File file = new File(filename);
496                      if ((file == null) || (! file.exists()) || (! file.isFile())) {
497                                    CrimsonUtils.printError("Invalid NEXUS file: " + filename);
498                                    return null;
499                      }
500                      
501                      if (CrimsonUtils.isEmpty(treeID)) {
502                                    CrimsonUtils.printError("No tree specified.");
503                                    return null;
504                      }
505                      if (CrimsonUtils.isEmpty(partitionID)) {
506                                    CrimsonUtils.printError("No partition specified.");
507                                    return null;
508                      }
509    
510                      if (! Database.isOpen()) {
511                                    CrimsonUtils.printError("Must open a database before appending data to a tree.");
512                                    return null;
513                      }
514    
515                      if (! ObjectHandles.containsTree(treeID)) {
516                                    CrimsonUtils.printError("Tree doesn't exists. Must load the tree before can append partitions.");
517                                    return null;
518                      }
519    
520                      // shouldn't need to test both database and partitionPool, but
521                      // it's better to be careful
522                      if (Partitions.dbContains(partitionID) || ObjectHandles.containsPartition(partitionID)) {
523                                    CrimsonUtils.printError("Partition already exists. Must choose different partition ID.");
524                                    return null;
525                      }
526    
527                      // we need this to be all uppercase and don't want to do this
528                      // conversion every time we access the database
529                      partitionID = partitionID.toUpperCase();
530    
531                      try {
532                                    BufferedReader bReader = new BufferedReader(new FileReader(file));
533                      
534                                    // flag for reading data blocks: 1 = data, 2 = crimson
535                                    int blockType = 0;
536                                    boolean readingData = false;
537    
538                                    // flag whether the partition needs to be created
539                                    boolean createPartition = true;
540    
541                                    // flag whether we're processing the first data block.  If
542                                    // so, then we'll need to add the partition/species info
543                                    // to the PART_DATA table
544                                    boolean firstDataBlock = true;
545                                    
546                                    String line;
547                                    while ((line = bReader.readLine()) != null) {
548                                             // remove any leading white space
549                                             line = line.trim();
550    
551                                             // --- SKIP BLANK LINES ---
552                                             if (CrimsonUtils.isEmpty(line)) continue;
553    
554                                             // --- SKIP COMMENTS ---
555                                             if ((line.startsWith("#")) || (line.startsWith("["))) continue;
556    
557                                             // --- END READING DATA ---
558                                             if (line.startsWith(";")) {
559                                                      // if not reading data then we just finished the
560                                                      // TAXA block
561                                                      if (readingData) {
562                                                                    // no longer reading data
563                                                                    readingData = false;
564                                                                    // no longer processing the first data block
565                                                                    firstDataBlock = false;
566                                                                    
567                                                                    if (blockType == 1) CrimsonUtils.printMsg("Finished loading DATA block.");
568                                                                    else if (blockType == 2) CrimsonUtils.printMsg("Finished loading CRIMSON block.");
569                                                      }
570                                                      continue;
571                                             }
572                                            
573                                             // --- LOAD DATA ---
574                                             if (readingData) {
575                                                      // --- PROCESS SPECIES ID ---
576                                                      // expect ' ' to be delimiter between IDs and data
577                                                      int index = line.indexOf(" ");
578                                                      if (index == -1) {
579                                                                    // didn't find a valid delimiter and while not
580                                                                    // supposed to use tabs, lets check just to
581                                                                    // make sure
582                                                                    index = line.indexOf("\t");
583                                                                    if (index == -1) {
584                                                                             // still haven't found delimiter so error
585                                                                             CrimsonUtils.printError("Can't find a valid delimiter (' ') between species ID and data values: " + line);
586                                                                             return null;
587                                                                    }
588                                                      }
589                                                      // get species ID from line
590                                                      String speciesID = line.substring(0, index).toUpperCase();
591                                                      // remove ID from line and extra delimiter characters (ie spaces)
592                                                      line = line.substring(index + 1).trim();
593    
594                                                      // if first data block, then add partition/species
595                                                      // records to PART_DATA
596                                                      if (firstDataBlock) {
597                                                                    // if necessary, insert the partition record into
598                                                                    // the partitions table
599                                                                    if (createPartition) {
600                                                                             // create a new record in the
601                                                                             // PARTITIONS table.  This assumes all
602                                                                             // data lines have the same length.
603                                                                             String sql = "INSERT INTO partitions (id, tree_id, length) ";
604                                                                             sql += "VALUES ('" + partitionID + "', '" + treeID.toUpperCase() + "', " + line.length() + ")";
605                                                                             if (! Database.execUpdate(sql)) {
606                                                                                      CrimsonUtils.printError("Unable to add record to PARTITIONS table.");
607                                                                                      return null;
608                                                                             }
609                                                                             
610                                                                             createPartition = false;
611                                                                             CrimsonUtils.printMsg("Updated PARTITIONS table.");
612                                                                    }
613                                                                    
614                                                                    // create a new record in the PART_DATA table.  Can't add the data 
615                                                                    String sql = "INSERT INTO part_data (partition_id, species_id) VALUES ('" + partitionID + "', '" + speciesID + "')";
616                                                                    if (! Database.execUpdate(sql)) {
617                                                                             CrimsonUtils.printError("Unable to create new record in PART_DATA table.");
618                                                                             return null;
619                                                                    }
620                                                      }                                              
621    
622                                                      // test if reading the last line of data
623                                                      if (line.endsWith(";")) {
624                                                                    // no longer reading data
625                                                                    readingData = false;
626                                                                    // no longer processing the first data block
627                                                                    firstDataBlock = false;
628                                                                    // remove ";" from line before adding line to PART_DATA
629                                                                    line = line.substring(0, line.length() - 1);
630    
631                                                                    if (blockType == 1) CrimsonUtils.printMsg("Finished loading DATA block.");
632                                                                    else if (blockType == 2) CrimsonUtils.printMsg("Finished loading CRIMSON block.");
633                                                      }
634    
635                                                      // add line to PART_DATA
636                                                      switch (blockType) {
637                                                      case 1:   // --- DATA BLOCK ---
638                                                                    // UPDATE the CLOB
639                                                                    if (! Database.writeDataClob(partitionID, speciesID, "sequence", line)) {
640                                                                             CrimsonUtils.printError("Unable to add sequence data to PART_DATA table.");
641                                                                             return null;
642                                                                    }
643                                                                    break;
644                                                      case 2:   // --- CRIMSON BLOCK ---
645                                                                    if (! Database.writeDataClob(partitionID, speciesID, "structure", line)) {
646                                                                             CrimsonUtils.printError("Unable to add structure data to PART_DATA table.");
647                                                                             return null;
648                                                                    }
649                                                                    break;
650    
651                                                      default:
652                                                                    CrimsonUtils.printError("Invalid Nexus file: " + filename);
653                                                                    return null;
654                                                      }
655    
656                                                      continue;
657                                             }
658    
659                                             // --- SKIP TREE (NEWICK) ---
660                                             if (line.toUpperCase().startsWith("TREE")) continue;
661                                             
662                                             // --- ENTERING DATA BLOCK ---
663                                             if ((line.toUpperCase().contains("DATA")) || (line.toUpperCase().contains("CHARACTERS"))) {
664                                                      blockType = 1;
665                                                      continue;
666                                             }
667                                             
668                                             // --- ENTERING CRIMSON BLOCK ---
669                                             if (line.toUpperCase().contains("CRIMSON")) {
670                                                      blockType = 2;
671                                                      continue;
672                                             }
673                                             
674                                             // --- BEGIN READING DATA ---
675                                             if (line.toUpperCase().contains("MATRIX")) {
676                                                      readingData = true;
677                                                      continue;
678                                             }
679                                    }
680    
681                                    bReader.close();
682                      } catch (FileNotFoundException e) {
683                                    CrimsonUtils.printError("File not found: " + e.getMessage());
684                                    CrimsonUtils.printError(e.getMessage());
685                                    return null;
686                      } catch (IOException e) {
687                                    CrimsonUtils.printError("Error reading file: " + filename);
688                                    CrimsonUtils.printError(e.getMessage());
689                                    return null;
690                      }
691    
692                      // if Oracle, then we want to make sure we do a commit
693                      //              if (SQL.isOracle()) Database.commit();
694    
695                      // rebuild table lists
696                      ObjectHandles.buildLists();
697    
698                      // we could return the tree created above, but this seems to
699                      // be safer
700                      return ObjectHandles.getTree(treeID);
701             }
702    
703             /** From Mark Holder */
704             public static String doQuote(final String text)
705             {
706                      // replace any single quotes with a pair of single quotes.
707                      final String tmp = text.replaceAll("'", "''");
708                      // place the string within quotes.
709                      final StringBuffer sb = new StringBuffer(tmp.length() + 2);
710                      sb.append('\'');
711                      sb.append(tmp);
712                      sb.append('\'');
713                      return sb.toString();
714             }
715             
716             /** 
717              * From Mark Holder 
718              *
719              * This will properly quote a word that is to be included in a
720              * NEXUS file.
721              */
722             public static String quote(final String text)
723             {
724                      final int len = text.length();
725                      if (len > 1)
726                                    {
727                                             if (multiCharStringNeedsQuotes(text))
728                                                      {
729                                                                    return doQuote(text);
730                                                      }
731                                    }
732                      else if (shortStringNeedsQuotes(text))
733                                    {
734                                             return doQuote(text);
735                                    }
736                      return text;
737             }
738             
739             /** 
740              * From Mark Holder 
741              *
742              * This will properly quote a word that is to be included in a
743              * NEXUS file.
744              */
745             public static String[] quote(final String[] text)
746             {
747                      for (int i = 0; i < text.length; i++)
748                                    {
749                                             text[i] = quote(text[i]);
750                                    }
751                      return text;
752             }
753             
754             /*
755              * From Mark Holder
756              *
757              * charactens in NEXUS that must be quoted when they occur in strings are
758              * any of the following (){}[]/\,;:=*'"`+-<>_ and whitespace. I built the
759              * re by putting the set of characters above between double quotes and
760              * inserting \\ before each character. Except that you have to handle \ and "
761              * specially since they have special meaning when writing java string
762              * literals. To get a \ in a literal you need to use \\, so to escape it
763              * from both the literal mechanism and the re mechanism to match '\' you
764              * must use \\\\. To escape " from the string literal mechanism you must
765              * precede it with \. So "\"" is a string containing a quote. To escape the
766              * quote to the re mechanism you need to send it \", which you write as
767              * "\\\"". I surrounded that with [] to indicate "match any of these
768              * characters". I added [\s] inside the []s to create the union of those
769              * chars and all white space chars. I preceeded and followed that with .* to
770              * indicate: 0 or more instances of anything followed by one of the
771              * specified chars followed by 0 or more instances of anything. How's that
772              * for readable code? mth added the _ character and changed the name from
773              * tokenBreakers. The underscore needs to be quoted but is not a token
774              * breaker. Also changed the logic of "quote" so that single characters will
775              * not be quote unless they need it.
776              */
777             private static final String regexPat = ".*[\\(\\)\\{\\}\\[\\]\\/\\\\\\,\\;\\:\\=\\*\\'\\\"\\`\\+\\-\\<\\>\\_[\\s]].*";
778             /** From Mark Holder */
779             private static final Pattern needsQuotesPattern = Pattern.compile(regexPat);
780             
781             /** From Mark Holder */
782             private static boolean multiCharStringNeedsQuotes(final String text)
783             {
784                      return needsQuotesPattern.matcher(text).matches();
785             }
786             
787             /** From Mark Holder */
788             private static final Pattern shortStrNeedsQuotePattern = Pattern.compile("^[\\[\\]\\'\\_[\\s]]?$");
789             
790             /**
791              * From Mark Holder
792              *
793              * handles strings of len 1 or 0 only. The only single punctuation
794              * characters that need quoting are []'_ Note that an empty string
795              * and whitespac. must be quoted too.
796              * @param text
797              * @return true for "", "[", "
798              */
799             private static boolean shortStringNeedsQuotes(final String text)
800             {
801                      return shortStrNeedsQuotePattern.matcher(text).matches();
802             }
803    } // NexusFile.java