/*
 * CRIMSON
 * Copyright (c) 2006, Stephen Fisher, Susan Davidson, and Junhyong Kim,
 * University of Pennsylvania.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA.
 *
 * @(#)NexusFile.java
 */

package edu.upenn.crimson.io;

import edu.upenn.crimson.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import java.io.*;
import java.sql.Clob;
import java.sql.SQLException;

/**
 * Functions related to loading NEXUS files.
 *
 * XXX: Should see if can optimize db access by maintaining one
 * Statement that is created every time a database connection is made.
 *
 * @author Stephen Fisher
 * @version $Id: NexusFile.java,v 1.54 2007/06/14 19:48:42 fisher Exp $
 */

public class NexusFile {
    //--------------------------------------------------------------------------
    // Constants

    /** Block type: no DATA/CHARACTERS or CRIMSON block has been seen yet. */
    private static final int BLOCK_NONE = 0;
    /** Block type: DATA (or CHARACTERS) block; rows go to the "sequence" column. */
    private static final int BLOCK_DATA = 1;
    /** Block type: CRIMSON block; rows go to the "structure" column. */
    private static final int BLOCK_CRIMSON = 2;

    //--------------------------------------------------------------------------
    // Miscellaneous Methods

    /**
     * Loads a tree structure from a Nexus file into memory. This is
     * meant to be used for debugging.
     *
     * Only the first TREE statement in the file is parsed; the file
     * name is used as the tree's ID.
     *
     * @param filename path of the NEXUS file
     * @return the parsed tree, or null if the file name is empty, the
     *         file is missing/unreadable, it has no TREE statement, or
     *         the Newick string is malformed
     */
    public static Tree loadNewick(String filename) {
        if (CrimsonUtils.isEmpty(filename)) return null;
        File file = new File(filename);
        if ((! file.exists()) || (! file.isFile())) {
            CrimsonUtils.printError("Invalid NEXUS file: " + filename);
            return null;
        }

        BufferedReader bReader = null;
        try {
            bReader = new BufferedReader(new FileReader(file));

            String line;
            while ((line = bReader.readLine()) != null) {
                // remove any leading/trailing white space
                line = line.trim();

                // only TREE statements are of interest; match the keyword
                // case-insensitively, the same way load() does
                if (! line.toUpperCase(Locale.ENGLISH).startsWith("TREE")) continue;

                // ignore everything up to and including the "="
                String newick = line.substring(line.indexOf("=") + 1).trim();

                // use filename as tree's ID
                return parseNewick(new Tree(file.getName(), false), newick);
            }

            // no TREE statement in the file
            return null;
        } catch (FileNotFoundException e) {
            CrimsonUtils.printError("File not found: " + e.getMessage());
            CrimsonUtils.printError(e.getMessage());
            return null;
        } catch (IOException e) {
            CrimsonUtils.printError("Error reading file: " + filename);
            CrimsonUtils.printError(e.getMessage());
            return null;
        } finally {
            closeQuietly(bReader);
        }
    }

    /**
     * Finds the end of the number or label that begins at "start".
     *
     * The character at "start" itself is never tested (it is assumed
     * to belong to the token); scanning begins at start + 1.
     *
     * @return the index of the first delimiter after "start", or
     *         newick.length() if the token runs to the end of the string
     */
    private static int findLabelEnd(String newick, int start) {
        int lenNewick = newick.length();
        // clamp so that a token starting on (or past) the last character
        // yields a valid substring bound instead of an exception
        int end = Math.min(start + 1, lenNewick);
        while ((end < lenNewick) && (! isLabelDelimiter(newick.charAt(end)))) {
            end++;
        }
        return end;
    }

    /** Returns true if "c" terminates a Newick label or number. */
    private static boolean isLabelDelimiter(char c) {
        return (c == ',') || (c == ')') || (c == '(') || (c == ' ') || (c == ';') || (c == ':');
    }

    /**
     * Rebuilds the structure of "tree" from the tree's own Newick
     * string (as returned by tree.getNewick()).
     *
     * @see #parseNewick(Tree, String)
     */
    public static Tree parseNewick(Tree tree) {
        return parseNewick(tree, tree.getNewick());
    }

    /**
     * Rebuilds the structure of "tree" from a Newick string. Any
     * existing root is discarded first.
     *
     * Inner nodes are created with temporary IDs ("T-0", "T-1", ...)
     * that are replaced if the Newick string labels the node. Parsing
     * stops at the first ';'; a trailing ';' is optional.
     *
     * This might be faster if we use char[] instead of String for
     * "newick". Blank spaces (' ') between tokens are skipped, but a
     * space directly after '(' or ')' will still confuse the
     * first-child / inner-label detection.
     *
     * @param tree   tree to populate
     * @param newick Newick description of the tree
     * @return "tree", or null (after printing an error) if the string
     *         is null, has unbalanced parentheses, or is otherwise
     *         structurally invalid
     */
    public static Tree parseNewick(Tree tree, String newick) {
        // make sure there isn't already a tree
        tree.setRoot(null);

        if (newick == null) {
            CrimsonUtils.printError("No Newick string to parse.");
            return null;
        }

        int lenNewick = newick.length();

        // check that the parentheses match
        int depth = 0;
        for (int i = 0; i < lenNewick; i++) {
            char c = newick.charAt(i);
            if (c == '(') depth++;
            else if (c == ')') depth--;
        }
        if (depth != 0) {
            CrimsonUtils.printError("Unbalanced parenthesis in the input tree string");
            return null;
        }

        Species curSpecies = null; // node most recently created or closed
        Species curParent = null;  // node that receives the next sibling
        int id = 0;                // counter for temporary inner-node IDs

        int head = 0;
        while (head < lenNewick) {
            char c = newick.charAt(head);

            if (c == ' ') { // skip blank spaces
                head++;
                continue;
            }

            if (c == '(') { // new inner node
                // ID will be changed below, if the node is labeled
                Species newSpecies = new Species("T-" + String.valueOf(id++), tree);

                if (tree.getRoot() == null) { // first node is root
                    tree.setRoot(newSpecies);
                    curParent = null;
                } else {
                    // a root exists, so head > 0 here
                    Species parent = (newick.charAt(head - 1) == '(') ? curSpecies : curParent;
                    if (parent == null) {
                        CrimsonUtils.printError("Unbalanced nodes in tree.");
                        return null;
                    }
                    parent.addChild(newSpecies);
                }

                curSpecies = newSpecies;
                head++;
                continue;
            }

            if (c == ')') { // end of child list
                if (curSpecies == null) {
                    CrimsonUtils.printError("Unbalanced nodes in tree.");
                    return null;
                }

                // up one level; a null parent means we tried to close
                // more levels than were opened (e.g. "()")
                Species closed = curSpecies.getParent();
                if (closed == null) {
                    CrimsonUtils.printError("Unbalanced nodes in tree.");
                    return null;
                }
                curSpecies = closed;
                curParent = closed.getParent();
                head++;
                continue;
            }

            if (c == ':') { // get stem length
                if (curSpecies == null) {
                    CrimsonUtils.printError("Unbalanced nodes in tree.");
                    return null;
                }

                int numStart = head + 1;
                head = findLabelEnd(newick, numStart);
                String stemLength = newick.substring(numStart, head);
                try {
                    curSpecies.setStemLength(Double.parseDouble(stemLength));
                } catch (NumberFormatException e) {
                    CrimsonUtils.printError("Invalid stem length in tree: " + stemLength);
                    return null;
                }
                continue;
            }

            if (c == ';') { // end of tree string
                return tree;
            }

            if (c == ',') { // next child
                head++;
                continue;
            }

            // found label
            int labelStart = head;
            head = findLabelEnd(newick, labelStart);
            String label = newick.substring(labelStart, head);

            boolean followsClose = (labelStart > 0) && (newick.charAt(labelStart - 1) == ')');
            if (followsClose) { // label for an inner node
                // the ')' handler guarantees curSpecies is non-null here
                curSpecies.setID(label);
            } else { // leaf
                if ((labelStart > 0) && (newick.charAt(labelStart - 1) == '(')) { // first child
                    curParent = curSpecies;
                }
                if (curParent == null) {
                    // a leaf with no enclosing "(" (e.g. the string "A;")
                    CrimsonUtils.printError("Unbalanced nodes in tree.");
                    return null;
                }

                Species newSpecies = new Species(label, tree);
                curParent.addChild(newSpecies);
                curSpecies = newSpecies;
            }
        }

        return tree;
    }

    /**
     * Loads a Nexus file. This will not load the tree if the tree
     * already exist in the database.
     *
     * @param filename    path of the NEXUS file
     * @param treeID      ID to give the new tree; must not already exist
     * @param partitionID ID to give the new partition; must not
     *                    already exist (stored in upper case)
     * @return the tree handle obtained from ObjectHandles after the
     *         load, or null if validation or loading failed
     */
    public static Tree load(String filename, String treeID, String partitionID) {
        if (CrimsonUtils.isEmpty(filename)) return null;
        File file = new File(filename);
        if ((! file.exists()) || (! file.isFile())) {
            CrimsonUtils.printError("Invalid NEXUS file: " + filename);
            return null;
        }

        if (CrimsonUtils.isEmpty(treeID)) {
            CrimsonUtils.printError("No tree specified.");
            return null;
        }
        if (CrimsonUtils.isEmpty(partitionID)) {
            CrimsonUtils.printError("No partition specified.");
            return null;
        }

        if (! Database.isOpen()) {
            CrimsonUtils.printError("Must open a database before loading a tree.");
            return null;
        }

        // shouldn't need to test both database and treePool, but
        // better to be careful
        if (Trees.dbContains(treeID) || ObjectHandles.containsTree(treeID)) {
            CrimsonUtils.printError("Tree already exists. Must choose different tree ID: " + treeID);
            return null;
        }
        if (Partitions.dbContains(partitionID) || ObjectHandles.containsPartition(partitionID)) {
            CrimsonUtils.printError("Partition already exists. Must choose different partition ID:" + partitionID);
            return null;
        }

        // we need this to be all uppercase and don't want to do this
        // conversion every time we access the database
        partitionID = partitionID.toUpperCase();

        if (! importBlocks(file, filename, treeID, partitionID, true)) return null;

        // if Oracle, then we want to make sure we do a commit
        if (SQL.isOracle()) Database.commit();

        // rebuild table lists
        ObjectHandles.buildLists();

        // we could return the tree created while importing, but this
        // seems to be safer
        return ObjectHandles.getTree(treeID);
    }

    /**
     * Appends a Nexus file into the current database. This will not
     * append the data if the tree doesn't exist or the partition
     * already exists. Any TREE statement in the file is ignored.
     *
     * @param filename    path of the NEXUS file
     * @param treeID      ID of an already loaded tree
     * @param partitionID ID to give the new partition; must not
     *                    already exist (stored in upper case)
     * @return the tree handle obtained from ObjectHandles after the
     *         append, or null if validation or loading failed
     */
    public static Tree append(String filename, String treeID, String partitionID) {
        if (CrimsonUtils.isEmpty(filename)) {
            CrimsonUtils.printError("No NEXUS file specified.");
            return null;
        }
        File file = new File(filename);
        if ((! file.exists()) || (! file.isFile())) {
            CrimsonUtils.printError("Invalid NEXUS file: " + filename);
            return null;
        }

        if (CrimsonUtils.isEmpty(treeID)) {
            CrimsonUtils.printError("No tree specified.");
            return null;
        }
        if (CrimsonUtils.isEmpty(partitionID)) {
            CrimsonUtils.printError("No partition specified.");
            return null;
        }

        if (! Database.isOpen()) {
            CrimsonUtils.printError("Must open a database before appending data to a tree.");
            return null;
        }

        if (! ObjectHandles.containsTree(treeID)) {
            CrimsonUtils.printError("Tree doesn't exists. Must load the tree before can append partitions.");
            return null;
        }

        // shouldn't need to test both database and partitionPool, but
        // it's better to be careful
        if (Partitions.dbContains(partitionID) || ObjectHandles.containsPartition(partitionID)) {
            CrimsonUtils.printError("Partition already exists. Must choose different partition ID.");
            return null;
        }

        // we need this to be all uppercase and don't want to do this
        // conversion every time we access the database
        partitionID = partitionID.toUpperCase();

        if (! importBlocks(file, filename, treeID, partitionID, false)) return null;

        // NOTE(review): unlike load(), no Oracle commit is issued here
        // (it was already disabled in the original code) -- confirm
        // that this is intentional.

        // rebuild table lists
        ObjectHandles.buildLists();

        // we could return the tree created above, but this seems to
        // be safer
        return ObjectHandles.getTree(treeID);
    }

    //--------------------------------------------------------------------------
    // Import helpers shared by load() and append()

    /**
     * Reads a NEXUS file and writes its matrix rows (and, optionally,
     * its tree) to the database. The reader is always closed.
     *
     * NOTE(review): nothing here rolls back records written before a
     * failure, so a failed import may leave partial rows behind.
     *
     * @param file        the validated NEXUS file
     * @param filename    the name as given by the caller (for messages)
     * @param treeID      tree the partition belongs to
     * @param partitionID partition ID, already upper-cased
     * @param loadTree    true when called from load(): TREE statements
     *                    are stored and block-entry progress messages
     *                    are printed; false when called from append():
     *                    TREE statements are skipped silently
     * @return true if the whole file was processed, false after an
     *         error (which has already been printed)
     */
    private static boolean importBlocks(File file, String filename, String treeID,
                                        String partitionID, boolean loadTree) {
        BufferedReader bReader = null;
        try {
            bReader = new BufferedReader(new FileReader(file));

            // which kind of block the current matrix belongs to
            int blockType = BLOCK_NONE;
            boolean readingData = false;

            // flag whether the partition needs to be created
            boolean createPartition = true;

            // flag whether we're processing the first data block. If
            // so, then we'll need to add the partition/species info
            // to the PART_DATA table
            boolean firstDataBlock = true;

            String line;
            while ((line = bReader.readLine()) != null) {
                // remove any leading/trailing white space
                line = line.trim();

                // --- SKIP BLANK LINES ---
                if (CrimsonUtils.isEmpty(line)) continue;

                // --- SKIP COMMENTS ---
                if (line.startsWith("#") || line.startsWith("[")) continue;

                // --- END READING DATA ---
                if (line.startsWith(";")) {
                    // if not reading data then we just finished some
                    // other block (e.g. TAXA)
                    if (readingData) {
                        readingData = false;
                        firstDataBlock = false;
                        printBlockFinished(blockType);
                    }
                    continue;
                }

                // --- LOAD DATA ---
                if (readingData) {
                    // a MATRIX that is not inside a DATA/CHARACTERS or
                    // CRIMSON block can't be stored; fail before
                    // writing anything for this row
                    if ((blockType != BLOCK_DATA) && (blockType != BLOCK_CRIMSON)) {
                        CrimsonUtils.printError("Invalid Nexus file: " + filename);
                        return false;
                    }

                    // --- PROCESS SPECIES ID ---
                    // expect ' ' to be the delimiter between IDs and
                    // data; tabs aren't supposed to be used but are
                    // accepted as well
                    int index = indexOfDelimiter(line);
                    if (index == -1) {
                        CrimsonUtils.printError("Can't find a valid delimiter (' ') between species ID and data values: " + line);
                        return false;
                    }
                    // NOTE(review): IDs are upper-cased with the default
                    // locale, as in the original code; the code that
                    // reads them back is not visible here.
                    String speciesID = line.substring(0, index).toUpperCase();
                    // remove ID from line and extra delimiter characters (ie spaces)
                    String data = line.substring(index + 1).trim();

                    // test if reading the last line of data; strip the
                    // ";" up front so it is neither stored nor counted
                    // in the partition length
                    boolean lastLine = data.endsWith(";");
                    if (lastLine) data = data.substring(0, data.length() - 1);

                    // if first data block, then add partition/species
                    // records
                    if (firstDataBlock) {
                        if (createPartition) {
                            // create a new record in the PARTITIONS
                            // table. This assumes all data lines have
                            // the same length.
                            String sql = "INSERT INTO partitions (id, tree_id, length) "
                                + "VALUES (" + sqlLiteral(partitionID) + ", "
                                + sqlLiteral(treeID.toUpperCase()) + ", " + data.length() + ")";
                            if (! Database.execUpdate(sql)) {
                                CrimsonUtils.printError("Unable to add record to PARTITIONS table.");
                                return false;
                            }

                            createPartition = false;
                            CrimsonUtils.printMsg("Updated PARTITIONS table.");
                        }

                        // create a new record in the PART_DATA table;
                        // the data itself is written to the CLOB below
                        String sql = "INSERT INTO part_data (partition_id, species_id) VALUES ("
                            + sqlLiteral(partitionID) + ", " + sqlLiteral(speciesID) + ")";
                        if (! Database.execUpdate(sql)) {
                            CrimsonUtils.printError("Unable to create new record in PART_DATA table.");
                            return false;
                        }
                    }

                    if (lastLine) {
                        readingData = false;
                        firstDataBlock = false;
                        printBlockFinished(blockType);
                    }

                    // add line to PART_DATA
                    if (! writeBlockData(blockType, partitionID, speciesID, data)) return false;

                    continue;
                }

                // keyword matching must not depend on the default
                // locale (e.g. "matrix" upper-cases to "MATR\u0130X"
                // under a Turkish locale)
                String upper = line.toUpperCase(Locale.ENGLISH);

                // --- LOAD (OR SKIP) TREE (NEWICK) ---
                if (upper.startsWith("TREE")) {
                    if (loadTree) {
                        // ignore everything before the "="
                        String newick = line.substring(line.indexOf("=") + 1).trim();

                        // remove trailing ';', if present
                        if (newick.endsWith(";")) newick = newick.substring(0, newick.length() - 1);

                        if (! storeTree(treeID, newick)) return false;
                    }
                    continue;
                }

                // --- ENTERING DATA BLOCK ---
                if (upper.contains("DATA") || upper.contains("CHARACTERS")) {
                    if (loadTree) CrimsonUtils.printMsg("Loading DATA block.");
                    blockType = BLOCK_DATA;
                    continue;
                }

                // --- ENTERING CRIMSON BLOCK ---
                if (upper.contains("CRIMSON")) {
                    if (loadTree) CrimsonUtils.printMsg("Loading CRIMSON block.");
                    blockType = BLOCK_CRIMSON;
                    continue;
                }

                // --- BEGIN READING DATA ---
                if (upper.contains("MATRIX")) {
                    readingData = true;
                    continue;
                }
            }

            return true;
        } catch (FileNotFoundException e) {
            CrimsonUtils.printError("File not found: " + e.getMessage());
            CrimsonUtils.printError(e.getMessage());
            return false;
        } catch (IOException e) {
            CrimsonUtils.printError("Error reading file: " + filename);
            CrimsonUtils.printError(e.getMessage());
            return false;
        } finally {
            closeQuietly(bReader);
        }
    }

    /**
     * Creates the TREES record for "treeID", stores the Newick string
     * and then updates the tree statistics columns.
     *
     * NOTE(review): the record is inserted (and the CLOB written) with
     * treeID as given, while the statistics UPDATE matches on the
     * upper-cased ID, exactly as in the original code -- confirm the
     * database treats these as the same key.
     *
     * @return true on success, false after an error has been printed
     */
    private static boolean storeTree(String treeID, String newick) {
        // create a new record in the TREES table
        String sql = "INSERT INTO trees (id) VALUES (" + sqlLiteral(treeID) + ")";
        if (! Database.execUpdate(sql)) {
            CrimsonUtils.printError("Unable to create record in TREES table.");
            return false;
        }
        if (! Database.writeClob("trees", treeID, "newick", newick)) {
            CrimsonUtils.printError("Unable to update NEWICK field in TREES table.");
            return false;
        }

        // need to update the tree stats. to do this we will build a
        // temporary tree.
        Tree tree = new Tree(treeID);
        tree.computeStats();

        // update the stats in the database
        sql = "UPDATE trees SET"
            + " num_species = " + tree.getNumSpecies()
            + ", num_leaves = " + tree.getNumLeaves()
            + ", is_binary = " + (tree.isBinary() ? "1" : "0")
            + ", is_ultrametric = " + (tree.isUltrametric() ? "1" : "0")
            + ", min_level = " + tree.getMinLevel()
            + ", max_level = " + tree.getMaxLevel()
            + ", min_stem_length = " + tree.getMinStemLength()
            + ", max_stem_length = " + tree.getMaxStemLength()
            + ", min_temp_depth = " + tree.getMinTempDepth()
            + ", max_temp_depth = " + tree.getMaxTempDepth()
            // the leading space matters: without it the last value
            // runs straight into the WHERE keyword
            + " where id = " + sqlLiteral(treeID.toUpperCase());
        if (! Database.execUpdate(sql)) {
            CrimsonUtils.printError("Unable to set tree stats.");
            return false;
        }

        // clear out the tree structure. If the user needs the tree
        // structure, they can manually run tree.buildTree().
        tree.clearStructure();

        CrimsonUtils.printMsg("Updated TREES table.");
        return true;
    }

    /**
     * Writes one matrix row to the PART_DATA CLOB column that matches
     * the block type ("sequence" for DATA, "structure" for CRIMSON).
     *
     * @return true on success, false after an error has been printed
     */
    private static boolean writeBlockData(int blockType, String partitionID,
                                          String speciesID, String data) {
        String column = (blockType == BLOCK_DATA) ? "sequence" : "structure";
        if (Database.writeDataClob(partitionID, speciesID, column, data)) return true;

        CrimsonUtils.printError("Unable to add " + column + " data to PART_DATA table.");
        return false;
    }

    /** Prints the "finished" progress message for the given block type. */
    private static void printBlockFinished(int blockType) {
        if (blockType == BLOCK_DATA) CrimsonUtils.printMsg("Finished loading DATA block.");
        else if (blockType == BLOCK_CRIMSON) CrimsonUtils.printMsg("Finished loading CRIMSON block.");
    }

    /**
     * Returns the index of the first space or tab in "line", or -1 if
     * the line contains neither.
     */
    private static int indexOfDelimiter(String line) {
        int space = line.indexOf(' ');
        int tab = line.indexOf('\t');
        if (space == -1) return tab;
        if (tab == -1) return space;
        return Math.min(space, tab);
    }

    /**
     * Wraps "value" in single quotes for use as a SQL string literal,
     * doubling any embedded single quote so that an ID read from a
     * file can't terminate the literal early.
     */
    private static String sqlLiteral(String value) {
        return "'" + value.replace("'", "''") + "'";
    }

    /** Closes "reader" if it was opened; a failure to close is ignored. */
    private static void closeQuietly(Reader reader) {
        if (reader == null) return;
        try {
            reader.close();
        } catch (IOException ignored) {
            // nothing useful can be done if closing a reader fails
        }
    }

    //--------------------------------------------------------------------------
    // NEXUS quoting (from Mark Holder)

    /**
     * From Mark Holder
     *
     * Unconditionally quotes "text": every single quote is replaced
     * with a pair of single quotes and the result is wrapped in single
     * quotes.
     */
    public static String doQuote(final String text)
    {
        // replace any single quotes with a pair of single quotes.
        final String tmp = text.replaceAll("'", "''");
        // place the string within quotes.
        final StringBuffer sb = new StringBuffer(tmp.length() + 2);
        sb.append('\'');
        sb.append(tmp);
        sb.append('\'');
        return sb.toString();
    }

    /**
     * From Mark Holder
     *
     * This will properly quote a word that is to be included in a
     * NEXUS file. The word is returned unchanged if it needs no
     * quoting.
     */
    public static String quote(final String text)
    {
        final boolean needsQuotes = (text.length() > 1)
            ? multiCharStringNeedsQuotes(text)
            : shortStringNeedsQuotes(text);
        return needsQuotes ? doQuote(text) : text;
    }

    /**
     * From Mark Holder
     *
     * This will properly quote each word that is to be included in a
     * NEXUS file. The array is modified in place and returned.
     */
    public static String[] quote(final String[] text)
    {
        for (int i = 0; i < text.length; i++)
        {
            text[i] = quote(text[i]);
        }
        return text;
    }

    /*
     * From Mark Holder
     *
     * Characters in NEXUS that must be quoted when they occur in strings are
     * any of the following (){}[]/\,;:=*'"`+-<>_ and whitespace. I built the
     * re by putting the set of characters above between double quotes and
     * inserting \\ before each character. Except that you have to handle \ and "
     * specially since they have special meaning when writing java string
     * literals. To get a \ in a literal you need to use \\, so to escape it
     * from both the literal mechanism and the re mechanism to match '\' you
     * must use \\\\. To escape " from the string literal mechanism you must
     * precede it with \. So "\"" is a string containing a quote. To escape the
     * quote to the re mechanism you need to send it \", which you write as
     * "\\\"". I surrounded that with [] to indicate "match any of these
     * characters". I added [\s] inside the []s to create the union of those
     * chars and all white space chars. I preceded and followed that with .* to
     * indicate: 0 or more instances of anything followed by one of the
     * specified chars followed by 0 or more instances of anything. How's that
     * for readable code? mth added the _ character and changed the name from
     * tokenBreakers. The underscore needs to be quoted but is not a token
     * breaker. Also changed the logic of "quote" so that single characters will
     * not be quoted unless they need it.
     */
    private static final String regexPat = ".*[\\(\\)\\{\\}\\[\\]\\/\\\\\\,\\;\\:\\=\\*\\'\\\"\\`\\+\\-\\<\\>\\_[\\s]].*";
    /** From Mark Holder */
    private static final Pattern needsQuotesPattern = Pattern.compile(regexPat);

    /**
     * From Mark Holder
     *
     * Returns true if "text" contains at least one character that
     * forces the whole word to be quoted.
     */
    private static boolean multiCharStringNeedsQuotes(final String text)
    {
        return needsQuotesPattern.matcher(text).matches();
    }

    /** From Mark Holder */
    private static final Pattern shortStrNeedsQuotePattern = Pattern.compile("^[\\[\\]\\'\\_[\\s]]?$");

    /**
     * From Mark Holder
     *
     * Handles strings of length 1 or 0 only. The only single
     * punctuation characters that need quoting are [ ] ' and _. Note
     * that an empty string and a single whitespace character must be
     * quoted too.
     *
     * @param text a string of length 0 or 1
     * @return true if text is empty, a single whitespace character,
     *         or one of [ ] ' _
     */
    private static boolean shortStringNeedsQuotes(final String text)
    {
        return shortStrNeedsQuotePattern.matcher(text).matches();
    }
} // NexusFile.java