001 /* 002 * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of 003 * Pennsylvania. 004 * 005 * This file is part of Glo-DB. 006 * 007 * Glo-DB is free software: you can redistribute it and/or modify it 008 * under the terms of the GNU General Public License as published by 009 * the Free Software Foundation, either version 3 of the License, or 010 * (at your option) any later version. 011 * 012 * Glo-DB is distributed in the hope that it will be useful, but 013 * WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>. 019 * 020 * @(#)ParserUtils.java 021 */ 022 023 package edu.upenn.gloDB.parser; 024 025 import edu.upenn.gloDB.*; 026 import java.util.HashMap; 027 import java.util.TreeSet; 028 import java.util.ArrayList; 029 import java.util.Iterator; 030 import java.io.StringReader; 031 032 /** 033 * Methods to process the Operations ArrayList produced by the parser. 034 * 035 * @author Stephen Fisher 036 * @version $Id: ParserUtils.java,v 1.46.2.21 2007/03/01 21:17:33 fisher Exp $ 037 */ 038 039 public class ParserUtils { 040 041 public static boolean debug = false; 042 public static boolean debugSolveAll = false; 043 public static boolean debugSolveOps = false; 044 045 //-------------------------------------------------------------------------- 046 // Miscellaneous Methods 047 048 /** 049 * This will parse the input string to create the operations 050 * array, then compute the operations and assign the results to 051 * the Track indicated. If the Track already exists, it will be 052 * overwritten. 053 * @XXX For now, trackPool and sequencePool are used as the Track 054 * and Sequence sets. These should be options that the user can 055 * set within the parse string. Within the parser they are used 056 * to determine if a Track or Sequence object is valid and are 057 * used as the set of all Tracks for the "__T" parser option. 058 */ 059 public static Track compute(String parse) { 060 Parser parser = new Parser(new StringReader(parse)); 061 ArrayList ops; 062 063 try { 064 // create the array of operations 065 ops = parser.run(ObjectHandles.trackPool, ObjectHandles.sequencePool); 066 } catch (ParseException e) { 067 if (e.getMessage() == null) { 068 GloDBUtils.printError("Invalid expression. String can not be solved."); 069 } else { 070 GloDBUtils.printError("Invalid expression. " + e.getMessage()); 071 } 072 return null; 073 } catch (TokenMgrError e) { 074 if (e.getMessage() == null) { 075 GloDBUtils.printError("Invalid expression. String can not be solved."); 076 } else { 077 GloDBUtils.printError("Invalid expression. " + e.getMessage()); 078 } 079 return null; 080 } 081 082 // not sure if this will ever happen 083 if (ops == null) return null; 084 085 // get the id for the assignment track 086 String id = parser.getId(); 087 088 Track track; 089 if (ObjectHandles.trackPool.containsKey(id)) { 090 // warn user if 'id' is being overwritten 091 GloDBUtils.printMsg("Track \"" + id + "\" already existed and has been overwritten.", 092 GloDBUtils.WARNING); 093 094 // get the existing Track object and erase its attributes 095 track = ObjectHandles.getTrack(id); 096 track.erase(); 097 098 // XXX this is inefficient but will retain any links to 099 // the existing Track that the user might already have 100 track.setFeatures(solveOpsRecurse(ops).getFeatures()); 101 } else { 102 track = solveOpsRecurse(ops); 103 104 // add output Track to set of all Track 105 ObjectHandles.addTrack(track); 106 107 // need to set the Track ID. This will also add the Track 108 // to ObjectHandles.trackPool 109 try { track.setID(id); } 110 catch (InvalidIDException e) { 111 track.setID(Track.randomID("_" + id + "_")); 112 } 113 } 114 115 return track; 116 } 117 118 /** 119 * This will run solveOpsRecurse() which will recursively solve 120 * the Operations in the ArrayList 'ops'. This wrapper will make 121 * sure the output Track is added to the trackPool. The temporary 122 * Tracks created along the way are not added to the trackPool. 123 */ 124 public static Track solveOps(ArrayList ops) { 125 return solveOps(ops, ""); 126 } 127 128 /** 129 * This will run solveOpsRecurse() which will recursively solve 130 * the Operations in the ArrayList 'ops'. This wrapper will make 131 * sure the output Track is added to the trackPool. The temporary 132 * Tracks created along the way are not added to the trackPool. 133 * The 'id' will be used as the output Track ID. 134 */ 135 public static Track solveOps(ArrayList ops, String id) { 136 if (ops == null) return null; 137 138 // get output as a Track 139 Track out = solveOpsRecurse(ops); 140 141 // add output Track to trackPool. if ID already exists, then 142 // add a random tag to the ID. if ID is blank then add the 143 // Track to trackPool and don't change the ID. 144 if (id == "") { 145 // add output Track to set of all Track 146 ObjectHandles.addTrack(out); 147 } else { 148 try { out.setID(id); } 149 catch (InvalidIDException e) { 150 out.setID(Track.randomID("_" + id + "_")); 151 } 152 } 153 154 return out; 155 } 156 157 /** 158 * This will solve the Operations in the ArrayList 'ops', calling 159 * itself to recursively resolve groups of Operations. 160 */ 161 private static Track solveOpsRecurse(ArrayList ops) { 162 if (ops == null) return null; 163 164 // the output set of Tracks, initialized to the left hand side 165 // of the Operator or set of Operators. 166 Operation operation = getOperation((Operation) ops.get(0)); 167 168 // start out with a copy of the initial Track. This will 169 // serve as the left side of the next Operation. 170 171 // XXX by cloning the Track, we don't change the original 172 // Operation, allowing this array of Operations to be solved 173 // again. Since the Operation already has a cloned Track, 174 // the usefullness of this may not outway the computational 175 // cost. 176 Track out = (Track) operation.track.cloneTrack(false); 177 178 // loop through all Operations 179 for (int i = 1; i < ops.size(); i++) { 180 operation = getOperation((Operation) ops.get(i)); 181 182 if (debugSolveOps) System.out.println("Operation : " + i); 183 if (debugSolveOps) System.out.println("Op type: " + operation.getType()); 184 185 out = Operator.processOperation(out, operation); 186 187 if (debugSolveOps) System.out.println(out); 188 } 189 190 return out; 191 } 192 193 /** 194 * For Operations that contain groups, this will solve the group 195 * and store the resulting TreeSet of Tracks in the Operation. 196 * This will be called recursively if there are nested groups. 197 */ 198 private static Operation getOperation(Operation operation) { 199 // if (operation.track == null) { 200 if (operation.isGroup()) { 201 if (debug) System.out.println("** FOUND NESTED GROUP **"); 202 203 // 'solved' will contain ALL matches and EACH match will 204 // be a Track, the collection of Tracks is the new 205 // value of "operation.track". These Tracks are not 206 // in ObjectHandle.trackPool. 207 operation.track = solveOpsRecurse(operation.getGroup()); 208 if (operation.track != null) { // add the set of Tracks to the Operation. 209 // perform length and seqPos filters 210 // operation.filterOnLength(); 211 operation.filterOnLength(); 212 operation.filterOnSeqPos(); 213 operation.filterOnRepeat(); 214 215 // if negate is true, then binary invert the Features 216 if (operation.isNegate()) { 217 operation.track = negate(operation.track); 218 } 219 } else { 220 // no match so set tracks to an empty Track, instead 221 // of null. This way we won't try to solve the group 222 // again. 223 operation.track = new Track(false); 224 } 225 if (debug) System.out.println("matched nested tracks: " + operation.track.getFeatures()); 226 if (debug) System.out.println("** FINISHED PROCESSING NESTED GROUP **\n"); 227 } else { 228 // if negate is true, then binary invert the Features 229 if (operation.isNegate()) { 230 operation.track = negate(operation.track); 231 } 232 } 233 234 // since this is a new (untested) Operation, need to reset the 235 // Operation's Track iterator. 236 operation.resetTrack(); 237 238 return operation; 239 } 240 241 /** 242 * This will perform a 'binary' inversion of the Features in the 243 * Track. 244 */ 245 private static Track negate(Track track) { 246 Track out = new Track(false); 247 248 // can't invert a Track that doesn't exist 249 if (track.numFeatures() == 0) return out; 250 251 // merge all overlapping Features. 252 track.mergeContiguous(); 253 HashMap sources = track.getSources(); 254 255 // step through each Sequence 256 for (Iterator s = sources.keySet().iterator(); s.hasNext();) { 257 String source = (String) s.next(); 258 Sequence sourceObj = (Sequence) ObjectHandles.sequencePool.get(source); 259 260 int sourceMax = sourceObj.getMax() - 1; 261 262 // invert Features for this source 263 TreeSet features = (TreeSet) sources.get(source); 264 Iterator i = features.iterator(); 265 266 // get first Feature to deal with case when first 267 // Feature doesn't start at 0 268 if (! i.hasNext()) continue; 269 Feature feature = (Feature) i.next(); 270 271 int minFeature = feature.getMin(); 272 if (minFeature > 0) { 273 // deal with first Feature not starting at 0 274 // out.addFeature(new ExactFeature(0, minFeature, sourceObj)); 275 out.addFeature(new ExactFeature(0, minFeature - 1, sourceObj)); 276 } 277 278 // int minOut = feature.getMax(); 279 int minOut = feature.getMax() + 1; 280 while (i.hasNext()) { 281 feature = (Feature) i.next(); 282 // minFeature = feature.getMin(); 283 minFeature = feature.getMin() - 1; 284 out.addFeature(new ExactFeature(minOut, minFeature, sourceObj)); 285 // minOut = feature.getMax(); 286 int fMax = feature.getMax() + 1; 287 if (fMax > minOut) { 288 minOut = fMax; 289 if ((fMax - 2) >= sourceMax) { 290 GloDBUtils.printError("Feature position exceeds sequence length"); 291 return new Track(false); 292 } 293 } 294 } 295 296 if (minOut <= sourceMax) { 297 out.addFeature(new ExactFeature(minOut, sourceMax, sourceObj)); 298 } 299 } 300 301 return out; 302 } 303 } // ParserUtils.java