001    /*
002     * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of
003     * Pennsylvania.
004     *
005     * This file is part of Glo-DB.
006     * 
007     * Glo-DB is free software: you can redistribute it and/or modify it
008     * under the terms of the GNU General Public License as published by
009     * the Free Software Foundation, either version 3 of the License, or
010     * (at your option) any later version.
011     * 
012     * Glo-DB is distributed in the hope that it will be useful, but
013     * WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * General Public License for more details.
016     * 
017     * You should have received a copy of the GNU General Public License
018     * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>.
019     *
020     * @(#)ParserUtils.java
021     */
022    
023    package edu.upenn.gloDB.parser;
024    
025    import edu.upenn.gloDB.*;
026    import java.util.HashMap;
027    import java.util.TreeSet;
028    import java.util.ArrayList;
029    import java.util.Iterator;
030    import java.io.StringReader;
031    
032    /**
033     * Methods to process the Operations ArrayList produced by the parser.
034     *
035     * @author  Stephen Fisher
036     * @version $Id: ParserUtils.java,v 1.46.2.21 2007/03/01 21:17:33 fisher Exp $
037     */
038    
039    public class ParserUtils {
040    
041             public static boolean debug = false;
042             public static boolean debugSolveAll = false;
043             public static boolean debugSolveOps = false;
044    
045        //--------------------------------------------------------------------------
046        // Miscellaneous Methods
047    
048             /**
049              * This will parse the input string to create the operations
050              * array, then compute the operations and assign the results to
051              * the Track indicated.  If the Track already exists, it will be
052              * overwritten.
053              * @XXX For now, trackPool and sequencePool are used as the Track
054              * and Sequence sets.  These should be options that the user can
055              * set within the parse string.  Within the parser they are used
056              * to determine if a Track or Sequence object is valid and are
057              * used as the set of all Tracks for the "__T" parser option.
058              */
059             public static Track compute(String parse) {
060                      Parser parser = new Parser(new StringReader(parse));
061                      ArrayList ops;
062    
063                      try {
064                                    // create the array of operations
065                                    ops = parser.run(ObjectHandles.trackPool, ObjectHandles.sequencePool);
066                      } catch (ParseException e) {
067                                    if (e.getMessage() == null) {
068                                             GloDBUtils.printError("Invalid expression.  String can not be solved.");
069                                    } else {
070                                             GloDBUtils.printError("Invalid expression. " + e.getMessage());
071                                    }
072                                    return null;
073                      } catch (TokenMgrError e) {
074                                    if (e.getMessage() == null) {
075                                             GloDBUtils.printError("Invalid expression.  String can not be solved.");
076                                    } else {
077                                             GloDBUtils.printError("Invalid expression. " + e.getMessage());
078                                    }
079                                    return null;
080                      }
081    
082                      // not sure if this will ever happen
083                      if (ops == null) return null;
084    
085                      // get the id for the assignment track
086                      String id = parser.getId();
087    
088                      Track track;
089                      if (ObjectHandles.trackPool.containsKey(id)) {
090                                    // warn user if 'id' is being overwritten
091                                    GloDBUtils.printMsg("Track \"" + id + "\" already existed and has been overwritten.", 
092                                                                            GloDBUtils.WARNING);
093    
094                                    // get the existing Track object and erase its attributes
095                                    track = ObjectHandles.getTrack(id);
096                                    track.erase();
097    
098                                    // XXX this is inefficient but will retain any links to
099                                    // the existing Track that the user might already have
100                                    track.setFeatures(solveOpsRecurse(ops).getFeatures());
101                      } else {
102                                    track = solveOpsRecurse(ops);
103    
104                                    // add output Track to set of all Track
105                                    ObjectHandles.addTrack(track);
106    
107                                    // need to set the Track ID.  This will also add the Track
108                                    // to ObjectHandles.trackPool
109                                    try { track.setID(id); }
110                                    catch (InvalidIDException e) { 
111                                            track.setID(Track.randomID("_" + id + "_"));
112                                    }
113                      }
114    
115                      return track;
116             }
117    
118             /**
119              * This will run solveOpsRecurse() which will recursively solve
120              * the Operations in the ArrayList 'ops'.  This wrapper will make
121              * sure the output Track is added to the trackPool.  The temporary
122              * Tracks created along the way are not added to the trackPool.
123             */
124             public static Track solveOps(ArrayList ops) {
125                      return solveOps(ops, "");
126             }
127    
128             /**
129              * This will run solveOpsRecurse() which will recursively solve
130              * the Operations in the ArrayList 'ops'.  This wrapper will make
131              * sure the output Track is added to the trackPool.  The temporary
132              * Tracks created along the way are not added to the trackPool.
133              * The 'id' will be used as the output Track ID.
134             */
135             public static Track solveOps(ArrayList ops, String id) {
136                      if (ops == null) return null;
137    
138                      // get output as a Track
139                      Track out = solveOpsRecurse(ops);
140                      
141                      // add output Track to trackPool.  if ID already exists, then
142                      // add a random tag to the ID.  if ID is blank then add the
143                      // Track to trackPool and don't change the ID.
144                      if (id == "") {
145                                    // add output Track to set of all Track
146                                    ObjectHandles.addTrack(out);
147                      } else {
148                                    try { out.setID(id); }
149                                    catch (InvalidIDException e) { 
150                                             out.setID(Track.randomID("_" + id + "_"));
151                                    }
152                      }
153    
154                      return out;
155             }
156    
157             /**
158              * This will solve the Operations in the ArrayList 'ops', calling
159              * itself to recursively resolve groups of Operations.
160             */
161             private static Track solveOpsRecurse(ArrayList ops) {
162                      if (ops == null) return null;
163    
164                      // the output set of Tracks, initialized to the left hand side
165                      // of the Operator or set of Operators.
166                      Operation operation = getOperation((Operation) ops.get(0));
167    
168                      // start out with a copy of the initial Track.  This will
169                      // serve as the left side of the next Operation.  
170                      
171                      // XXX by cloning the Track, we don't change the original
172                      // Operation, allowing this array of Operations to be solved
173                      // again.  Since the Operation already has a cloned Track,
174                      // the usefullness of this may not outway the computational
175                      // cost.
176                      Track out = (Track) operation.track.cloneTrack(false);
177    
178                      // loop through all Operations
179                      for (int i = 1; i < ops.size(); i++) {
180                                    operation = getOperation((Operation) ops.get(i));
181    
182                                    if (debugSolveOps) System.out.println("Operation : " + i);
183                                    if (debugSolveOps) System.out.println("Op type: " + operation.getType());
184    
185                                    out = Operator.processOperation(out, operation);
186    
187                                    if (debugSolveOps) System.out.println(out);
188                      }
189    
190                      return out;
191             }
192    
193             /**
194              * For Operations that contain groups, this will solve the group
195              * and store the resulting TreeSet of Tracks in the Operation.
196              * This will be called recursively if there are nested groups.
197              */
198             private static Operation getOperation(Operation operation) {
199                      //              if (operation.track == null) {
200                      if (operation.isGroup()) {
201                                    if (debug) System.out.println("** FOUND NESTED GROUP **");
202                                    
203                                    // 'solved' will contain ALL matches and EACH match will
204                                    // be a Track, the collection of Tracks is the new
205                                    // value of "operation.track".  These Tracks are not
206                                    // in ObjectHandle.trackPool.
207                                    operation.track = solveOpsRecurse(operation.getGroup());
208                                    if (operation.track != null) { // add the set of Tracks to the Operation.  
209                                             // perform length and seqPos filters
210                                             //                                      operation.filterOnLength();
211                                             operation.filterOnLength();
212                                             operation.filterOnSeqPos();
213                                             operation.filterOnRepeat();
214    
215                                             // if negate is true, then binary invert the Features
216                                             if (operation.isNegate()) {
217                                                      operation.track = negate(operation.track);
218                                             }
219                                    } else {
220                                             // no match so set tracks to an empty Track, instead
221                                             // of null.  This way we won't try to solve the group
222                                             // again.
223                                             operation.track = new Track(false);
224                                    }
225                                    if (debug) System.out.println("matched nested tracks: " + operation.track.getFeatures());
226                                    if (debug) System.out.println("** FINISHED PROCESSING NESTED GROUP **\n");
227                      } else {
228                                    // if negate is true, then binary invert the Features
229                                    if (operation.isNegate()) {
230                                             operation.track = negate(operation.track);
231                                    }
232                      }
233    
234                      // since this is a new (untested) Operation, need to reset the
235                      // Operation's Track iterator.
236                      operation.resetTrack();
237    
238                      return operation;
239             }
240    
241             /** 
242              * This will perform a 'binary' inversion of the Features in the
243              * Track.
244              */
245             private static Track negate(Track track) {
246                      Track out = new Track(false);
247                      
248                      // can't invert a Track that doesn't exist
249                      if (track.numFeatures() == 0) return out;
250    
251                      // merge all overlapping Features.
252                      track.mergeContiguous();
253                      HashMap sources = track.getSources();
254    
255                      // step through each Sequence
256                      for (Iterator s = sources.keySet().iterator(); s.hasNext();) {
257                                    String source = (String) s.next();
258                                    Sequence sourceObj = (Sequence) ObjectHandles.sequencePool.get(source);
259    
260                                    int sourceMax = sourceObj.getMax() - 1;
261    
262                                    // invert Features for this source
263                                    TreeSet features = (TreeSet) sources.get(source);
264                                    Iterator i = features.iterator();
265    
266                                    // get first Feature to deal with case when first
267                                    // Feature doesn't start at 0
268                                    if (! i.hasNext()) continue;
269                                    Feature feature = (Feature) i.next();
270    
271                                    int minFeature = feature.getMin();
272                                    if (minFeature > 0) { 
273                                             // deal with first Feature not starting at 0
274                                             //                                      out.addFeature(new ExactFeature(0, minFeature, sourceObj));
275                                             out.addFeature(new ExactFeature(0, minFeature - 1, sourceObj));
276                                    }
277    
278                                    //                              int minOut = feature.getMax();
279                                    int minOut = feature.getMax() + 1;
280                                    while (i.hasNext()) {
281                                             feature = (Feature) i.next();
282                                             //                                      minFeature = feature.getMin();
283                                             minFeature = feature.getMin() - 1;
284                                             out.addFeature(new ExactFeature(minOut, minFeature, sourceObj));
285                                             //                                      minOut = feature.getMax();
286                                             int fMax = feature.getMax() + 1;
287                                             if (fMax > minOut) {
288                                                      minOut = fMax;
289                                                      if ((fMax - 2) >= sourceMax) {
290                                                                    GloDBUtils.printError("Feature position exceeds sequence length");
291                                                                    return new Track(false);
292                                                      }
293                                             }
294                                    }
295    
296                                    if (minOut <= sourceMax) {
297                                             out.addFeature(new ExactFeature(minOut, sourceMax, sourceObj));
298                                    }
299                      }
300    
301                      return out;
302             }
303    } // ParserUtils.java