001    /*
002     * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of
003     * Pennsylvania.
004     *
005     * This file is part of Glo-DB.
006     * 
007     * Glo-DB is free software: you can redistribute it and/or modify it
008     * under the terms of the GNU General Public License as published by
009     * the Free Software Foundation, either version 3 of the License, or
010     * (at your option) any later version.
011     * 
012     * Glo-DB is distributed in the hope that it will be useful, but
013     * WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * General Public License for more details.
016     * 
017     * You should have received a copy of the GNU General Public License
018     * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>.
019     *
020     * @(#)Operation.java
021     */
022    
023    package edu.upenn.gloDB.parser;
024    
025    import edu.upenn.gloDB.*;
026    import java.util.Set;
027    import java.util.ArrayList;
028    import java.util.Iterator;
029    
030    /**
031     * These objects store the operations parsed by the parser.  An
032     * Operation can contain either an array of Operations (ie a set of
033     * Operations to be processed as a group), or a Set of Features (ie a
034     * Track).
035     *
036     * @author  Stephen Fisher
037     * @version $Id: Operation.java,v 1.8.2.22 2007/03/01 21:17:33 fisher Exp $
038     */
039    
040    public class Operation implements Cloneable { 
041    
042             /** 
043              * If this Operation is a 'group' (ie '()'), then instead of
044              * containing a SortedSet of Features, it will contain an array of
045              * Operations.  This array of Operations, being a group will be
046              * processed and converted into a SortedSet of Features and stored
047              * in 'track'.
048              */
049             private ArrayList group = null;
050    
051             /** 
052              * SortedSet of Features to be searched.  This set is filtered by
053              * sequence, minLength, maxLength, minSeqPos, and maxSeqPos.  We
054              * use a Track to hold the operands because we can then take
055              * advantange of the various properties of a Track when
056              * computing the Operation.
057              * @XXX Needs to be private.
058              */
059             public Track track = null;
060    
061             /** 
062              * Flag for the type of operation to be performed with the
063              * preceeding Operation.  When new types are added
064              * Operator.getType(), Parser.jj and Operator need to be updated.
065              *   -1 = null (no preceeding operator - first Track in a group).
066              *   0 = POS, 1 = AND, 2 = OR, 3 = MINUS, 4 = sAND, 5 = sMINUS
067              *   10 = ., 11 = &&, 12 = ||, 13 = -
068              * @XXX Default = '-1'
069              */
070             private int type = -1;
071    
072             /** 
073              * Flag for whether to invert the output Track, treating the Track
074              * as a mask over the Sequence (ie binary operation).  There is no
075              * comparable option that preserves the Feature sets (ie
076              * non-binary operation).
077              * @XXX Default = 'false'
078              */
079             private boolean negate = false;
080    
081             /** 
082              * Minimum number of positions since the previous Track. 
083              * @XXX Default = '0'
084              * @XXX Needs to be private.
085              */
086             int minPos = 0;
087    
088             /** 
089              * Maximum number of positions since the previous Track.
090              * @XXX Default = '0'
091              * @XXX Needs to be private.
092              */
093             int maxPos = 0;
094    
095             /** 
096              * If not null, then ignore all other Sequences when testing for
097              * Feature matches.
098              * @XXX Default = 'null'
099              */
100             private Sequence sequence = null;
101    
102             /** 
103              * Minimum acceptible Feature width. 
104              * @XXX Default = '0'
105              */
106             private int minLength = 0;
107    
108             /** 
109              * Maximum acceptible Feature width. 
110              * @XXX Default = '0'
111              */
112             private int maxLength = 0;
113    
114             /** 
115              * Minimum acceptible position within 'sequence'. 
116              * @XXX Default = '0'
117              */
118             private int minSeqPos = 0;
119    
120             /** 
121              * Maximum acceptible position within 'sequence'.  If -1, then
122              * goes to maximum Sequence length.
123              * @XXX Default = '0'
124              */
125             private int maxSeqPos = 0;
126    
127             /** 
128              * Minimum number of repeating features.
129              * @XXX Default = '1'
130              */
131             private int minRepeat = 1;
132    
133             /** 
134              * Maximum number of repeating features.
135              * @XXX Default = '1'
136              */
137             private int maxRepeat = 1;
138    
139             /** 
140              * Minimum number of positions between repeating features. If '0',
141              * then any overlap between features will be valid.
142              * @XXX Default = '0'
143              */
144             private int minWithin = 0;
145    
146             /** 
147              * Maximum number of positions between repeating features.
148              * @XXX Default = '0'
149              */
150             private int maxWithin = 0;
151    
152             /** 
153              * Flag for whether the mapped Feature should be included in the
154              * output.
155              * @XXX Default = 'false'
156              */
157             private boolean ignore = false;
158    
159             /** 
160              * Number of times the Operation has been matched.
161              * @XXX Needs to be private.
162              */
163             int matched = 0;
164    
165             /** 
166              * An Iterator over 'Track'.  This is initialized when the
167              * Operation is created and will be increment as successive
168              * Features are tested.
169              */
170             private Iterator iterator = null;
171    
172             /** 
173              * Create a new Operation object, used by clone() below.
174              */
175             private Operation() {
176                      initialize();
177             }
178    
179             /** 
180              * Create a new Operation object containing a group.
181              */
182             public Operation(ArrayList group) {
183                      this.group = new ArrayList(group);
184                      initialize();
185             }
186    
187             /** 
188              * Create a new Operation object with a Track.
189              */
190             public Operation(Track track) {
191                      this.track = (Track) track.cloneTrack(false);
192                      initialize();
193             }
194    
195             /** 
196              * Create a new Operation object with a set of Tracks.
197              */
198             public Operation(Set tracks) {
199                      // iterate over all Tracks in the set
200                      Iterator i = tracks.iterator();
201                      Track t = (Track) i.next();
202    
203                      // create a new Track with the contents of the first Track.
204                      // This avoids the need to recreate the sources HashMap.
205                      this.track = (Track) t.cloneTrack(false);
206    
207                      // add all of the Features for all of the remaining Tracks
208                      while (i.hasNext()) {
209                                    t = (Track) i.next();
210                                    this.track.addFeatures(t.getFeatures());
211                      }
212    
213                      initialize();
214             }
215    
216             private void initialize() {
217                      // initialize the 'tracks' iterator.
218                      resetTrack();
219             }
220    
221        //--------------------------------------------------------------------------
222        // Setters and Getters
223       
224             /** Sets the array of Operations. */
225             public void setGroup(ArrayList group) { this.group = group; }
226    
227             /** Gets the array of Operations. */
228             public ArrayList getGroup() { return this.group; }
229    
230             /** 
231              * Set the Operation type:
232              *   0 = POS, 1 = AND, 2 = nAND, 3 = OR, 4 = LESS
233              *   10 = ., 11 = &&, 12 = ^&&, 13 = ||, 14 = -
234              */
235             public void setType(int type) { this.type = type; }
236    
237             /** 
238              * Get the Operation type:
239              *   0 = POS, 1 = AND, 2 = nAND, 3 = OR, 4 = LESS
240              *   10 = ., 11 = &&, 12 = ^&&, 13 = ||, 14 = -
241              */
242             public int getType() { return this.type; }
243    
244             /** 
245              * Flag for whether to invert the output Track, treating the Track
246              * as a mask over the Sequence (ie binary operation).  There is no
247              * comparable option that preserves the Feature sets (ie
248              * non-binary operation).
249              */
250             public void setNegate(boolean negate) { this.negate = negate; }
251    
252             /**
253              * Flag for whether to invert the output Track, treating the Track
254              * as a mask over the Sequence (ie binary operation).  There is no
255              * comparable option that preserves the Feature sets (ie
256              * non-binary operation).
257              */
258             public boolean isNegate() { return this.negate; }
259    
260             /** Sets the ignore flag. */
261             public void setIgnore(boolean ignore) { this.ignore = ignore; }
262    
263             /** Gets the ignore flag. */
264             public boolean isIgnore() { return this.ignore; }
265    
266             /**
267              * This will remove all Features from "track" that are not on
268              * "sequence".  If this is a group, then 'seq' is propogated to
269              * all inner groups.
270              */
271             public void setSequence(Sequence seq) {
272                      this.sequence = seq;
273    
274                      // propogate the Sequence info to all inner groups
275                      if (isGroup()) {
276                                    for (Iterator i = group.iterator(); i.hasNext();) {
277                                             Operation operation = (Operation) i.next();
278                                             operation.setSequence(seq);
279                                    }
280                      }
281    
282                      // do the sequence filtering for this Operation
283                      filterOnSequence();
284             }
285    
286             /**
287              * This will set the min/max Length for all Features in "track".
288              * If not a group, then the values are set and "filterOnLength()"
289              * is run to perform the filtering.  If a group, then
290              * filterOnLength() is not run and must be run separately, because
291              * should only filter the output of the group operation.
292              * @XXX Should throw an exception if max < min.
293              */
294             public void setLength(int min, int max) {
295                      // make sure lengths are legal.
296                      if (max < min) { 
297                                    GloDBUtils.printError("The max length is less than the min length, when filtering on length.");
298                                    return; 
299                      }
300    
301                      this.minLength = min;
302                      this.maxLength = max;
303    
304                      // if group then don't do anything, this will be handled in
305                      // ParserUtils.getOperation()
306                      if (isGroup()) return;
307    
308                      // not a group so we can just do the length filtering now
309                      filterOnLength();
310             }
311    
312             /**
313              * This will set the min/max seqquence position for all Features
314              * in "track".  If not a group, then the values are set and
315              * "filterOnSeqPos()" is run to perform the filtering.  If a
316              * group, then filterOnSeqPos() is not run and must be run
317              * separately.
318              * @XXX Should throw an exception if max < min.
319              */
320             public void setSeqPos(int min, int max) {
321                      // make sure lengths are legal.
322                      if ((max != -1) && (max < min)) { 
323                                    GloDBUtils.printError("The max pos is less than the min pos, when filtering on sequence position.");
324                                    return; 
325                      }
326    
327                      this.minSeqPos = min;
328                      this.maxSeqPos = max;
329    
330                      // if group then don't do anything, this will be handled in
331                      // ParserUtils.getOperation()
332                      if (isGroup()) return;
333    
334                      // not a group so we can just do the filtering now
335                      filterOnSeqPos();
336             }
337    
338             /**
339              * This will set the min/max Repeat for all Features in "track".
340              * If not a group, then the values are set and "filterOnRepeat()"
341              * is run to perform the filtering.  If a group, then
342              * filterOnRepeat() is not run and must be run separately, because
343              * should only filter the output of the group operation.
344              * @XXX Should throw an exception if max < min.
345              */
346             public void setRepeat(int min, int max) { setRepeat(min, max, 0, 0); }
347             public void setRepeat(int min, int max, int minW, int maxW) {
348                      // make sure repeats are legal.
349                      if (max < min) { 
350                                    GloDBUtils.printError("The max repeat is less than the min repeat, when filtering on repeats.");
351                                    return; 
352                      }
353                      if (maxW < minW) { 
354                                    GloDBUtils.printError("The max within is less than the min within, when filtering on repeats.");
355                                    return; 
356                      }
357    
358                      this.minRepeat = min;
359                      this.maxRepeat = max;
360                      this.minWithin = minW;
361                      this.maxWithin = maxW;
362    
363                      // if group then don't do anything, this will be handled in
364                      // ParserUtils.getOperation()
365                      if (isGroup()) return;
366    
367                      // not a group so we can just do the repeat filtering now
368                      filterOnRepeat();
369             }
370    
371        //--------------------------------------------------------------------------
372        // Miscellaneous Methods
373    
374             /**
375              * This will remove all Features from "track" that are not on
376              * "sequence".
377              */
378             public void filterOnSequence() {
379                      // if no Sequence or Features then don't do anything.
380                      if ((sequence == null) || (track == null)) return;
381    
382                      track.filterOnSequence(sequence.getID());
383             }
384    
385             /**
386              * This will remove all Features from "track" that are not within
387              * the 'minLength'/'maxLength' boundaries.
388              */
389             public void filterOnLength() {
390                      // if maxLength is 0 or no features then don't filter.
391                      if ((maxLength == 0) || (track == null)) return;
392    
393                      track.filterOnLength(minLength, maxLength);
394             }
395    
396             /**
397              * This will remove all Features from "track" that are not within
398              * the 'minSeqPos'/'maxSeqPos' boundaries.  If 'max' is -1, then
399              * goes to maximum Sequence length.
400              */
401             public void filterOnSeqPos() {
402                      // if maxSeqPos is 0 or no features then don't filter
403                      if ((maxSeqPos == 0) || (track == null)) return;
404    
405                      track.filterOnSeqPos(minSeqPos, maxSeqPos);
406             }
407    
408             /**
409              * This will remove all Features from "track" that are not within
410              * the 'minRepeat/maxRepeat' and 'minWithin/maxWithin' boundaries.
411              */
412             public void filterOnRepeat() {
413                      // if minRepeat/maxRepeat are less than 2 or no features then
414                      // nothing to filter
415                      if ((minRepeat < 2) || (maxRepeat < 2) || (track == null)) return;
416    
417                      track.filterOnRepeat(minRepeat, maxRepeat, minWithin, maxWithin);
418             }
419       
420             /** Returns true if this Operation is a group. */
421             public boolean isGroup() { 
422                      if (group == null) return false;
423                      else return true;
424             }
425    
426             /**
427              * Returns the number of Features contained in Operation.
428              */
429             public int numFeatures() { 
430                      if (track == null) {
431                                    return 0;
432                      } else {
433                                    return track.numFeatures();
434                      }
435             }
436    
437             /** Restart iterator. */
438             public void resetTrack() { 
439                      if (track != null) iterator = track.featureIterator();
440             }
441    
442             /** Return true if there are more Features in 'iterator'. */
443             public boolean hasNextFeature() { 
444                      if (iterator != null) {
445                                    return iterator.hasNext(); 
446                      } else {
447                                    return false;
448                      }
449             }
450    
451             /** Restart the Tracks iterator and return the first Feature. */
452             public Feature firstFeature() { 
453                      if (track != null) {
454                                    resetTrack();
455                                    return nextFeature(); 
456                      } else {
457                                    return null;
458                      }
459             }
460    
461             /** Return the next Feature in iterator. */
462             public Feature nextFeature() {
463                      if (hasNextFeature()) { 
464                                    return (Feature) iterator.next(); 
465                      } else { 
466                                    return null; 
467                      }
468             }
469    
470             /** Returns an iterator over the group list, null if empty group. */
471             public Iterator groupIterator() { 
472                      if (group == null) { return null; }
473                      else { return group.iterator(); }
474             }
475    
476             /**
477              * Create a shallow clone (just clone the structure, not the
478              * Objects) of the existing object.
479              */
480             public Object clone() {
481                      Operation operation = new Operation();
482                      operation.group = this.group;
483                      operation.track = this.track;
484                      operation.type = this.type;
485                      operation.negate = this.negate;
486                      operation.minPos = this.minPos;
487                      operation.maxPos = this.maxPos;
488                      operation.sequence = this.sequence;
489                      operation.minLength = this.minLength;
490                      operation.maxLength = this.maxLength;
491                      operation.ignore = this.ignore;
492                      operation.minSeqPos = this.minSeqPos;
493                      operation.maxSeqPos = this.maxSeqPos;
494                      operation.minRepeat = this.minRepeat;
495                      operation.maxRepeat = this.maxRepeat;
496                      operation.minWithin = this.minWithin;
497                      operation.maxWithin = this.maxWithin;
498                      operation.matched = this.matched;
499                      operation.iterator = this.iterator;
500    
501                      return operation;
502             }
503    
504             /** Returns Operation information for debugging purposes. */
505             public String toString() {
506                      String out = "\n";
507    
508                      if (isGroup()) {
509                                    out += "group:          " + group.size() + "\n";
510                      } else {
511                                    out += "group:          null\n";
512                      }
513                      if (track == null) {
514                                    out += "Track:          null\n";
515                      } else if (track.numFeatures() == 0) {
516                                    out += "Track (0 features):\n";
517                      } else {
518                                    out += "Track (" + track.numFeatures() + " features):" + track.toStringMore();
519                      }
520                      out += "Type:           " + Operator.getType(type) + "\n";
521                      out += "Not:            " + negate + "\n";
522                      out += "Min spacing:    " + minPos + "\n";
523                      out += "Max spacing:    " + maxPos + "\n";
524                      if (sequence == null) {
525                                    out += "Sequence:       null\n";
526                      } else {
527                                    out += "Sequence:       " + sequence.getID() + "\n";
528                      }
529                      out += "Min length:     " + minLength + "\n";
530                      out += "Max length:     " + maxLength + "\n";
531                      out += "Min seq pos:    " + minSeqPos + "\n";
532                      out += "Max seq pos:    " + maxSeqPos + "\n";
533                      out += "Min repeat:     " + minRepeat + "\n";
534                      out += "Max repeat:     " + maxRepeat + "\n";
535                      out += "Min within:     " + minWithin + "\n";
536                      out += "Max within:     " + maxWithin + "\n";
537                      out += "Ignore:         " + ignore + "\n";
538                      out += "Matched:        " + matched + "\n";
539    
540                      return out;
541             }
542    } // Operation.java