001 /* 002 * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of 003 * Pennsylvania. 004 * 005 * This file is part of Glo-DB. 006 * 007 * Glo-DB is free software: you can redistribute it and/or modify it 008 * under the terms of the GNU General Public License as published by 009 * the Free Software Foundation, either version 3 of the License, or 010 * (at your option) any later version. 011 * 012 * Glo-DB is distributed in the hope that it will be useful, but 013 * WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>. 019 * 020 * @(#)Operation.java 021 */ 022 023 package edu.upenn.gloDB.parser; 024 025 import edu.upenn.gloDB.*; 026 import java.util.Set; 027 import java.util.ArrayList; 028 import java.util.Iterator; 029 030 /** 031 * These objects store the operations parsed by the parser. An 032 * Operation can contain either an array of Operations (ie a set of 033 * Operations to be processed as a group), or a Set of Features (ie a 034 * Track). 035 * 036 * @author Stephen Fisher 037 * @version $Id: Operation.java,v 1.8.2.22 2007/03/01 21:17:33 fisher Exp $ 038 */ 039 040 public class Operation implements Cloneable { 041 042 /** 043 * If this Operation is a 'group' (ie '()'), then instead of 044 * containing a SortedSet of Features, it will contain an array of 045 * Operations. This array of Operations, being a group will be 046 * processed and converted into a SortedSet of Features and stored 047 * in 'track'. 048 */ 049 private ArrayList group = null; 050 051 /** 052 * SortedSet of Features to be searched. This set is filtered by 053 * sequence, minLength, maxLength, minSeqPos, and maxSeqPos. We 054 * use a Track to hold the operands because we can then take 055 * advantange of the various properties of a Track when 056 * computing the Operation. 057 * @XXX Needs to be private. 058 */ 059 public Track track = null; 060 061 /** 062 * Flag for the type of operation to be performed with the 063 * preceeding Operation. When new types are added 064 * Operator.getType(), Parser.jj and Operator need to be updated. 065 * -1 = null (no preceeding operator - first Track in a group). 066 * 0 = POS, 1 = AND, 2 = OR, 3 = MINUS, 4 = sAND, 5 = sMINUS 067 * 10 = ., 11 = &&, 12 = ||, 13 = - 068 * @XXX Default = '-1' 069 */ 070 private int type = -1; 071 072 /** 073 * Flag for whether to invert the output Track, treating the Track 074 * as a mask over the Sequence (ie binary operation). There is no 075 * comparable option that preserves the Feature sets (ie 076 * non-binary operation). 077 * @XXX Default = 'false' 078 */ 079 private boolean negate = false; 080 081 /** 082 * Minimum number of positions since the previous Track. 083 * @XXX Default = '0' 084 * @XXX Needs to be private. 085 */ 086 int minPos = 0; 087 088 /** 089 * Maximum number of positions since the previous Track. 090 * @XXX Default = '0' 091 * @XXX Needs to be private. 092 */ 093 int maxPos = 0; 094 095 /** 096 * If not null, then ignore all other Sequences when testing for 097 * Feature matches. 098 * @XXX Default = 'null' 099 */ 100 private Sequence sequence = null; 101 102 /** 103 * Minimum acceptible Feature width. 104 * @XXX Default = '0' 105 */ 106 private int minLength = 0; 107 108 /** 109 * Maximum acceptible Feature width. 110 * @XXX Default = '0' 111 */ 112 private int maxLength = 0; 113 114 /** 115 * Minimum acceptible position within 'sequence'. 116 * @XXX Default = '0' 117 */ 118 private int minSeqPos = 0; 119 120 /** 121 * Maximum acceptible position within 'sequence'. If -1, then 122 * goes to maximum Sequence length. 123 * @XXX Default = '0' 124 */ 125 private int maxSeqPos = 0; 126 127 /** 128 * Minimum number of repeating features. 129 * @XXX Default = '1' 130 */ 131 private int minRepeat = 1; 132 133 /** 134 * Maximum number of repeating features. 135 * @XXX Default = '1' 136 */ 137 private int maxRepeat = 1; 138 139 /** 140 * Minimum number of positions between repeating features. If '0', 141 * then any overlap between features will be valid. 142 * @XXX Default = '0' 143 */ 144 private int minWithin = 0; 145 146 /** 147 * Maximum number of positions between repeating features. 148 * @XXX Default = '0' 149 */ 150 private int maxWithin = 0; 151 152 /** 153 * Flag for whether the mapped Feature should be included in the 154 * output. 155 * @XXX Default = 'false' 156 */ 157 private boolean ignore = false; 158 159 /** 160 * Number of times the Operation has been matched. 161 * @XXX Needs to be private. 162 */ 163 int matched = 0; 164 165 /** 166 * An Iterator over 'Track'. This is initialized when the 167 * Operation is created and will be increment as successive 168 * Features are tested. 169 */ 170 private Iterator iterator = null; 171 172 /** 173 * Create a new Operation object, used by clone() below. 174 */ 175 private Operation() { 176 initialize(); 177 } 178 179 /** 180 * Create a new Operation object containing a group. 181 */ 182 public Operation(ArrayList group) { 183 this.group = new ArrayList(group); 184 initialize(); 185 } 186 187 /** 188 * Create a new Operation object with a Track. 189 */ 190 public Operation(Track track) { 191 this.track = (Track) track.cloneTrack(false); 192 initialize(); 193 } 194 195 /** 196 * Create a new Operation object with a set of Tracks. 197 */ 198 public Operation(Set tracks) { 199 // iterate over all Tracks in the set 200 Iterator i = tracks.iterator(); 201 Track t = (Track) i.next(); 202 203 // create a new Track with the contents of the first Track. 204 // This avoids the need to recreate the sources HashMap. 205 this.track = (Track) t.cloneTrack(false); 206 207 // add all of the Features for all of the remaining Tracks 208 while (i.hasNext()) { 209 t = (Track) i.next(); 210 this.track.addFeatures(t.getFeatures()); 211 } 212 213 initialize(); 214 } 215 216 private void initialize() { 217 // initialize the 'tracks' iterator. 218 resetTrack(); 219 } 220 221 //-------------------------------------------------------------------------- 222 // Setters and Getters 223 224 /** Sets the array of Operations. */ 225 public void setGroup(ArrayList group) { this.group = group; } 226 227 /** Gets the array of Operations. */ 228 public ArrayList getGroup() { return this.group; } 229 230 /** 231 * Set the Operation type: 232 * 0 = POS, 1 = AND, 2 = nAND, 3 = OR, 4 = LESS 233 * 10 = ., 11 = &&, 12 = ^&&, 13 = ||, 14 = - 234 */ 235 public void setType(int type) { this.type = type; } 236 237 /** 238 * Get the Operation type: 239 * 0 = POS, 1 = AND, 2 = nAND, 3 = OR, 4 = LESS 240 * 10 = ., 11 = &&, 12 = ^&&, 13 = ||, 14 = - 241 */ 242 public int getType() { return this.type; } 243 244 /** 245 * Flag for whether to invert the output Track, treating the Track 246 * as a mask over the Sequence (ie binary operation). There is no 247 * comparable option that preserves the Feature sets (ie 248 * non-binary operation). 249 */ 250 public void setNegate(boolean negate) { this.negate = negate; } 251 252 /** 253 * Flag for whether to invert the output Track, treating the Track 254 * as a mask over the Sequence (ie binary operation). There is no 255 * comparable option that preserves the Feature sets (ie 256 * non-binary operation). 257 */ 258 public boolean isNegate() { return this.negate; } 259 260 /** Sets the ignore flag. */ 261 public void setIgnore(boolean ignore) { this.ignore = ignore; } 262 263 /** Gets the ignore flag. */ 264 public boolean isIgnore() { return this.ignore; } 265 266 /** 267 * This will remove all Features from "track" that are not on 268 * "sequence". If this is a group, then 'seq' is propogated to 269 * all inner groups. 270 */ 271 public void setSequence(Sequence seq) { 272 this.sequence = seq; 273 274 // propogate the Sequence info to all inner groups 275 if (isGroup()) { 276 for (Iterator i = group.iterator(); i.hasNext();) { 277 Operation operation = (Operation) i.next(); 278 operation.setSequence(seq); 279 } 280 } 281 282 // do the sequence filtering for this Operation 283 filterOnSequence(); 284 } 285 286 /** 287 * This will set the min/max Length for all Features in "track". 288 * If not a group, then the values are set and "filterOnLength()" 289 * is run to perform the filtering. If a group, then 290 * filterOnLength() is not run and must be run separately, because 291 * should only filter the output of the group operation. 292 * @XXX Should throw an exception if max < min. 293 */ 294 public void setLength(int min, int max) { 295 // make sure lengths are legal. 296 if (max < min) { 297 GloDBUtils.printError("The max length is less than the min length, when filtering on length."); 298 return; 299 } 300 301 this.minLength = min; 302 this.maxLength = max; 303 304 // if group then don't do anything, this will be handled in 305 // ParserUtils.getOperation() 306 if (isGroup()) return; 307 308 // not a group so we can just do the length filtering now 309 filterOnLength(); 310 } 311 312 /** 313 * This will set the min/max seqquence position for all Features 314 * in "track". If not a group, then the values are set and 315 * "filterOnSeqPos()" is run to perform the filtering. If a 316 * group, then filterOnSeqPos() is not run and must be run 317 * separately. 318 * @XXX Should throw an exception if max < min. 319 */ 320 public void setSeqPos(int min, int max) { 321 // make sure lengths are legal. 322 if ((max != -1) && (max < min)) { 323 GloDBUtils.printError("The max pos is less than the min pos, when filtering on sequence position."); 324 return; 325 } 326 327 this.minSeqPos = min; 328 this.maxSeqPos = max; 329 330 // if group then don't do anything, this will be handled in 331 // ParserUtils.getOperation() 332 if (isGroup()) return; 333 334 // not a group so we can just do the filtering now 335 filterOnSeqPos(); 336 } 337 338 /** 339 * This will set the min/max Repeat for all Features in "track". 340 * If not a group, then the values are set and "filterOnRepeat()" 341 * is run to perform the filtering. If a group, then 342 * filterOnRepeat() is not run and must be run separately, because 343 * should only filter the output of the group operation. 344 * @XXX Should throw an exception if max < min. 345 */ 346 public void setRepeat(int min, int max) { setRepeat(min, max, 0, 0); } 347 public void setRepeat(int min, int max, int minW, int maxW) { 348 // make sure repeats are legal. 349 if (max < min) { 350 GloDBUtils.printError("The max repeat is less than the min repeat, when filtering on repeats."); 351 return; 352 } 353 if (maxW < minW) { 354 GloDBUtils.printError("The max within is less than the min within, when filtering on repeats."); 355 return; 356 } 357 358 this.minRepeat = min; 359 this.maxRepeat = max; 360 this.minWithin = minW; 361 this.maxWithin = maxW; 362 363 // if group then don't do anything, this will be handled in 364 // ParserUtils.getOperation() 365 if (isGroup()) return; 366 367 // not a group so we can just do the repeat filtering now 368 filterOnRepeat(); 369 } 370 371 //-------------------------------------------------------------------------- 372 // Miscellaneous Methods 373 374 /** 375 * This will remove all Features from "track" that are not on 376 * "sequence". 377 */ 378 public void filterOnSequence() { 379 // if no Sequence or Features then don't do anything. 380 if ((sequence == null) || (track == null)) return; 381 382 track.filterOnSequence(sequence.getID()); 383 } 384 385 /** 386 * This will remove all Features from "track" that are not within 387 * the 'minLength'/'maxLength' boundaries. 388 */ 389 public void filterOnLength() { 390 // if maxLength is 0 or no features then don't filter. 391 if ((maxLength == 0) || (track == null)) return; 392 393 track.filterOnLength(minLength, maxLength); 394 } 395 396 /** 397 * This will remove all Features from "track" that are not within 398 * the 'minSeqPos'/'maxSeqPos' boundaries. If 'max' is -1, then 399 * goes to maximum Sequence length. 400 */ 401 public void filterOnSeqPos() { 402 // if maxSeqPos is 0 or no features then don't filter 403 if ((maxSeqPos == 0) || (track == null)) return; 404 405 track.filterOnSeqPos(minSeqPos, maxSeqPos); 406 } 407 408 /** 409 * This will remove all Features from "track" that are not within 410 * the 'minRepeat/maxRepeat' and 'minWithin/maxWithin' boundaries. 411 */ 412 public void filterOnRepeat() { 413 // if minRepeat/maxRepeat are less than 2 or no features then 414 // nothing to filter 415 if ((minRepeat < 2) || (maxRepeat < 2) || (track == null)) return; 416 417 track.filterOnRepeat(minRepeat, maxRepeat, minWithin, maxWithin); 418 } 419 420 /** Returns true if this Operation is a group. */ 421 public boolean isGroup() { 422 if (group == null) return false; 423 else return true; 424 } 425 426 /** 427 * Returns the number of Features contained in Operation. 428 */ 429 public int numFeatures() { 430 if (track == null) { 431 return 0; 432 } else { 433 return track.numFeatures(); 434 } 435 } 436 437 /** Restart iterator. */ 438 public void resetTrack() { 439 if (track != null) iterator = track.featureIterator(); 440 } 441 442 /** Return true if there are more Features in 'iterator'. */ 443 public boolean hasNextFeature() { 444 if (iterator != null) { 445 return iterator.hasNext(); 446 } else { 447 return false; 448 } 449 } 450 451 /** Restart the Tracks iterator and return the first Feature. */ 452 public Feature firstFeature() { 453 if (track != null) { 454 resetTrack(); 455 return nextFeature(); 456 } else { 457 return null; 458 } 459 } 460 461 /** Return the next Feature in iterator. */ 462 public Feature nextFeature() { 463 if (hasNextFeature()) { 464 return (Feature) iterator.next(); 465 } else { 466 return null; 467 } 468 } 469 470 /** Returns an iterator over the group list, null if empty group. */ 471 public Iterator groupIterator() { 472 if (group == null) { return null; } 473 else { return group.iterator(); } 474 } 475 476 /** 477 * Create a shallow clone (just clone the structure, not the 478 * Objects) of the existing object. 479 */ 480 public Object clone() { 481 Operation operation = new Operation(); 482 operation.group = this.group; 483 operation.track = this.track; 484 operation.type = this.type; 485 operation.negate = this.negate; 486 operation.minPos = this.minPos; 487 operation.maxPos = this.maxPos; 488 operation.sequence = this.sequence; 489 operation.minLength = this.minLength; 490 operation.maxLength = this.maxLength; 491 operation.ignore = this.ignore; 492 operation.minSeqPos = this.minSeqPos; 493 operation.maxSeqPos = this.maxSeqPos; 494 operation.minRepeat = this.minRepeat; 495 operation.maxRepeat = this.maxRepeat; 496 operation.minWithin = this.minWithin; 497 operation.maxWithin = this.maxWithin; 498 operation.matched = this.matched; 499 operation.iterator = this.iterator; 500 501 return operation; 502 } 503 504 /** Returns Operation information for debugging purposes. */ 505 public String toString() { 506 String out = "\n"; 507 508 if (isGroup()) { 509 out += "group: " + group.size() + "\n"; 510 } else { 511 out += "group: null\n"; 512 } 513 if (track == null) { 514 out += "Track: null\n"; 515 } else if (track.numFeatures() == 0) { 516 out += "Track (0 features):\n"; 517 } else { 518 out += "Track (" + track.numFeatures() + " features):" + track.toStringMore(); 519 } 520 out += "Type: " + Operator.getType(type) + "\n"; 521 out += "Not: " + negate + "\n"; 522 out += "Min spacing: " + minPos + "\n"; 523 out += "Max spacing: " + maxPos + "\n"; 524 if (sequence == null) { 525 out += "Sequence: null\n"; 526 } else { 527 out += "Sequence: " + sequence.getID() + "\n"; 528 } 529 out += "Min length: " + minLength + "\n"; 530 out += "Max length: " + maxLength + "\n"; 531 out += "Min seq pos: " + minSeqPos + "\n"; 532 out += "Max seq pos: " + maxSeqPos + "\n"; 533 out += "Min repeat: " + minRepeat + "\n"; 534 out += "Max repeat: " + maxRepeat + "\n"; 535 out += "Min within: " + minWithin + "\n"; 536 out += "Max within: " + maxWithin + "\n"; 537 out += "Ignore: " + ignore + "\n"; 538 out += "Matched: " + matched + "\n"; 539 540 return out; 541 } 542 } // Operation.java