001    /*
002     * Copyright 2007, 2012 Stephen Fisher and Junhyong Kim, University of
003     * Pennsylvania.
004     *
005     * This file is part of Glo-DB.
006     * 
007     * Glo-DB is free software: you can redistribute it and/or modify it
008     * under the terms of the GNU General Public License as published by
009     * the Free Software Foundation, either version 3 of the License, or
010     * (at your option) any later version.
011     * 
012     * Glo-DB is distributed in the hope that it will be useful, but
013     * WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * General Public License for more details.
016     * 
017     * You should have received a copy of the GNU General Public License
018     * along with Glo-DB. If not, see <http://www.gnu.org/licenses/>.
019     *
020     * @(#)FASTAParserMinimal.java
021     */
022    
023    package edu.upenn.gloDB.io;
024    
025    import java.util.HashMap;
026    
027    /**
028     * Parses the header line at the beginning of each FASTA sequence.
029     * The header line is chopped into substrings at each space (" ").  If
030     * a substring contains a ":" then it is assumed to be a key:value
031     * pair and is added to the attributes field as such.  Otherwise, it
032     * is added as a value to the 'descriptors' key.  This is very similar
033     * to FASTAParserFly.java, however it doesn't processes the
034     * gene_boundaries in any special way.  Thus this should not be used
035     * for FASTA files that contain Features.
036     *
037     * @XXX this assumes that the header starts with a sequence ID.
038     *
039     * @author  Stephen Fisher
040     * @version $Id: FASTAParserMinimal.java,v 1.7.2.4 2007/03/01 21:17:33 fisher Exp $
041     */
042    
043    public class FASTAParserMinimal implements FASTAParser {
044             
045             public HashMap parseHeader(String header) {
046                      HashMap attributes = new HashMap();
047                      String descriptors = "";
048                      
049                      // chop off the ">" from the beginning of the header
050                      header = header.substring(1);
051                      
052                      // split header at each space 
053                      String[] attribs = header.split(" ");
054                      
055                      // the header is assumed to start with a sequence ID
056                      attributes.put("ID", attribs[0]);
057                      
058                      //              GloDBUtils.printMsg("Loading: " + attribs[0]);
059                      
060                      String[] tmp;
061                      for (int i = 1; i < attribs.length; i++) {
062                                    // split the substring at each colon
063                                    tmp = attribs[i].split(":", 2);
064                                    
065                                    // if tmp only has one value, then the substring 
066                                    // didn't contain a ":", so add it as a descriptor.
067                                    if (tmp.length == 1) {
068                                             descriptors += " " + tmp[0];
069                                    } else {
070                                             //                                      GloDBUtils.printMsg(attribs[i]+"  "+tmp[0]+"   "+tmp[1]);
071                                             attributes.put(tmp[0], tmp[1]);
072                                    }
073                      }
074                      
075                      // only add descriptors if they actually exist.
076                      if (descriptors.length() > 0) {
077                                    attributes.put("descriptors", descriptors);
078                      }
079                      
080                      return attributes;
081             }
082    
083    }  // FASTAParserMinimal.java
084