// Dirk Husmeier, February 2002 // Reads in dna.dat with the whole DNA sequence alignment; // Writes out dna_no_gaps.dat, which is the same alignment where columns // with gaps have been removed. // Also, writes out gapSites.out, which contains a list of gap sites // in the original alignment. import java.lang.*; import java.io.*; import java.awt.*; import java.util.*; public class RemoveGaps{ private String x; private int nDNA; // Number of nucleotides in the alignment private int nSpecies; // Number of species private int firstNuc; // First nucleotide private int lastNuc; // Last nucleotide private int nGaps; // Number of gaps private int[] gapSites; // Vector with gap positions private String[] speciesName; private String[] dnaSeq; private String[] dnaSeq_unGapped; public RemoveGaps(){ // Constructor this.firstNuc=1; } public int showLengthDNA(){ return nDNA; } // --------------------------------------------------- public void write(PrintStream outStream, String x) throws IOException{ // Write out string outStream.print(x); } // --------------------------------------------------- public void write(PrintStream outStream, String x, int lineLength) throws IOException{ // Write out string, broken up in lines of indicated length int i; String xNew= ""; int L = x.length(); for (i=0; i0){ xNew= xNew+"\n"; } xNew= xNew+x.substring(i,i+1); } outStream.print(xNew); } // --------------------------------------------------- public void write(PrintStream outStream, int x) throws IOException{ // Write out integer outStream.print(x); } // --------------------------------------------------- public int[] readN(BufferedReader inStream) throws IOException{ // Read in two integers from one line int I1, I2; int[] intArray = new int[2]; String inputLine= inStream.readLine(); StringTokenizer theTokenizer= new StringTokenizer(inputLine," "); I1= Integer.parseInt(theTokenizer.nextToken()); I2= Integer.parseInt(theTokenizer.nextToken()); intArray[0]=I1; intArray[1]=I2; return intArray; } // --------------------------------------------------- public String readName(BufferedReader inStream) throws IOException{ // Read in species names String inputLine= inStream.readLine(); return inputLine; } // --------------------------------------------------- public String readSeq(BufferedReader inStream) throws IOException{ // Read in DNA sequences String blank=" "; int n=0; int nStart= this.firstNuc; int nEnd= this.lastNuc; int i,L; String outLine=""; while(n=nStart && n<=nEnd){ outLine=outLine+inLine.substring(i,i+1); } } } } return outLine; } // ---------------------------------------------------- public void discardGaps(){ // Copies the DNA sequence alignment from dnaSeq to dnaSeq_unGapped // while discarding columns with gaps. int n=0; // Species int t=0; // Sites int t_continue=0; int t_ungapped=0; // Sites in ungapped alignment this.dnaSeq_unGapped= new String[this.nSpecies]; // Initialisation t=-1; do{ t++; } while (this.gapSites[t]==1); //This indicates a gap // This gives the first site t without gaps for (n=0; n set flag flag++; } } if (flag>0){ // Gap this.gapSites[t]=1; N_gaps++; } else{ // No gap this.gapSites[t]=0; } } System.out.println("Number of gaps: "+N_gaps); return N_gaps; } // ---------------------------------------------------- public int isNucleotide(String site){ // Check if a site has a nucleotide, return a 1 if it does, // and a 0 if it does not. int flag; if (site.equals("A")) flag=1; else if (site.equals("a")) flag=1; else if (site.equals("C")) flag=1; else if (site.equals("c")) flag=1; else if (site.equals("G")) flag=1; else if (site.equals("g")) flag=1; else if (site.equals("T")) flag=1; else if (site.equals("t")) flag=1; else if (site.equals("U")) flag=1; else if (site.equals("u")) flag=1; else flag=0; return flag; } // --------------------------------------------------- public void select(){ this.select(1); } // --------------------------------------------------- public void select(int NreplicateWin){ // NreplicateWin = number of times the same window is replicated. // NreplicateWin==1 --> correct results (T=1) // NreplicateWin>1 --> simulated annealing (T<1) int i,j; int[] intArray= new int[2]; if (NreplicateWin<1) { System.out.println("Error: NreplicateWin<1"); System.exit(0); } // Read data from file try{ // Open input file FileReader inFile = new FileReader("dna.dat"); BufferedReader inStream = new BufferedReader(inFile); // Read in number of species and length of alignment intArray=this.readN(inStream); this.nSpecies=intArray[0]; this.nDNA= intArray[1]; this.lastNuc= this.nDNA; this.dnaSeq= new String[this.nSpecies]; this.speciesName= new String[this.nSpecies]; for (i=0; i