Gene Sequencing

Brandon Blatter

CS 312

View this report in your browser

Let's be honest. PDFs were not meant for displaying code, and code was not meant to be displayed in PDFs. Please view this report in your browser so it's easy to read.

Complexity

Time Complexity

Unrestricted Algorithm

Big O for the unrestricted algorithm is O(nm) because every cell of an entire n * m array needs to be calculated. The operations for each calculation are constant time.

Banded Algorithm

Big O for the banded algorithm is O(n + m) because only the cells in the band need to be calculated. The size of the band is n or m (whichever is longer, hence the n + m in the Big O) multiplied by the band size, which is constant.

Space Complexity

Unrestricted Algorithm

Big O for the unrestricted algorithm is O(nm) because every cell of an entire n m array needs to be calculated. A n m array must be allocated to hold this data.

Banded Algorithm

Big O for the banded algorithm is O(n + m) because only the cells in the band need to be calculated. I used a bandwidth * n array for my banded algorithm.

Results

5000 chars, unrestricted

First 100 chars of row 3 col 10

ttgcgagcgatttgcgtgcgtgca-tcc--cgcttcac-tgatctcttgttagatcttttcataatctaaactttataaaaacatccactccctg-tagt
ataagagtgattggcgtccgtacgtaccctttctactctcaaactcttgttagtttaaat-ctaatctaaacttta--taaa-cggcacttcctgtgtgt

15000 chars, banded

First 100 chars of row 3 col 10

cttgtgatggacaaaagtttactgat-gag--tcctttta-caagaacatgtatttaagaagtgcagttatgcagagtgttggagcttgcgtggtct-gc
t-tattttaagtacttgtgatggtcaaaagtttactgatgagaccttttacaagaacatgta-tttaagaagtgcagt--gatg-caaagcgtcggtgcc

Screenshots

Unbanded algorithm running under 30 seconds

Unbanded algorithm displaying row 9 col 10

Banded algorithm with 15000 chars

Code

The main file I edited was PairWiseAlign.cs. I hardly touched the other files so I didn't include them here.

PairWiseAlign.cs

using System;
using System.Collections.Generic;
using System.Text;

namespace GeneticsLab {
  class PairWiseAlign {
    int MaxCharactersToAlign;
    int score = 0;
    string[] alignment = new string[2];

    public PairWiseAlign() {
      // Default is to align only 5000 characters in each sequence.
      this.MaxCharactersToAlign = 5000;
    }

    public PairWiseAlign(int len) {
      // Alternatively, we can use an different length; typically used with the banded option checked.
      this.MaxCharactersToAlign = len;
    }

    private enum From { START, LEFT, TOP, DIAGONAL };

    private struct Cell {
      public int cost;
      public From fromDir;
    }

    private void PrintCells(string A, string B, Cell[,] cells) {
      // print stuff
      string line = "\t-";

      for(int x = 1; x < cells.GetLength(0); x++) {
        line += "\t" + A[x - 1];
      }
      Console.WriteLine(line);

      for(int y = 0; y < cells.GetLength(1); y++) {
        line = y == 0 ? "-" : "" + B[y - 1];
        for(int x = 0; x < cells.GetLength(0); x++) {
          line += "\t" + cells[x, y].cost;
        }
        Console.WriteLine(line);
      }
      for(int y = 0; y < cells.GetLength(1); y++) {
        line = y == 0 ? "-" : "" + B[y - 1];
        for(int x = 0; x < cells.GetLength(0); x++) {
          line += "\t" + cells[x, y].fromDir;
        }
        Console.WriteLine(line);
      }
    }

    private string Reverse(string toReverse) {
        char[] toReverseChars = toReverse.ToCharArray();
        Array.Reverse(toReverseChars);
        return new string(toReverseChars);
    }

    private Cell Min(char row, char col, int left, int top, int diagonal) {
      // left and top have a cost of +5
      int leftCost = left + 5;
      int topCost = top + 5;
      // diagonal has a cost of -3 if matching, 1 otherwise
      int diagonalCost = diagonal + (row == col ? -3 : 1);
      Cell cell = new Cell();
      if(leftCost < topCost && leftCost < diagonalCost) {
        cell.cost = leftCost;
        cell.fromDir = From.LEFT;
      } else if(topCost < leftCost && topCost < diagonalCost) {
        cell.cost = topCost;
        cell.fromDir = From.TOP;
      } else {
        cell.cost = diagonalCost;
        cell.fromDir = From.DIAGONAL;
      }
      return cell;
    }

    // used for special cases with banded alignment
    private Cell MinFromCol(char row, char col, int indel, int diagonal, From indelFromDir) {
      // indel has a cost of +5
      int indelCost = indel + 5;
      // diagonal has a cost of -3 if matching, 1 otherwise
      int diagonalCost = diagonal + (row == col ? -3 : 1);
      Cell cell = new Cell();
      if(indelCost < diagonalCost) {
        cell.cost = indelCost;
        cell.fromDir = indelFromDir;
      } else {
        cell.cost = diagonalCost;
        cell.fromDir = From.DIAGONAL;
      }
      return cell;
    }

    private void SetAllAlignment(string A, string B, Cell[,] cells) {
      // reset alignment strings
      alignment[0] = "";
      alignment[1] = "";
      // start at last cell
      int x = cells.GetLength(0) - 1;
      int y = cells.GetLength(1) - 1;
      // append last cell
      alignment[0] += A[x - 1];
      alignment[1] += B[y - 1];
      // trace back through the cells
      while(x > 1 && y > 1) {
        // depending on the direction, append the right chars
        switch(cells[x, y].fromDir) {
          case From.LEFT:
            x--;
            alignment[0] += A[x - 1];
            alignment[1] += "-";
            break;
          case From.TOP:
            y--;
            alignment[0] += "-";
            alignment[1] += B[y - 1];
            break;
          case From.DIAGONAL:
            x--;
            y--;
            alignment[0] += A[x - 1];
            alignment[1] += B[y - 1];
            break;
          default:
            break;
        }
      }
      // reverse alignment strings
      alignment[0] = Reverse(alignment[0]);
      alignment[1] = Reverse(alignment[1]);
    }

    private void AllAlign(string A, string B) {
      int xLength = 1 + (A.Length < MaxCharactersToAlign ? A.Length : MaxCharactersToAlign);
      int yLength = 1 + (B.Length < MaxCharactersToAlign ? B.Length : MaxCharactersToAlign);
      // make an array of cells
      Cell[,] cells = new Cell[xLength, yLength];
      Cell cell = new Cell();
      // set the first cell to 0
      cell.cost = 0;
      cell.fromDir = From.START;
      cells[0, 0] = cell;

      // initialize first row
      for(int x = 1; x < xLength; x++) {
        cell.cost = cells[x - 1, 0].cost + 5;
        cell.fromDir = From.LEFT;
        cells[x, 0] = cell;
      }
      // initialize first column
      for(int y = 1; y < yLength; y++) {
        cell.cost = cells[0, y - 1].cost + 5;
        cell.fromDir = From.TOP;
        cells[0, y] = cell;
      }
      // iterate through every cell
      for(int y = 1; y < yLength; y++) {
        for(int x = 1; x < xLength; x++) {
          cells[x, y] = Min(  A[x - 1],
                              B[y - 1],
                              cells[x - 1, y].cost,
                              cells[x, y - 1].cost,
                              cells[x - 1, y - 1].cost);
        }
      }

      // set results
      score = cells[xLength - 1, yLength - 1].cost;
      SetAllAlignment(A, B, cells);
    }

    private void SetBandAlignment(string A, string B, Cell[,] cells) {
      // reset alignment strings
      alignment[0] = "";
      alignment[1] = "";
      // start at last cell
      // scan the bottom row for the corner value
      int x = -1;
      int y = cells.GetLength(1) - 1;
      int xLength = cells.GetLength(0);
      for(int i = 0; i < xLength; i++) {
        // if end of array or default cell is reached
        if(i + 1 == xLength || cells[i + 1, y].fromDir == From.START) {
          x = i;
          score = cells[i, y].cost;
          break;
        }
      }
      int aIndex = A.Length - 1;
      int bIndex = B.Length - 1;
      // trace back through the cells
      while(y > 0) {
        // depending on the direction, append the right chars
        switch(cells[x, y].fromDir) {
          case From.LEFT:
            alignment[0] += A[aIndex];
            alignment[1] += "-";
            aIndex--;
            x--;
            break;
          case From.TOP:
            alignment[0] += "-";
            alignment[1] += B[bIndex];
            bIndex--;
            y--;
            x++;
            break;
          case From.DIAGONAL:
            alignment[0] += A[aIndex];
            alignment[1] += B[bIndex];
            aIndex--;
            bIndex--;
            y--;
            break;
          case From.START:
            alignment[0] += "-";
            alignment[1] += B[bIndex];
            bIndex--;
            y--;
            break;
          default:
            break;
        }
      }
      // reverse alignment strings
      alignment[0] = Reverse(alignment[0]);
      alignment[1] = Reverse(alignment[1]);
    }

    private void BandAlign(string A, string B) {
      int bandWidth = 7;
      int xLength = 1 + (A.Length < MaxCharactersToAlign ? A.Length : MaxCharactersToAlign);
      int yLength = 1 + (B.Length < MaxCharactersToAlign ? B.Length : MaxCharactersToAlign);
      // make an array of cells
      Cell[,] cells = new Cell[bandWidth, yLength];
      // set first cell to 0
      Cell cell = new Cell();
      // set the first cell to 0
      cell.cost = 0;
      cell.fromDir = From.START;
      cells[bandWidth / 2, 0] = cell;
      // initialize first row
      for(int i = bandWidth / 2 + 1; i < bandWidth; i++) {
         cell.cost = cells[i - 1, 0].cost + 5;
         cell.fromDir = From.LEFT;
         cells[i, 0] = cell;
      }
      // initialize first diagonal
      for(int i = bandWidth / 2 - 1; i >= 0; i--) {
        int x = i;
        int y = bandWidth / 2 - i;
        cell.cost = cells[x + 1, y - 1].cost + 5;
        cell.fromDir = From.TOP;
        cells[x, y] = cell;
      }
      int xLetterIndex;
      // iterate through every cell
      for(int y = 1; y < yLength; y++) {
        for(int x = 0; x < bandWidth; x++) {
          // skip corner cases
          if((x + y) <= bandWidth / 2) continue;
          if((bandWidth - x) + (yLength - y - 1) <= bandWidth / 2) continue;
          // skip if one word is longer than the other
          xLetterIndex = y + (x - bandWidth / 2) - 1;
          if(xLetterIndex >= A.Length) continue;

          // leftmost column of the array, only diagonal and top viable
          if(x == 0) {
            cells[x, y] = MinFromCol( A[xLetterIndex],
                                      B[y - 1],
                                      cells[x + 1, y - 1].cost, // indel
                                      cells[x, y - 1].cost, // diagonal
                                      From.TOP);
          } else if(x == bandWidth - 1) {
            // rightmost column of the array, only diagonal and left viable
            cells[x, y] = MinFromCol( A[xLetterIndex],
                                      B[y - 1],
                                      cells[x - 1, y].cost, // indel
                                      cells[x, y - 1].cost, // diagonal
                                      From.LEFT);
          } else {
            // if all three cells are viable
            cells[x, y] = Min(  A[xLetterIndex],
                                B[y - 1],
                                cells[x - 1, y].cost, // left
                                cells[x + 1, y - 1].cost, // top
                                cells[x, y - 1].cost); // diagonal
          }
        }
      }
      // set results
      SetBandAlignment(A, B, cells);
    }

    /// <summary>
    /// this is the function you implement.
    /// </summary>
    /// <param name="sequenceA">the first sequence</param>
    /// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
    /// <param name="banded">true if alignment should be band limited.</param>
    /// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB.  The calling function places the result in the dispay appropriately.
    ///
    public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded) {
      string seqA = sequenceA.Sequence;
      string seqB = sequenceB.Sequence;
      ResultTable.Result result = new ResultTable.Result();
      if(banded && Math.Abs(seqA.Length - seqB.Length) > 1000) {
        result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
        return result;
      }
      // if(seqA.Length > 60 || seqB.Length > 60) return result;
      if(banded) BandAlign(seqA, seqB);
      else AllAlign(seqA, seqB);

      // bundling your results into the right object type
      result.Update(score, alignment[0], alignment[1]);
      return(result);
    }
  }
}

results matching ""

    No results matching ""