Gene Sequencing
Brandon Blatter
CS 312
View this report in your browser
Let's be honest. PDFs were not meant for displaying code, and code was not meant to be displayed in PDFs. Please view this report in your browser so it's easy to read.
Complexity
Time Complexity
Unrestricted Algorithm
Big O for the unrestricted algorithm is O(nm) because every cell of an entire n * m array needs to be calculated. The operations for each calculation are constant time.
Banded Algorithm
Big O for the banded algorithm is O(n + m) because only the cells in the band need to be calculated. The size of the band is n or m (whichever is longer, hence the n + m in the Big O) multiplied by the band size, which is constant.
Space Complexity
Unrestricted Algorithm
Big O for the unrestricted algorithm is O(nm) because every cell of an entire n m array needs to be calculated. A n m array must be allocated to hold this data.
Banded Algorithm
Big O for the banded algorithm is O(n + m) because only the cells in the band need to be calculated. I used a bandwidth * n array for my banded algorithm.
Results
5000 chars, unrestricted
First 100 chars of row 3 col 10
ttgcgagcgatttgcgtgcgtgca-tcc--cgcttcac-tgatctcttgttagatcttttcataatctaaactttataaaaacatccactccctg-tagt
ataagagtgattggcgtccgtacgtaccctttctactctcaaactcttgttagtttaaat-ctaatctaaacttta--taaa-cggcacttcctgtgtgt
15000 chars, banded
First 100 chars of row 3 col 10
cttgtgatggacaaaagtttactgat-gag--tcctttta-caagaacatgtatttaagaagtgcagttatgcagagtgttggagcttgcgtggtct-gc
t-tattttaagtacttgtgatggtcaaaagtttactgatgagaccttttacaagaacatgta-tttaagaagtgcagt--gatg-caaagcgtcggtgcc
Screenshots
Unbanded algorithm running under 30 seconds
Unbanded algorithm displaying row 9 col 10
Banded algorithm with 15000 chars
Code
The main file I edited was PairWiseAlign.cs. I hardly touched the other files so I didn't include them here.
PairWiseAlign.cs
using System;
using System.Collections.Generic;
using System.Text;
namespace GeneticsLab {
class PairWiseAlign {
int MaxCharactersToAlign;
int score = 0;
string[] alignment = new string[2];
public PairWiseAlign() {
// Default is to align only 5000 characters in each sequence.
this.MaxCharactersToAlign = 5000;
}
public PairWiseAlign(int len) {
// Alternatively, we can use an different length; typically used with the banded option checked.
this.MaxCharactersToAlign = len;
}
private enum From { START, LEFT, TOP, DIAGONAL };
private struct Cell {
public int cost;
public From fromDir;
}
private void PrintCells(string A, string B, Cell[,] cells) {
// print stuff
string line = "\t-";
for(int x = 1; x < cells.GetLength(0); x++) {
line += "\t" + A[x - 1];
}
Console.WriteLine(line);
for(int y = 0; y < cells.GetLength(1); y++) {
line = y == 0 ? "-" : "" + B[y - 1];
for(int x = 0; x < cells.GetLength(0); x++) {
line += "\t" + cells[x, y].cost;
}
Console.WriteLine(line);
}
for(int y = 0; y < cells.GetLength(1); y++) {
line = y == 0 ? "-" : "" + B[y - 1];
for(int x = 0; x < cells.GetLength(0); x++) {
line += "\t" + cells[x, y].fromDir;
}
Console.WriteLine(line);
}
}
private string Reverse(string toReverse) {
char[] toReverseChars = toReverse.ToCharArray();
Array.Reverse(toReverseChars);
return new string(toReverseChars);
}
private Cell Min(char row, char col, int left, int top, int diagonal) {
// left and top have a cost of +5
int leftCost = left + 5;
int topCost = top + 5;
// diagonal has a cost of -3 if matching, 1 otherwise
int diagonalCost = diagonal + (row == col ? -3 : 1);
Cell cell = new Cell();
if(leftCost < topCost && leftCost < diagonalCost) {
cell.cost = leftCost;
cell.fromDir = From.LEFT;
} else if(topCost < leftCost && topCost < diagonalCost) {
cell.cost = topCost;
cell.fromDir = From.TOP;
} else {
cell.cost = diagonalCost;
cell.fromDir = From.DIAGONAL;
}
return cell;
}
// used for special cases with banded alignment
private Cell MinFromCol(char row, char col, int indel, int diagonal, From indelFromDir) {
// indel has a cost of +5
int indelCost = indel + 5;
// diagonal has a cost of -3 if matching, 1 otherwise
int diagonalCost = diagonal + (row == col ? -3 : 1);
Cell cell = new Cell();
if(indelCost < diagonalCost) {
cell.cost = indelCost;
cell.fromDir = indelFromDir;
} else {
cell.cost = diagonalCost;
cell.fromDir = From.DIAGONAL;
}
return cell;
}
private void SetAllAlignment(string A, string B, Cell[,] cells) {
// reset alignment strings
alignment[0] = "";
alignment[1] = "";
// start at last cell
int x = cells.GetLength(0) - 1;
int y = cells.GetLength(1) - 1;
// append last cell
alignment[0] += A[x - 1];
alignment[1] += B[y - 1];
// trace back through the cells
while(x > 1 && y > 1) {
// depending on the direction, append the right chars
switch(cells[x, y].fromDir) {
case From.LEFT:
x--;
alignment[0] += A[x - 1];
alignment[1] += "-";
break;
case From.TOP:
y--;
alignment[0] += "-";
alignment[1] += B[y - 1];
break;
case From.DIAGONAL:
x--;
y--;
alignment[0] += A[x - 1];
alignment[1] += B[y - 1];
break;
default:
break;
}
}
// reverse alignment strings
alignment[0] = Reverse(alignment[0]);
alignment[1] = Reverse(alignment[1]);
}
private void AllAlign(string A, string B) {
int xLength = 1 + (A.Length < MaxCharactersToAlign ? A.Length : MaxCharactersToAlign);
int yLength = 1 + (B.Length < MaxCharactersToAlign ? B.Length : MaxCharactersToAlign);
// make an array of cells
Cell[,] cells = new Cell[xLength, yLength];
Cell cell = new Cell();
// set the first cell to 0
cell.cost = 0;
cell.fromDir = From.START;
cells[0, 0] = cell;
// initialize first row
for(int x = 1; x < xLength; x++) {
cell.cost = cells[x - 1, 0].cost + 5;
cell.fromDir = From.LEFT;
cells[x, 0] = cell;
}
// initialize first column
for(int y = 1; y < yLength; y++) {
cell.cost = cells[0, y - 1].cost + 5;
cell.fromDir = From.TOP;
cells[0, y] = cell;
}
// iterate through every cell
for(int y = 1; y < yLength; y++) {
for(int x = 1; x < xLength; x++) {
cells[x, y] = Min( A[x - 1],
B[y - 1],
cells[x - 1, y].cost,
cells[x, y - 1].cost,
cells[x - 1, y - 1].cost);
}
}
// set results
score = cells[xLength - 1, yLength - 1].cost;
SetAllAlignment(A, B, cells);
}
private void SetBandAlignment(string A, string B, Cell[,] cells) {
// reset alignment strings
alignment[0] = "";
alignment[1] = "";
// start at last cell
// scan the bottom row for the corner value
int x = -1;
int y = cells.GetLength(1) - 1;
int xLength = cells.GetLength(0);
for(int i = 0; i < xLength; i++) {
// if end of array or default cell is reached
if(i + 1 == xLength || cells[i + 1, y].fromDir == From.START) {
x = i;
score = cells[i, y].cost;
break;
}
}
int aIndex = A.Length - 1;
int bIndex = B.Length - 1;
// trace back through the cells
while(y > 0) {
// depending on the direction, append the right chars
switch(cells[x, y].fromDir) {
case From.LEFT:
alignment[0] += A[aIndex];
alignment[1] += "-";
aIndex--;
x--;
break;
case From.TOP:
alignment[0] += "-";
alignment[1] += B[bIndex];
bIndex--;
y--;
x++;
break;
case From.DIAGONAL:
alignment[0] += A[aIndex];
alignment[1] += B[bIndex];
aIndex--;
bIndex--;
y--;
break;
case From.START:
alignment[0] += "-";
alignment[1] += B[bIndex];
bIndex--;
y--;
break;
default:
break;
}
}
// reverse alignment strings
alignment[0] = Reverse(alignment[0]);
alignment[1] = Reverse(alignment[1]);
}
private void BandAlign(string A, string B) {
int bandWidth = 7;
int xLength = 1 + (A.Length < MaxCharactersToAlign ? A.Length : MaxCharactersToAlign);
int yLength = 1 + (B.Length < MaxCharactersToAlign ? B.Length : MaxCharactersToAlign);
// make an array of cells
Cell[,] cells = new Cell[bandWidth, yLength];
// set first cell to 0
Cell cell = new Cell();
// set the first cell to 0
cell.cost = 0;
cell.fromDir = From.START;
cells[bandWidth / 2, 0] = cell;
// initialize first row
for(int i = bandWidth / 2 + 1; i < bandWidth; i++) {
cell.cost = cells[i - 1, 0].cost + 5;
cell.fromDir = From.LEFT;
cells[i, 0] = cell;
}
// initialize first diagonal
for(int i = bandWidth / 2 - 1; i >= 0; i--) {
int x = i;
int y = bandWidth / 2 - i;
cell.cost = cells[x + 1, y - 1].cost + 5;
cell.fromDir = From.TOP;
cells[x, y] = cell;
}
int xLetterIndex;
// iterate through every cell
for(int y = 1; y < yLength; y++) {
for(int x = 0; x < bandWidth; x++) {
// skip corner cases
if((x + y) <= bandWidth / 2) continue;
if((bandWidth - x) + (yLength - y - 1) <= bandWidth / 2) continue;
// skip if one word is longer than the other
xLetterIndex = y + (x - bandWidth / 2) - 1;
if(xLetterIndex >= A.Length) continue;
// leftmost column of the array, only diagonal and top viable
if(x == 0) {
cells[x, y] = MinFromCol( A[xLetterIndex],
B[y - 1],
cells[x + 1, y - 1].cost, // indel
cells[x, y - 1].cost, // diagonal
From.TOP);
} else if(x == bandWidth - 1) {
// rightmost column of the array, only diagonal and left viable
cells[x, y] = MinFromCol( A[xLetterIndex],
B[y - 1],
cells[x - 1, y].cost, // indel
cells[x, y - 1].cost, // diagonal
From.LEFT);
} else {
// if all three cells are viable
cells[x, y] = Min( A[xLetterIndex],
B[y - 1],
cells[x - 1, y].cost, // left
cells[x + 1, y - 1].cost, // top
cells[x, y - 1].cost); // diagonal
}
}
}
// set results
SetBandAlignment(A, B, cells);
}
/// <summary>
/// this is the function you implement.
/// </summary>
/// <param name="sequenceA">the first sequence</param>
/// <param name="sequenceB">the second sequence, may have length not equal to the length of the first seq.</param>
/// <param name="banded">true if alignment should be band limited.</param>
/// <returns>the alignment score and the alignment (in a Result object) for sequenceA and sequenceB. The calling function places the result in the dispay appropriately.
///
public ResultTable.Result Align_And_Extract(GeneSequence sequenceA, GeneSequence sequenceB, bool banded) {
string seqA = sequenceA.Sequence;
string seqB = sequenceB.Sequence;
ResultTable.Result result = new ResultTable.Result();
if(banded && Math.Abs(seqA.Length - seqB.Length) > 1000) {
result.Update(int.MaxValue, "No Alignment Possible", "No Alignment Possible");
return result;
}
// if(seqA.Length > 60 || seqB.Length > 60) return result;
if(banded) BandAlign(seqA, seqB);
else AllAlign(seqA, seqB);
// bundling your results into the right object type
result.Update(score, alignment[0], alignment[1]);
return(result);
}
}
}