|
| 1 | +import numpy as np |
| 2 | + |
| 3 | + |
| 4 | +def global_alignment(seq0, seq1): |
| 5 | + def get_dp_table(): |
| 6 | + dp_score_table = np.ndarray(shape=(len(seq0) + 1, len(seq1) + 1), dtype=int) |
| 7 | + dp_score_table.fill(0) |
| 8 | + for col_idx in range(dp_score_table.shape[1]): |
| 9 | + dp_score_table[0][col_idx] = (-1) * col_idx |
| 10 | + for row_idx in range(dp_score_table.shape[0]): |
| 11 | + dp_score_table[row_idx][0] = (-1) * row_idx |
| 12 | + |
| 13 | + min_size = min(len(seq0), len(seq1)) |
| 14 | + |
| 15 | + def match_score(i, j): |
| 16 | + return 1 if seq0[i - 1] == seq1[j - 1] else -1 |
| 17 | + |
| 18 | + def transition_computation(i, j): |
| 19 | + gap_penalty = -1 |
| 20 | + diagonal_val = dp_score_table[i - 1][j - 1] + match_score(i, j) |
| 21 | + right_val = dp_score_table[i][j - 1] + gap_penalty |
| 22 | + down_val = dp_score_table[i - 1][j] + gap_penalty |
| 23 | + dp_score_table[i][j] = max(diagonal_val, right_val, down_val) |
| 24 | + |
| 25 | + for iter_num in xrange(1, min_size + 1): |
| 26 | + transition_computation(iter_num, iter_num) |
| 27 | + # move right |
| 28 | + for col_idx in xrange(iter_num + 1, dp_score_table.shape[1]): |
| 29 | + transition_computation(iter_num, col_idx) |
| 30 | + |
| 31 | + # move down |
| 32 | + for row_idx in xrange(iter_num + 1, dp_score_table.shape[0]): |
| 33 | + transition_computation(row_idx, iter_num) |
| 34 | + |
| 35 | + return dp_score_table |
| 36 | + |
| 37 | + def traceback(table): |
| 38 | + """ |
| 39 | + :type table: np.ndarray |
| 40 | + """ |
| 41 | + gap_penalty = -1 |
| 42 | + def match_score(i, j): |
| 43 | + return 1 if seq0[i - 1] == seq1[j - 1] else -1 |
| 44 | + |
| 45 | + def dfs_detail(row_idx, col_idx, level): |
| 46 | + blank_str = ''.join([" "] * level)[:-1] + '|_' if level > 0 else 'root' |
| 47 | + print '%-50s' % (blank_str + str((row_idx, col_idx))), 'score at', str((row_idx, col_idx)), ':', \ |
| 48 | + table[row_idx][col_idx] |
| 49 | + |
| 50 | + if row_idx != 0 and col_idx != 0: |
| 51 | + # diagonal |
| 52 | + if table[row_idx - 1][col_idx - 1] + match_score(row_idx, col_idx) == table[row_idx][col_idx]: |
| 53 | + dfs_detail(row_idx - 1, col_idx - 1, level + 1) |
| 54 | + # down |
| 55 | + if table[row_idx - 1][col_idx] + gap_penalty == table[row_idx][col_idx]: |
| 56 | + dfs_detail(row_idx - 1, col_idx, level + 1) |
| 57 | + # right |
| 58 | + if table[row_idx][col_idx - 1] + gap_penalty == table[row_idx][col_idx]: |
| 59 | + dfs_detail(row_idx, col_idx - 1, level + 1) |
| 60 | + |
| 61 | + dfs_detail(table.shape[0] - 1, table.shape[1] - 1, 0) |
| 62 | + |
| 63 | + dp_score_table = get_dp_table() |
| 64 | + print 'global alignment table:\n', dp_score_table, '\n' |
| 65 | + traceback(dp_score_table) |
| 66 | + |
| 67 | + |
| 68 | +if __name__ == '__main__': |
| 69 | + seq_str0 = 'GGTTGACTA' |
| 70 | + seq_str1 = 'TGTTACGG' |
| 71 | + global_alignment(seq0=seq_str1, seq1=seq_str0) |
0 commit comments