Archive for March, 2011

Yet Another Python Coroutine Fun Stuff

It might be a totally useless python hack. Yes, it is possible to implement dynamic programming using message passing style python co-routine with the enhanced python generator. Here is the code. I will write some details about how this piece code works. However, the main idea is simple (although you might need some background knowledge about sequence alignment algorithm.) We create a co-routine for each alignment cell. The alignment score is generated by passing the best score around the neighboring cells. The backtracking is also implemented as message passing backward.

matchScore, mismatchScore, gapScore = 4, -5, -3
seq1 = "AGTGTAGTTGTGTGAATGTATTTTTAT"
seq2 = "AGGTAGTTGTGGTGATTTAGTTGATAT"

cellMap = {}
globalBestCellScore = [None, -100]

def getAnAlignCell(x, y, p):
    def f():
        global globalBestCellScore
        global cellMap
        b1, b2 = p
        cell1Id, s1 = yield 
        cell2Id, s2 = yield 
        cell3Id, s3 = yield 
        cellData = [ (cell1Id, s1), (cell2Id, s2), (cell3Id, s3) ]
        cellData.sort( key=lambda x: -x[1] )
        bestCell, bestScore = cellData[0]
        if bestScore > globalBestCellScore[1]:
            globalBestCellScore = [ (x,y), bestScore ]
        if x+1 < len(seq1) and y+1 < len(seq2):
            if b1 == b2:
                cellMap[ (x+1, y+1) ].send( ((x,y), bestScore + matchScore) )
            else:
                cellMap[ (x+1, y+1) ].send( ((x,y), bestScore + mismatchScore) )
        if x+1 < len(seq1):
            cellMap[ (x+1, y) ].send( ((x,y), bestScore + gapScore) )
        if y+1 < len(seq2):
            cellMap[ (x, y+1) ].send( ((x,y), bestScore + gapScore) )
            
        path = yield
        if bestCell[0] >= 0 and bestCell[1] >=0 :
            if path == None:
                path = []
            path.extend( [ (x,y) ] )

            cellMap[ bestCell ].send(  path   )
        yield path
    return f

for x in range(len(seq1)):
    for y in range(len(seq2)):
        cellMap[ (x,y) ] = getAnAlignCell( x, y, (seq1[x], seq2[y]) )()
        cellMap[ (x,y) ].next()

for x in range(len(seq1)):
    cellMap[ (x,0) ].send( ( (x, -1), 0 ) )
    cellMap[ (x,0) ].send( ( (x-1, -1), 0 ) )

for y in range(len(seq2)):
    if y != 0:
        cellMap[ (0,y) ].send( ( (-1, y), 0 ) )
        cellMap[ (0,y) ].send( ( (-1, y-1), 0 ) )

cellMap[(0,0)].send( ( (-1, -1), 0 ) )

bestCell = globalBestCellScore[0]
bestPath = cellMap[bestCell].next()
bestPath.reverse()

s1 = []
s2 = []
px, py = bestPath[0]
for x,y in bestPath[1:]:
    if x - px != 0:
        s1.append(seq1[px])
    else:
        s1.append("-")
    if y - py != 0:
        s2.append(seq21)
    else:
        s2.append("-")
    px, py = x, y
print "".join(s1)
print "".join(s2)

The result seems to be correct

$ python coAlign.py   
GTGTAGTTGTGTGAATGTATTT--TT-A
G-GTAGTTGTG-G--TG-ATTTAGTTGA

Python Generator Fun

The following python code generates 100 by 100 = 10,000 generators and use them to simulate 100 step random walk 500 times. Not particular useful thing but it was fun to find out you can simulate random walk differently. I will probably try to write some dynamical programming code using the extensive generator in python (co-routine like construct) if I find some time to work on it.


import random

maxStep = 100
fmap = {}
def getFun(i,j):
    def f():
        path = [(i,j)]
        while 1:
            if i < maxStep - 1:
                path.extend( fmap[ (i+1, j+1) ].next() if random.uniform(0,1) > 0.5 else fmap[ (i+1, j) ].next() )
            yield path
            path = [(i,j)]
    return f

for i in range(maxStep):
    for j in range(maxStep):
        f = getFun(i,j)()
        fmap[ (i,j) ] = f

for i in range(500):
    print i, [ x[1] for x in fmap[ (0,0) ].next() ]