Archive for April, 2011

DiggsInABox…

William McVey left a message about my code for the “DiggsInAbox” and this. I never really intended to release the code. I don’t mind to share but it is just I have never really motivated to polish the code so it can be released as a professional written code. Anyway, I am pasting my code here so I hope it can be useful for some one. I wrote this code for fun and to learn the algorithm to generate the Treemap. There are a lot of Treemap implementation these days. You might be able to find better code.

DiggsInABox.py

#!/usr/bin/env python

"""
Copyright 2007-2011 Jason Chin, All right reserved
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE."""

from Treemap import Treemap, Node

class diggTreemap(Treemap):
    
    def __init__(self, rootNode):
        self.rootNode = rootNode;
        self.setWidthHeight(300,200)
        self.setPosition(0,0)
        self._cMap = self._colorMap()
    
    def _colorMap(self):
        cMap = ["#fc0","#cc0","#f63","#39c","#696","#f93",
                "#6f0","#c69","#cf9","#36c","#393","#c0f"]
        i = 0
        while True:
            yield cMap[i]
            i = i+1
            if i > len(cMap) - 1: i = i % len(cMap)
    
    def writeCSS(self):
        return u"""\
<style type="text/css" media="screen">
body {
background: black;
}

div.group2 {
position:absolute; 
overflow:hidden;
margin: 3px;
z-index: 1;
}


div.leave {
position:absolute; 
text-align:center;
overflow:hidden;
vertical-align:middle;
opacity: 1;
border: 1px outset #000;
margin: 3px;
z-index:2;
}

div.leave > a > div {
    display: table-cell;
    position: static;
    vertical-align: middle;
    padding:3px;
    color:#000000;
}

div.leave > a {
text-decoration:none;
}
div.leave > a:link > div  {
color: black;
}
div.leave > a:visited > div {
color: #bcc;
}

div.index {
position:absolute; 
left: 810px;
width: 90px;
text-align:center;
overflow:hidden;
vertical-align:middle;
opacity: 1;
border: 2px #fff outset;

}

div.index > a {
text-decoration:none;
}
div.index > a:link > div  {
color: black;
}
div.index > a:visited > div {
color: black;
}

div.index > a > div {
    display: table-cell;
    position: static;
    vertical-align: middle;
    padding:3px;
    color:#000000;
    width: 90px;
    -moz-user-select: none;
}

</style>\n"""
               
    def writeJS(self):
        return """\
<script language='javascript'>



function hiliteBlock(blockId) {
    elm = document.getElementById(blockId);
    elm.style.width = parseInt(elm.style.width) - 6 + 'px';
    elm.style.height = parseInt(elm.style.height) - 6 + 'px';
    elm.style.borderWidth = '3px';
    elm.style.borderStyle = 'dashed';
    elm.style.borderColor = '#fff';
    
    return true;
}

function removeHilite(blockId) {
    elm = document.getElementById(blockId);
    elm.style.width = parseInt(elm.style.width) + 6 + 'px';
    elm.style.height = parseInt(elm.style.height) + 6 + 'px';
    elm.style.borderWidth = '0px';
    elm.style.borderStyle = '';
    elm.style.borderColor = '';
    return true;
}


function showSummary(event,leaveId) {
    if (document.getElementById('summaryPopup')) {
        popup = document.getElementById('summaryPopup');
        document.body.removeChild(popup);
    } 
    
    //elm = document.getElementById(leaveId);
    popup = document.createElement('div');
    popup.id = 'summaryPopup';
    x = event.pageX;
    y = event.pageY;
    document.body.appendChild(popup);
    
    popup.style.position = 'absolute';
    popup.style.left = x + 'px';
    popup.style.top = y + 'px';
    popup.style.width = '250px';
    popup.style.height = '150px';
    popup.style.zIndex = '100';
    popup.style.background = '#cec';
    
    //still working on this function, ajax maybe needed
    
    return true;
}
    
function adjustFont() {
    tmElm = document.getElementById('treemap');
    
    for (idx=0; idx < tmElm.childNodes.length; idx++) {
        elm = tmElm.childNodes[idx];
        if (elm.className != 'leave' && elm.className != 'index') {
            continue;
        }
        while ( elm.scrollWidth > elm.clientWidth || 
            elm.scrollHeight > elm.clientHeight) {
            curFontsize =  parseInt(elm.style.fontSize);
            newFontSizeInPx = parseInt(elm.style.fontSize) - 1;
            if (newFontSizeInPx <= 2) { 
                newFontSizeInPx = 2;
                elm.style.fontSize = newFontSizeInPx + 'px';
               
                break;
             }
            elm.style.fontSize = newFontSizeInPx + 'px';
            
        }
    }
}

</script>
""" 
                
    def writeAll(self):
        outStr = u""
        outStr += u'<html><head>'
        outStr += self.writeCSS()
        outStr += self.writeJS()
        outStr += u"</head><body onload='adjustFont();'><div style='font-size:30px;color:white;margin-left:40px'>Diggs in a Box<br> <span style='font-size:0.3em;'>(v. 0.02 by Chen-Shan Chin)</span></div>"


        outStr += u"<div id='treemap' style='position:relative;left:40px; top:5px;width:%dpx;height:%dpx;'>" % (self.width+5, self.height+5)
        
        #write feed navigator
        rssMapLabel = ['All','Technology','Science','Business','Sports',
                       'Entertainment','Gamming']              
        outStr += "<div id='nav1' style='position:absolute;right:0px;top:-30px;height:30px;width:600px'>"
        outStr += "<table align=right><tr>"
        for label in rssMapLabel:
            col = '#fff'
            if self.rootNode.name == label:
                col = '#fff'
            else:
                col = '#888'
            outStr += "<td><a href='http://infoecho.net/Sandbox/DiggsInABox.py?feed=%s' style='text-decoration:none;'><div style='color:%s;border:1px #888 solid;align:right;padding:3px;text-decoration:none;'>%s</div></a></td>" % (label,col,label)
        
        outStr += "</table></div>"


        #write index navigator
        y = 0;
        cMap = self._colorMap()
        totalWeight = 0
        
        for node in self.rootNode.children:
            if node.weight > 10:
                totalWeight += node.weight
            else:
                totalWeight += 10
        dhMap = {}
        dhSum = 0

        for node in self.rootNode.children:
            dh =  1.0 * node.weight / totalWeight * (self.height+3) 
            if dh > 20:
                dhMap[node] = dh   
            else:
                dhMap[node] = 20
            dhSum += dhMap[node]

                
        for node in self.rootNode.children:
            dh = 1.0 * dhMap[node] / dhSum * (self.height+3)
            fs = 12
            if dh > 24:
                fs *= 1.8
            if max([len(w) for w in node.name.split(' ')]) > 10: 
                fs = (1.0*max([len(w) for w in node.name.split(' ')]) / 10)
            if len(node.name.split(' ')) > 1 and dh < 48:
                fs *= 0.7
            if fs < 12: fs = 12
            if fs > 24: fs = 24
            outStr += u"<div id='%s_index' class='index' \
style='top:%dpx; height:%dpx; background:%s;font-size:%dpx' onmouseover='hiliteBlock(\"%s\");'  onmouseout='removeHilite(\"%s\");'><a target='_blank' href='%s'><div style='height:%dpx;'>%s</div></a></div>"\
% (node.name, y, dh-2, cMap.next(), fs,  node.name, node.name, node.properties['link'], dh-2,  node.name)
            y += dh
                
        #write all nodes
        outStr += self.writeNodes(self.rootNode)
        outStr += u"</div></body></html>"
        return outStr
        
    def writeNodes(self, node): 
        outStr = self.writeNode(node)
        for n in node.children:
            outStr += self.writeNodes(n)    
        return outStr
    
    def writeNode(self, node):
        
        outStr = u""
        
        if "group2" in node.properties:
            x,y,dw,dh = node.rect
            x = int(round(x))
            y = int(round(y))
            dw = int(round(dw))
            dh = int(round(dh))
            color = self._cMap.next()
            outStr += u"<div id='%s' class='group2' \
style='left:%dpx; top:%dpx; width:%dpx; height:%dpx; background:%s;'></div>"\
% (node.name, x, y, dw, dh, color)
            
            
            return outStr
        
        elif node.properties['is_leave'] == False: 
            return outStr
        
        
        x,y,dw,dh = node.rect
        x = int(round(x+3))
        y = int(round(y+3))
        dw = int(round(dw-9))
        dh = int(round(dh-9))  
        label = node.properties['data']['title'].strip()
        if len(label) > 60:
            label = label[:60]+" ..."
        
        if dw > 20 and dh >20:
            fs = node.area**0.5 / 7;
            if len(label) > 40:
                fs *= 0.75
            if len(label) < 20:
                fs *= 1.25 
            if max([len(w) for w in label.split(' ')]) * fs * 0.7 > dw:          
                fs = 2 * dw / (max([len(w) for w in label.split(' ')]))
            fs = int(fs)
            outStr += u"<div id='%s' class='leave' \
style='left:%dpx; top:%dpx; width:%dpx; height:%dpx;\
 font-size:%fpx;'>" % (node.name, x, y, dw, dh, fs)
            outStr += u"<a target='_blank' href='%s'><div style='width:%d;height:%d;'>%s</div></a></div>" % (node.properties['data']['link'], dw, dh, label)

        else:

            outStr += u"<a target='_blank' href='%s'><div id='%s' class='leave' \
style='left:%dpx; top:%dpx; width:%dpx; height:%dpx;'></div></a>" % (node.properties['data']['link'], label, x, y, dw, dh)

        return outStr
        
        


###########################################################
import cgitb; cgitb.enable()
import cgi

import feedparser

form = cgi.FieldStorage() 
print "Content-Type: text/html"
print

feed = ""
if form.has_key('feed'):
    feed = form['feed'].value.strip()


rssMap = {'All':'http://digg.com/rss/index.xml',
          'Technology':'http://digg.com/rss/containertechnology.xml',
          'Science':'http://digg.com/rss/containerscience.xml',
          'Business':'http://digg.com/rss/containerworld_business.xml',
          'Sports':'http://digg.com/rss/containersports.xml',
          'Entertainment':'http://digg.com/rss/containerentertainment.xml',
          'Gamming':'http://digg.com/rss/containergaming.xml'}

if feed not in rssMap:
    feed = 'All'

data = feedparser.parse(rssMap[feed])
entries = data['entries']
term2entries = {}
term2link = {}
for e in entries:
    term = e['digg_category'] 
    if term not in term2entries:
        term2entries[term] = []
    term2entries[term].append( {'id':e['id'],
                                'diggcount':int(e['digg_diggcount']),
                                'link':e['link'],'title':e['title'],
                                'commentcount':int(e['digg_commentcount']), 
                                'summary':e['summary']} )
    if term not in term2link:
        term2link[term] = "/".join(e['link'].split('/')[:-1])

rootNode = Node(feed)

for term in term2entries:
    
    n2 = Node(term)
    n2.properties['is_leave'] = False
    n2.properties['group2'] = True
    n2.properties['link'] = term2link[term]
    n2.weight = 0
    for entry in term2entries[term]:
        n3 = Node(entry['id'].split("/")[-1])
        n3.properties['is_leave'] = True
        n3.properties['data'] = entry
        n3.weight = entry['diggcount']
        n2.addAChild(n3)
        n2.weight = n2.weight + n3.weight
    rootNode.addAChild(n2)
    rootNode.weight = rootNode.weight + n2.weight
    rootNode.properties['is_leave'] = False


#for n in rootNode.children:
#    print n.name, [n2.name for n2 in n.children]

rootNode.sortChildrenByWeight()
TM = diggTreemap(rootNode)
TM.setWidthHeight(800,540)    
TM.layout()        
outStr = TM.writeAll() 
print outStr.encode('utf-8')

Treemap.py

#!/usr/bin/env python


"""
Copyright 2007-2011 Jason Chin, All right reserved
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE."""

import sys

class Node:
    
    def __init__(self, name, weight=1):
        self.name = name
        self.properties = {}
        self.children = []
        self.parentNode = []
        self.area = 1.0 #need to normalized such that sum(children.area) = the area assigned
        self.rect = []
        self.weight = weight
    
    def addAChild(self, aNode):
        self.children.append(aNode)
        aNode.parentNode.append(self)    
    
    def addChildren(self, Nodes):
        for n in Nodes:
            self.children.append(n)
            n.parentNode.append(self) 
    
    def numOfChildren(self):
        return len(children)

    def addAProperty(self, pk, pv):
        self.properties[pk] = pv

    def sortChildrenByWeight(self):
        if len(self.children) == 0:
            return 
        tmpNodes = [ [-c.weight, c] for c in self.children] 
        tmpNodes.sort()
        self.children = 1 for c in tmpNodes]
        for c in self.children:
            c.sortChildrenByWeight()
        
    def normalizeChildrenArea(self, totalArea):
        sw = 1.0 * sum([n.weight for n in self.children])
        for n in self.children:
            n.area = n.weight / sw * totalArea;
    

class Treemap:
    
    def __init__(self, rootNode):
        self.rootNode = rootNode
        self.setHeightWidth(300,200)
        self.setPosition(0,0)
        pass
    
    def setWidthHeight(self, width,height):
        self.height = height
        self.width = width
    
    def setPosition(self,left,top):
        self.left = left
        self.top = top
    
    def _worst(self, rowOfNodes, mw):
        rowBlockAreas = [n.area for n in rowOfNodes];
        s = sum(rowBlockAreas)
        rmin = min(rowBlockAreas)
        rmax = max(rowBlockAreas)
        return max([ (1.0*mw*mw * rmax)/(s*s), (1.0*s*s)/(mw*mw * rmin)] )    
            
    def _squarified(self, nodes, rowOfNodes, top, left, mw, mh, layoutDir=1):

        if mw > mh:
            mw, mh = mh, mw
            layoutDir = -layoutDir

        
        nodesToPlot = nodes;
        nodesInRow = []
                
        while nodesToPlot:
                
            c = nodesToPlot[0];
            if not nodesInRow:
                nodesInRow.append(c)
                nodesToPlot = nodesToPlot[1:]
                continue
                
            if self._worst(nodesInRow, min([mw,mh])) >= self._worst(nodesInRow+1, min([mw,mh])):
                nodesInRow = nodesInRow+1
                nodesToPlot = nodesToPlot[1:]
                continue
            else:
                dh = 1.0*sum([n.area for n in nodesInRow])/mw;
                
                self._layoutRowOfNodes(nodesInRow, left, top, dh, layoutDir)
                            
                if layoutDir == 1:
                    top = top + dh
                else:
                    left = left + dh 
                mh = mh - dh;
            
                if mh < mw:
                    mw, mh = mh, mw
                    layoutDir = -layoutDir
                
                nodesInRow = []
                
        dh = 1.0*sum([n.area for n in nodesInRow])/mw;        
        self._layoutRowOfNodes(nodesInRow, left, top, dh, layoutDir)      
        
    def _layoutRowOfNodes(self,rowOfNodes,left,top,mh,ld):
    
        x = left
        y = top
        
        for n in rowOfNodes:
            r = n.area
            if ld == 1:
                dw = 1.0 * r / mh
                dh = 0
                n.rect = [x,y,dw,mh] 
                #print "Rect(%f,%f,%f,%f);" % (x,y,dw,mh)
            else:
                dw = 0
                dh = 1.0 * r / mh
                n.rect = [x,y,mh,dh] 
                #print "Rect(%f,%f,%f,%f);" % (x,y,mh,dh)
            x = x + dw
            y = y + dh    
    
    def _layoutANode(self, aNode, left, top, w, h):
        if len(aNode.children)==0: return
        
        aNode.normalizeChildrenArea(w*h)
        #print aNode,[n for n in aNode.children]

        self._squarified(aNode.children, [], top, left, w, h)
        for n in aNode.children:
            x,y,w,h = n.rect
            self._layoutANode(n, x, y, w, h)
    
    def layout(self):
        w = self.width
        h = self.height
        self.rootNode.area = w * h;
        self.rootNode.rect = [0, 0, w, h];
        #self.rootNode.normalizeChildrenArea(w*h)
        self._layoutANode(self.rootNode, 0, 0, w, h)  

    def writeAll(self, outputStream=sys.stdout):
        self.outputStream = outputStream
        self.printNodes(self.rootNode)
        pass
        
    def writeNodes(self, node):
        self.writeNode(node)
        for n in node.children:
            self.writeNodes(n)

    def writeNode(self, node):
        outputStream.write(node.name)
        outputStream.write("\n")
        outputStream.write(node.rect)
        outputStream.write("\n")
        pass
        
        
class CanvasTreemap(Treemap):
    import random
    def __init__(self, rootNode):
        self.rootNode = rootNode;
        self.setWidthHeight(300,200)
        self.setPosition(0,0)
    
    def printAll(self):
        print "function plotCanvas(cId){"
        print "ctx = document.getElementById(cId).getContext('2d');"
        print "ctx.globalAlpha=0.2;"
        self.printNodes(self.rootNode);
        print "}"

    def printNodes(self, node):
        self.printNode(node)
        for n in node.children:
            self.printNodes(n)        

    def printNode(self,node):
        x,y,dw,dh = node.rect
        x = x+1
        y = y+1
        dw = dw-1
        dh = dh-1
        level = node.properties['level']
        style = {0:'rgb(255,0,0)', 1:'rgb(255,255,0)', 2: 'rgb(0,255,0)', 3: 'rgb(0,255,255)', 4: 'rgb(255,0,255)'}
        
        print "ctx.strokeStyle = '%s';" % style[int(random.uniform(0,4.9))]
        print "ctx.fillStyle = '%s';" % style[int(random.uniform(0,4.9))]
        print "ctx.lineCap='round';"
        print "ctx.lineWidth = %d;" % (10-2*int(random.uniform(0,4)))
        print "ctx.beginPath();"
        print "ctx.moveTo(%.1f,%.1f);" % (x,    y);
        print "ctx.lineTo(%.1f,%.1f);" % (x+dw, y);
        print "ctx.lineTo(%.1f,%.1f);" % (x+dw, y+dh);
        print "ctx.lineTo(%.1f,%.1f);" % (x,    y+dh);
        print "ctx.lineTo(%.1f,%.1f);" % (x,    y);
        if random.uniform(0,1) < 0.5:
            print "ctx.stroke();"
        else:
            print "ctx.fill();"
            #print "ctx.stroke();"
            
            
class DivTreemap(Treemap):

    def __init__(self, rootNode):
        self.rootNode = rootNode;
        self.setWidthHeight(300,200)
        self.setPosition(0,0)
    
    def printAll(self):
        print "<html>"
        print '<head><style type="text/css" media="screen">'
        print """div.node {
            position:absolute; 
            text-align:center;border:2px solid #bbaaaa;
            overflow:hidden;
            vertical-align:middle;
            background:#f0ffff
            }"""
        print '</style><head>'
        print "<body>This is a test.<br/>"
        print "<div id='treemap' style='position:relative;left:30px; top:60px;width:%dpx;height:%dpx;'>" % (self.width, self.height)
        self.printNodes(self.rootNode);
        print "</div></body></html>"

    def printNodes(self, node):
        self.printNode(node)
        for n in node.children:
            self.printNodes(n)        

    
    def printNode(self,node):
        if node.name == "root": return
        x,y,dw,dh = node.rect
        level = node.properties['level']
        fs = node.area**0.5 / 100;
        if fs < 0.75: fs = 0.75
        print "<div id='%s' class='node' \
        style='left:%dpx; top:%dpx; width:%dpx; height:%dpx;\
        line-height:%dpx; font-size:%fem;' onclick='alert(\"%s\")'>%s</div>"\
         % (node.name, x, y, dw-5, dh-5, dh-5, fs, \
         "Do you like to eat "+node.name+"?", node.name)
   

import random    
def testCanvas():
    nodes = []
    for i in range(0,5):
        n1 = Node('node:%d' % i)
        n1.properties['level'] = 1
        n1.weight = random.uniform(1,20)
        for j in range(0,5):
            n2 = Node('node:%d-%d' %(i,j))
            n2.properties['level'] = 2
            n2.weight = random.uniform(1,20)
            for k in range(0,10):
                n3 = Node('node:%d-%d-%d' % (i,j,k))
                n3.properties['level'] = 3
                n3.weight = random.uniform(1,20)
                for l in range(0,50):
                    n4 = Node('node:(%d-%d-%d-%d)' % (i,j,k,l))
                    n4.properties['level'] = 4
                    n4.weight = random.uniform(1,20)
                    n3.addAChild(n4)
                n2.addAChild(n3)
            n1.addAChild(n2)
        nodes.append(n1)
    
    root = Node('root')
    root.addChildren(nodes)
    root.sortChildrenByWeight()
    root.properties['level']=0
    TM = CanvasTreemap(root)
    TM.setWidthHeight(800,800)
    TM.layout()
    print "<html><head>"
    print "<script>"
    TM.printAll() 
    print "</script><head>"
    print """<body onload="plotCanvas('c')">
    <canvas id="c" width=810 height=810></canvas></body></html>"""
    
def testDiv():
    tagArray = {"apples": 12,
	            "oranges": 38,
	            "pears" : 10,
	            "mangos" : 24,
	            "grapes" : 18,
	            "bananas" : 56,
	            "watermelons" : 80,
	            "lemons" : 12,
	            "limes" : 12,
	            "pineapples" : 15,
	            "strawberries" : 20,
	            "coconuts" : 43,
	            "cherries" : 20,
	            "raspberries" : 8,
	            "peaches" : 25
                }
    nodes = []
    for tag in tagArray:
        n = Node(tag)
        n.weight = tagArray[tag]
        n.properties['level']=1
        nodes.append(n)
    root = Node('root')
    root.addChildren(nodes)
    root.properties['level']=0
    root.sortChildrenByWeight()
    TM = DivTreemap(root)
    TM.setWidthHeight(800,250)
    TM.layout()
    TM.printAll() 
    
        
if __name__ == '__main__':
    
    #testCanvas();
    testDiv();

How to implement the Needleman–Wunsch alignment algorithm without using a single loop in Python

I am still fascinated about the programming style using co-routine. Actually, it is possible to implement the Needleman–Wunsch alignment algorithm by purely message passing fashion. The following code shows how to implement the algorithm using co-routines again. I modify the code from my previous post such that the alignment array itself is also generated dynamically. We can completely remove those setting up loops. This code is also annotated to show how it is done. If any reader is interested and have any comment, I do like to hear.

# @author Jason Chin
#
# Copyright (C) 2011 by Jason Chin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""

This is an example to implement Needleman-Wunsch sequence algorithm using python's
co-routine. One of the most interest aspect of such implementation is that there 
is no explicitly loop. You can not find either the "for" nor "while" keywords
in this code.  Each alignment cell is a co-routine and the calculation of alignement
score and backtracking that generates the alignment string are done with a message
passing fashion.  The alignment cells are also generated in a dynamic way.  A 
banded alignment can be done by limiting not generate the whole alignment array but
only the banded part of the array.

Is it useful? I am not sure, but it is definitely fun to show it is possible.

--Jason Chin, Apr. 10, 2011

"""

### Set up the alignment score scheme
matchScore, mismatchScore, gapScore = 4, -3, -4

### Two testing string for alingment
seq1 = "TTAAGTGTAGCCTTGTGTGACATGTATTTTTAT"
seq2 = "TTTCTAGGTAGTTGTGGTGAGTTTAGTTGATAT"

### cellMap is a dictionary that maps integer pairs to the co-routines
cellMap = {}

### For tracking the global best alignment cell
globalBestCellScore = [None, -100000]



def getAnAlignCell(x, y, seq1, seq2):
    """
    This function returns a co-routine the represents an alignment cell at position
    x and y.  The alignment strings are passed explicilty for simplicity.
    """

    def alnCell():

        """
        This is the co-routine for an alignment cell. A alignment cell co-routine is
        excuted in roughly two stage. The first stage it collects the alignment score
        from the cells at (x-1,y-1), (x-1,y), and (x, y-1) and calculate the best 
        alignment score. Depending the alignment path through the alignment cell, a new
        alignment score is generated and passed to the cells at (x+1, y+1), 
        (x+1,y), and (x, y+1). If any of those cell has not be generated, it will 
        generate the co-routine and regisiter them with the cellMap dictionary. After 
        this it waits for the backtracking caculation.  If a cell is in the best alignment
        path, it will pass the best alignment pair to next cell in the best alignment
        path.
        """

        global globalBestCellScore
        global cellMap

        b1, b2 = seq1[x], seq2[y] 
        mx, my = len(seq1), len(seq2)

        cellData = []

        # if the cell is on the top or the left side of the alignment, they only have
        # to wait for one other cell to pass in the alignment score. Otherwise, they
        # need to collect three messages from those (x-1,y), (x,y-1), and (x-1, y-1)
        # before they can do any calculation.
        if x == 0 or y == 0:
            cellId, s = yield 
            cellData.append( (cellId, s) )
        else:
            cellId, s = yield 
            cellData.append( (cellId, s) )
            cellId, s = yield 
            cellData.append( (cellId, s) )
            cellId, s = yield 
            cellData.append( (cellId, s) )

        # find the best cell that gives the best alignment score
        cellData.sort( key=lambda x: -x[1] )
        bestCell, bestScore = cellData[0]

        if bestScore > globalBestCellScore[1]:
            globalBestCellScore = [ (x,y), bestScore ]

        # pass the new alignment score to (x+1, y+1)
        if x+1 < mx and y+1 < my:
            # generate the cell at (x+1, y+1) if necessary
            if (x+1, y+1) not in cellMap:
                cellMap[ (x+1, y+1) ] = getAnAlignCell( x+1, y+1, seq1, seq2 )()
                cellMap[ (x+1, y+1) ].next() 
            if b1 == b2: # a match, seq1[x] == seq[2], new_score = bestScore + matchScore
                cellMap[ (x+1, y+1) ].send( ((x,y), bestScore + matchScore) ) # pass the new score to cell (x+1, y+1)
            else: # a mismatch, seq1[x] != seq[2], new_score = bestScore + mismatchScore
                cellMap[ (x+1, y+1) ].send( ((x,y), bestScore + mismatchScore) ) # pass the new score to cell (x+1, y+1)
        # pass the new alignment score to (x+1, y), namely, the base seq1[x] is aligned to a gap
        if x+1 < mx:
            # generate the cell at (x+1, y) if necessary
            if (x+1, y) not in cellMap:
                cellMap[ (x+1, y) ] = getAnAlignCell( x+1, y, seq1, seq2 )()
                cellMap[ (x+1, y) ].next() 
            cellMap[ (x+1, y) ].send( ((x,y), bestScore + gapScore) )
        # pass the new alignment score to (x, y+1), namely, the base seq2[y] is aligned to a gap
        if y+1 < my:
            # generate the cell at (x, y+1) if necessary
            if (x, y+1) not in cellMap:
                cellMap[ (x, y+1) ] = getAnAlignCell( x, y+1, seq1, seq2 )()
                cellMap[ (x, y+1) ].next() 
            cellMap[ (x, y+1) ].send( ((x,y), bestScore + gapScore) )
            
        path = yield # wait, if the cell is on the best path, the co-routine will resume 


        # generate the alignment pair according the best alinged cells
        if bestCell[0] >= 0 and bestCell[1] >=0 :
            if path == None:
                path = []
            
            if bestCell[0] - x == 0:
                c1 = "-"
            else:
                c1 = seq1[x-1]
            if bestCell[1] - y == 0:
                c2 = "-"
            else:
                c2 = seq2[y-1]
            path.extend( [ (c1, c2) ] )
            
            # send calculated partial path to the best alingment cell to this cell
            cellMap[ bestCell ].send(  path   )
        
        # return the best path if bestCell[0] = -1 or bestCell[1] = -1
        yield path

    return alnCell


# initialize the cell at (0,0)
cellMap[ (0,0) ] = getAnAlignCell( 0, 0, seq1, seq2 )()
# prime it
cellMap[(0,0)].next()
# start the whole execution by sending in the initial score to cell at (0,0)
cellMap[(0,0)].send( ( (-1, -1), 0 ) )

# get the best global cell
bestCell = globalBestCellScore[0]

# continue to excute the best cell co-routine to get the alignment path
bestPath = cellMap[bestCell].next()
bestPath.reverse()

# some simple mechinary to print out the alignment path
alnRes = zip(*bestPath)
print "".join(alnRes[0])
print "".join(alnRes[1])


The result:

$ python coAlign_v2.py   
-TT-AAGTGTAGCCTTGT-GTGACATGTA-TTTTTA
TTTCTAG-GTAG--TTGTGGTGA-GTTTAGTTGATA