Okay, I think you want to calculate the distances between every leaf node. So I'm having a go at solving your problem, not answering your question.
Your commonAncestor algorithm is flawed as it assumes that the leaf nodes are all at the same depth. They are not.
The first solution that springs to mind is to determine all of the leaf nodes and calculate the path to the root node for each of them. Determine the closest common ancestor by comparing both paths in reverse.
The output from this is a dictionary of pairs of nodes and the number of hops between them.
from itertools import combinations
data = {'A': [('AD', 4.0), None, None], 'C': [('ADBFGC', 14.5), None, None], 'B': [('BF', 0.5), None, None], 'E': [('ADBFGCE', 17.0), None, None], 'D': [('AD', 4.0), None, None], 'G': [('BFG', 6.25), None, None], 'F': [('BF', 0.5), None, None], 'ADBFG': [('ADBFGC', 6.25), ('AD', 4.25), ('BFG', 2.0)], 'BF': [('BFG', 5.75), ('B', 0.5), ('F', 0.5)], 'ADBFGC': [('ADBFGCE', 2.5), ('ADBFG', 6.25), ('C', 14.5)], 'ADBFGCE': [None, ('ADBFGC', 2.5), ('E', 17.0)], 'BFG': [('ADBFG', 2.0), ('BF', 5.75), ('G', 6.25)], 'AD': [('ADBFG', 4.25), ('A', 4.0), ('D', 4.0)]}
def get_path(tree,leaf):
path = []
location = leaf
while True:
path.append(location)
parent = tree.get(location)[0]
if parent:
location = parent[0]
else:
break
return path
def get_leaves(tree):
return [ x for (x,y) in tree.items() if y[1] is None and y[2] is None ]
def leafDistances(tree):
paths = {}
leaves = get_leaves(tree)
for leaf in leaves:
paths[leaf] = get_path(tree,leaf)
results = {}
for l1,l2 in combinations(leaves,2):
commonAncestor = [ x for (x,y) in zip(paths[l1][::-1],paths[l2][::-1]) if x == y ][-1]
distance = paths[l1].index(commonAncestor) + paths[l2].index(commonAncestor)
results[(l1,l2)] = distance
print "%s <-> %s Ancestor == %s, distance == %s\nPath of %s == %s\nPath of %s == %s" % (l1,l2,commonAncestor,distance,l1,paths[l1],l2,paths[l2])
return results
leafDistances(data)
This prints out for clarity:
A <-> C Ancestor == ADBFGC, distance == 4
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
A <-> B Ancestor == ADBFG, distance == 5
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
A <-> E Ancestor == ADBFGCE, distance == 5
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of E == ['E', 'ADBFGCE']
A <-> D Ancestor == AD, distance == 2
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
A <-> G Ancestor == ADBFG, distance == 4
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
A <-> F Ancestor == ADBFG, distance == 5
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> B Ancestor == ADBFGC, distance == 5
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> E Ancestor == ADBFGCE, distance == 3
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of E == ['E', 'ADBFGCE']
C <-> D Ancestor == ADBFGC, distance == 4
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> G Ancestor == ADBFGC, distance == 4
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> F Ancestor == ADBFGC, distance == 5
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
B <-> E Ancestor == ADBFGCE, distance == 6
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of E == ['E', 'ADBFGCE']
B <-> D Ancestor == ADBFG, distance == 5
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
B <-> G Ancestor == BFG, distance == 3
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
B <-> F Ancestor == BF, distance == 2
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
E <-> D Ancestor == ADBFGCE, distance == 5
Path of E == ['E', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
E <-> G Ancestor == ADBFGCE, distance == 5
Path of E == ['E', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
E <-> F Ancestor == ADBFGCE, distance == 6
Path of E == ['E', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
D <-> G Ancestor == ADBFG, distance == 4
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
D <-> F Ancestor == ADBFG, distance == 5
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
G <-> F Ancestor == BFG, distance == 3
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
otu1,otu2,tree = tree[otu1][0][0],tree[otu2][0][0],treeyou are redefiningtreeas the same object. It is either an error or a useless assignement