Natural Join Implementation Python

Question

I am working on implementing a natural join in Python. In the code below, the first two lines give each table's attributes, and the next two assignments give each table's tuples (rows).

Expected Output:

[['A', 1, 'A', 'a', 'A'], 
 ['A', 1, 'A', 'a', 'Y'], 
 ['A', 1, 'Y', 'a', 'A'], 
 ['A', 1, 'Y', 'a', 'Y'], 
 ['S', 2, 'B', 'b', 'S']]

And what I got:

[['A', 1, 'A', 'a', 'A', 'Y'], 
 ['A', 1, 'A', 'a', 'A', 'Y']]

I have looked through the code and everything seems to be right. I would appreciate any help.

# Attribute (column) names of each table.  The names that appear in both
# tuples ('B' and 'D') are the columns a natural join matches on.
t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

# Rows of table 1: one value per attribute, in t1atts order.
t1tuples = [['A', 1, 'A', 'a'], 
            ['B', 2, 'Y', 'a'], 
            ['Y', 4, 'B', 'b'], 
            ['A', 1, 'Y', 'a'], 
            ['S', 2, 'B', 'b']]

# Rows of table 2: one value per attribute, in t2atts order.
t2tuples = [[1, 'a', 'A'], 
            [3, 'a', 'B'], 
            [1, 'a', 'Y'], 
            [2, 'b', 'S'], 
            [3, 'b', 'E']]

def findindices(t1atts, t2atts):
  """Locate the attributes that the two tables have in common.

  Returns two parallel lists: entry k of the first list is a position in
  t1atts, and entry k of the second list is the position in t2atts that
  holds the same attribute name.  Pairs are ordered by their t1atts
  position first, then by their t2atts position.
  """
  # Collect every (t1 position, t2 position) pair whose names are equal.
  shared = [(pos1, pos2)
            for pos1, name1 in enumerate(t1atts)
            for pos2, name2 in enumerate(t2atts)
            if name1 == name2]
  # Split the pairs back into the two parallel index lists.
  return [pos1 for pos1, _ in shared], [pos2 for _, pos2 in shared]

def main():
  """Attempt a natural join of t1tuples with t2tuples and print the rows.

  NOTE(review): this is the version that produces the incorrect output
  shown in the question.  The comments below mark the lines responsible.
  """
  tpl=0; tpl2=0; i=0; j=0; count=0; result=[]
  # Parallel lists of the join-column positions in each table.
  t1index, t2index = findindices(t1atts, t2atts)
  for tpl in t1tuples:
    # BUG: tpl2 is initialised once above and never reset to 0, so after
    # the first t1 row has been compared with every t2 row this condition
    # is already false and all later t1 rows are skipped.
    while tpl2 in range(len(t2tuples)):
      i=0; j=0
      # Walk the join columns in lockstep; count tallies the matching ones.
      while (i in range(len(t1index))) and (j in range(len(t2index))):
          if tpl[t1index[i]] != t2tuples[tpl2][t2index[j]]:
            # Mismatch: push both counters past the end to leave the loop.
            i=len(t1index)
            j=len(t1index)
          else:
            count+=1
          i+=1
          j+=1
      # The rows join only if every join column matched.
      if count == len(t1index):
        # Values of the t2 row that are not join columns.
        extravals = [val for index, val in enumerate(t2tuples[tpl2]) if index not in t2index]
        # BUG: temp is a second name for the same list, not a copy.
        temp = tpl
        # BUG: += extends that list in place, so the row stored in t1tuples
        # grows and every result entry refers to the same list object.
        tpl += extravals
        result.append(tpl)
        # No-op: temp and tpl are still the same (already extended) list.
        tpl = temp
      count=0
      tpl2+=1
  # Python 2 print statement.
  print result

yes, I am. Then, the output will be a list of tuples as well. — user3264378
– user3264378, Commented Feb 3, 2014 at 4:59
But, I am not able to see any rules to apply to get '['A', 1, 'A', 'a', 'A', 'Y']' from ['A', 1, 'A', 'a'] + [1, 'a', 'A'] — James Sapam
– James Sapam, Commented Feb 3, 2014 at 5:02
in natural join, every tuple in t1tuples is compared with every tuple in t2tuples...if a match is found, that tuple is attached to the result — user3264378
– user3264378, Commented Feb 3, 2014 at 5:06
That is the output I got or the incorrect output. Please look under "expected output" to see what the program should output. You can also run the program yourself by calling main() and see the output. — user3264378
– user3264378, Commented Feb 3, 2014 at 5:12

Potrebic · Accepted Answer · 2014-02-03 07:00:54Z

Here's what I came up with. I'd do some more refactoring, etc., before calling it done.

import pprint

# Attribute (column) names of each table.
t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

# Rows of table 1: one value per attribute, in t1atts order.
t1tuples = [
    ['A', 1, 'A', 'a'],
    ['B', 2, 'Y', 'a'],
    ['Y', 4, 'B', 'b'],
    ['A', 1, 'Y', 'a'],
    ['S', 2, 'B', 'b']]

# Rows of table 2: one value per attribute, in t2atts order.
t2tuples = [
    [1, 'a', 'A'],
    [3, 'a', 'B'],
    [1, 'a', 'Y'],
    [2, 'b', 'S'],
    [3, 'b', 'E']]


# Column-name sets, used to work out which columns the tables share.
t1columns = set(t1atts)
t2columns = set(t2atts)
# Column name -> position of that column within a row of the table.
t1map = {k: i for i, k in enumerate(t1atts)}
t2map = {k: i for i, k in enumerate(t2atts)}

# Columns present in both tables: the natural join matches on these.
join_on = t1columns & t2columns
# Columns that only table 2 has (kept as a set for membership tests).
diff = t2columns - join_on
# Sets iterate in arbitrary order, so the extra columns are listed in
# t2atts order to give joined rows a stable, predictable column layout.
extra_columns = [name for name in t2atts if name in diff]


def match(row1, row2):
    """Return True when row1 and row2 agree on every shared column."""
    return all(row1[t1map[rn]] == row2[t2map[rn]] for rn in join_on)


# Nested-loop join: compare every table-1 row with every table-2 row.
results = []
for t1row in t1tuples:
    for t2row in t2tuples:
        if match(t1row, t2row):
            # Build a new list so the input rows are never mutated.
            row = t1row + [t2row[t2map[rn]] for rn in extra_columns]
            results.append(row)

pprint.pprint(results)

And I get the expected results:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]

James Sapam · Accepted Answer · 2014-02-03 06:57:53Z

OK, here is the solution. Please verify it and let me know if it works for you:

I changed the naming a little so that I could understand it myself:

#!/usr/bin/python

# Attribute (column) names of each table.
table1 = ('A', 'B', 'C', 'D')
table2 = ('B', 'D', 'E')

# Rows of table 1: one value per attribute, in table1 order.
row1 = [['A', 1, 'A', 'a'],
        ['B', 2, 'Y', 'a'],
        ['Y', 4, 'B', 'b'],
        ['A', 1, 'Y', 'a'],
        ['S', 2, 'B', 'b']]

# Rows of table 2: one value per attribute, in table2 order.
row2 = [[1, 'a', 'A'],
        [3, 'a', 'B'],
        [1, 'a', 'Y'],
        [2, 'b', 'S'],
        [3, 'b', 'E']]

def findindices(table1, table2):
    """Return the positions of the attributes shared by both tables.

    table1 and table2 are sequences of attribute names.  The result is a
    pair of parallel lists: entry k of the first list is the position of a
    shared attribute in table1, and entry k of the second list is the
    position of the same attribute in table2.  Shared attributes are
    reported once each, in the order they appear in table1.
    """
    # Walk table1 in order rather than iterating a set intersection, so
    # the result is the same on every run.
    shared = []
    for name in table1:
        if name in table2 and name not in shared:
            shared.append(name)
    tup_index1 = [table1.index(name) for name in shared]
    tup_index2 = [table2.index(name) for name in shared]
    return tup_index1, tup_index2

def main():
    """Natural-join row1 with row2 and return the joined rows.

    Two rows join when they hold equal values in every attribute that
    table1 and table2 share.  A joined row is a new list made of the whole
    row from row1 followed by the row2 values of the attributes that only
    table2 has, in table2 order.
    """
    tup_index1, tup_index2 = findindices(table1, table2)

    # Materialise the index pairs.  On Python 3, zip() returns a one-shot
    # iterator that would be exhausted after the first row comparison,
    # making every later pair of rows look like a match.
    merge_tup = list(zip(tup_index1, tup_index2))

    # Positions of the table2 columns that are not join columns.  These are
    # computed from the attributes instead of assuming the single extra
    # column is the last one.
    join_cols2 = set(tup_index2)
    extra_cols = [pos for pos in range(len(table2)) if pos not in join_cols2]

    final_lol = []
    for tup1 in row1:
        for tup2 in row2:
            if all(tup1[pos1] == tup2[pos2] for pos1, pos2 in merge_tup):
                final_lol.append(
                    list(tup1) + [tup2[pos] for pos in extra_cols])
    return final_lol

# Run the join and pretty-print its result only when executed as a script.
if __name__ == '__main__':
    import pprint
    pprint.pprint(main())

Output:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]

Collectives™ on Stack Overflow

Natural Join Implementation Python

2 Answers 2

Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related