I am having exactly the same problem as the one described here: Pandas Dataframe Multiindex Merge
When I use the proposed solution, I get a KeyError.
I'm using pandas version 0.20.1 (the code works on at least version 0.23.0 and above).
Is there a way to perform this merge with version 0.20.1? Thanks for any help!
import pandas as pd
import numpy as np
# Minimal reproduction: two single-column frames whose MultiIndexes hold the
# same eight (outer, inner) tuples but use different level names.
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
]
# Pair the two level lists element-wise into index tuples.
tuples = list(zip(*arrays))

# Left frame: index levels named 'first'/'second', one random column 's1'.
index1 = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
s1 = pd.DataFrame(np.random.randn(8), index=index1, columns=['s1'])

# Right frame: same tuples, index levels named 'third'/'fourth', column 's2'.
index2 = pd.MultiIndex.from_tuples(tuples, names=['third', 'fourth'])
s2 = pd.DataFrame(np.random.randn(8), index=index2, columns=['s2'])

# Join s1's index against s2's index levels, referenced by name via right_on.
# As reported, this raises KeyError: 'third' on pandas 0.20.1 but works on
# pandas 0.23.0 and above.
s1.merge(s2, left_index=True, right_on=['third', 'fourth'])
Running this code produces the following error message:
KeyError Traceback (most recent call last)
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2392 try:
-> 2393 return self._engine.get_loc(key)
2394 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()
KeyError: 'third'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-2-b491d0bdf2dc> in <module>()
17
18
---> 19 s1.merge(s2, left_index=True, right_on=['third', 'fourth'])
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
4818 right_on=right_on, left_index=left_index,
4819 right_index=right_index, sort=sort, suffixes=suffixes,
-> 4820 copy=copy, indicator=indicator)
4821
4822 def round(self, decimals=0, *args, **kwargs):
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
51 right_on=right_on, left_index=left_index,
52 right_index=right_index, sort=sort, suffixes=suffixes,
---> 53 copy=copy, indicator=indicator)
54 return op.get_result()
55
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator)
556 (self.left_join_keys,
557 self.right_join_keys,
--> 558 self.join_names) = self._get_merge_keys()
559
560 # validate the merge keys dtypes. We may need to coerce
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape\merge.py in _get_merge_keys(self)
847 join_names.append(None)
848 else:
--> 849 right_keys.append(right[k]._values)
850 join_names.append(k)
851 if isinstance(self.left.index, MultiIndex):
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2060 return self._getitem_multilevel(key)
2061 else:
-> 2062 return self._getitem_column(key)
2063
2064 def _getitem_column(self, key):
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2067 # get column
2068 if self.columns.is_unique:
-> 2069 return self._get_item_cache(key)
2070
2071 # duplicate columns & possible reduce dimensionality
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1532 res = cache.get(item)
1533 if res is None:
-> 1534 values = self._data.get(item)
1535 res = self._box_item_values(item, values)
1536 cache[item] = res
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Users\User_Name\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2393 return self._engine.get_loc(key)
2394 except KeyError:
-> 2395 return self._engine.get_loc(self._maybe_cast_indexer(key))
2396
2397 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()
KeyError: 'third'