One way of doing this is to define a function that will look at each row of dfA as a list and compare it with each row of dfB:
import pandas as pd
# Sample input: dfA holds (city, state, zip) rows; dfB holds some of the same
# values with the columns shuffled, plus rows that have no counterpart in dfA.
data_A = {
    'Index': [0, 1, 2],
    'Column 1': ['Albuquerque', 'New York', 'Miami'],
    'Column 2': ['NM', 'NY', 'FL'],
    'Column 3': ['87101', '10009', '33101'],
}
data_B = {
    'Index': [0, 1, 2, 3],
    'Column 1': ['NM', 'Atlanta', 'San Francisco', '10009'],
    'Column 2': ['Albuquerque', 'GA', 'CA', 'NY'],
    'Column 3': ['87101', '30033', '94016', 'New York'],
}

# The 'Index' column becomes the row label of each frame.
dfA = pd.DataFrame(data_A).set_index(keys='Index')
dfB = pd.DataFrame(data_B).set_index(keys='Index')

# Show both frames, one after the other.
print(dfA, dfB, sep='\n')
def match_rows(dfA, dfB):
    """Split the rows of two frames into matched and unmatched rows.

    Two rows match when they hold the same values regardless of column
    order. Values are compared as a multiset, so repeated values count:
    ``['a', 'a', 'b']`` does not match ``['a', 'b', 'b']``.

    Args:
        dfA: first DataFrame.
        dfB: second DataFrame.

    Returns:
        A tuple ``(in_both, not_in_both)`` of lists of ``(index, values)``
        pairs, where ``values`` is the row as a list.
        ``in_both`` holds the rows of dfA that have a match in dfB, in dfA
        order. ``not_in_both`` holds the unmatched rows of dfA followed by
        the unmatched rows of dfB, each in their frame's order.

    Raises:
        TypeError: if a cell value is unhashable.
    """
    from collections import Counter

    def _row_key(values):
        # Hashable, order-insensitive fingerprint that keeps duplicate counts.
        return frozenset(Counter(values).items())

    # Materialise each frame once instead of re-running iterrows() per row.
    rows_a = [(index, row.to_list()) for index, row in dfA.iterrows()]
    rows_b = [(index, row.to_list()) for index, row in dfB.iterrows()]
    keys_a = {_row_key(values) for _, values in rows_a}
    keys_b = {_row_key(values) for _, values in rows_b}

    in_both = []
    not_in_both = []
    for index_a, values_a in rows_a:
        if _row_key(values_a) in keys_b:
            in_both.append((index_a, values_a))
        else:
            not_in_both.append((index_a, values_a))

    # Rows of dfB with no counterpart in dfA go after the dfA leftovers.
    not_in_both.extend(
        (index_b, values_b)
        for index_b, values_b in rows_b
        if _row_key(values_b) not in keys_a
    )
    return in_both, not_in_both
# Compare the two sample frames. In a notebook/REPL the bare tuple on the last
# line echoes both result lists; in a plain script it has no effect.
matches, non_matches = match_rows(dfA=dfA, dfB=dfB)
(matches, non_matches)
def format_output(matches, non_matches, df_a=None):
    """Render ``(index, values)`` result pairs as human-readable strings.

    Args:
        matches: list of ``(index, values)`` pairs for matched rows; each is
            reported as coming from dfA.
        non_matches: list of ``(index, values)`` pairs for unmatched rows,
            which may come from either frame.
        df_a: the first DataFrame, used to decide which frame an unmatched
            row belongs to. Defaults to the module-level ``dfA``.

    Returns:
        A tuple ``(formatted_matches, formatted_non_matches)`` of lists of
        strings, in the same order as the inputs.
    """
    if df_a is None:
        df_a = dfA  # backward-compatible fallback to the module-level frame

    def _origin(index, values):
        # An index label alone is ambiguous: both frames may use the same
        # labels. Attribute a row to dfA only when dfA really holds these
        # values at that label.
        # NOTE(review): rows containing NaN may fail this equality check.
        if index not in df_a.index:
            return 'dfB'
        rows_at_label = [row.to_list() for _, row in df_a.loc[[index]].iterrows()]
        return 'dfA' if list(values) in rows_at_label else 'dfB'

    formatted_matches = [
        f"Matching from dfA, Index {index}: {values}"
        for index, values in matches
    ]
    formatted_non_matches = [
        f"Not Matching from {_origin(index, values)}, Index {index}: {values}"
        for index, values in non_matches
    ]
    return formatted_matches, formatted_non_matches
# Turn the raw (index, values) pairs into readable strings. In a notebook/REPL
# the bare tuple on the last line echoes both lists; in a script it does nothing.
formatted_matches, formatted_non_matches = format_output(
    matches=matches,
    non_matches=non_matches,
)
(formatted_matches, formatted_non_matches)
I introduced a second function here to format the output in a more understandable way:
(["Matching from dfA, Index 0: ['Albuquerque', 'NM', '87101']",
"Matching from dfA, Index 1: ['New York', 'NY', '10009']"],
["Not Matching from dfA, Index 2: ['Miami', 'FL', '33101']",
"Not Matching from dfA, Index 1: ['Atlanta', 'GA', '30033']",
"Not Matching from dfA, Index 2: ['San Francisco', 'CA', '94016']"])
.merge() you already attempted, and supply a reprex that we can run.