Given the following function, what would be the correct and pythonic way to archiving the same (and faster) result?
My code is not efficient and I believe I'm missing something that is staring at me.
The idea is to find a pattern that is [[A,B],[A,C],[C,B]] without having to generate additional permutations (since this will result in a higher processing time for the comparisons).
The length of the dictionary fed into find_path in real-life would be approximately 10,000, so having to iterate over that amount with the current code version below is not efficient.
from time import perf_counter
from typing import List, Generator, Dict
def find_path(data: Dict) -> Generator:
for first_pair in data:
pair1: List[str] = first_pair.split("/")
for second_pair in data:
pair2: List[str] = second_pair.split("/")
if pair2[0] == pair1[0] and pair2[1] != pair1[1]:
for third_pair in data:
pair3: List[str] = third_pair.split("/")
if pair3[0] == pair2[1] and pair3[1] == pair1[1]:
amount_pair_1: int = data.get(first_pair)[
"amount"
]
id_pair_1: int = data.get(first_pair)["id"]
amount_pair_2: int = data.get(second_pair)[
"amount"
]
id_pair_2: int = data.get(second_pair)["id"]
amount_pair_3: int = data.get(third_pair)[
"amount"
]
id_pair_3: int = data.get(third_pair)["id"]
yield (
pair1,
amount_pair_1,
id_pair_1,
pair2,
amount_pair_2,
id_pair_2,
pair3,
amount_pair_3,
id_pair_3,
)
raw_data = {
"EZ/TC": {"id": 1, "amount": 9},
"LM/TH": {"id": 2, "amount": 8},
"CD/EH": {"id": 3, "amount": 7},
"EH/TC": {"id": 4, "amount": 6},
"LM/TC": {"id": 5, "amount": 5},
"CD/TC": {"id": 6, "amount": 4},
"BT/TH": {"id": 7, "amount": 3},
"BT/TX": {"id": 8, "amount": 2},
"TX/TH": {"id": 9, "amount": 1},
}
processed_data = list(find_path(raw_data))
for i in processed_data:
print(("The path to traverse is:", i))
>> ('The path to traverse is:', (['CD', 'TC'], 4, 6, ['CD', 'EH'], 7, 3, ['EH', 'TC'], 6, 4))
>> ('The path to traverse is:', (['BT', 'TH'], 3, 7, ['BT', 'TX'], 2, 8, ['TX', 'TH'], 1, 9))
>> ('Time to complete', 5.748599869548343e-05)
# Timing for a simple ref., as mentioned above, the raw_data is a dict containing about 10,000 keys