Skip to main content
I updated the code to reflect my current code and changed the description to be more precise with the question.
Source Link

Optimizing Parsing Files,Iteration of List, and Dictionaries, Python

This project's goal is to parse through many small filesa large file containing data. I parse that data into a list containing a dictionary. I then do calculations based on that data and optionally plot it for visualization purposes. I use the data for simple calculations to the rewards based on their performance. I wrote this so that each worker mining to a pool will be rewarded fairly. This allows multiple people to mine on the same account for quicker payout times.

<utc_time>Worker_Data.WorkersData:

pool_current_hashrate=787410673
pool_average_hashrate=882854395
pool_reported_hashrate=765620611
current_hashrate_rig0=381774870
average_hashrate_rig0=449380846
reported_hashrate_rig0=353983862
current_hashrate_rig1=405635805
average_hashrate_rig1=433473558
reported_hashrate_rig1=410862369
time_stamp=1621017328
eth=0Data={'pool_current_hashrate': '100215904', 'pool_average_hashrate': '61640734', 'pool_reported_hashrate': '78165786', 'current_hashrate_alex147': '47721859', 'average_hashrate_alex147': '35791394', 'reported_hashrate_alex147': '36895352', 'current_hashrate_henry147': '52494045', 'average_hashrate_henry147': '25849340', 'reported_hashrate_henry147': '41354162', 'time_stamp': '1620751617', 'eth': '0.19041033273478156008999485617836284', 'zil': '4.654624711084'}
zil=109Data={'pool_current_hashrate': '100215904', 'pool_average_hashrate': '61640734', 'pool_reported_hashrate': '78337185', 'current_hashrate_alex147': '47721859', 'average_hashrate_alex147': '35791394', 'reported_hashrate_alex147': '36890956', 'current_hashrate_henry147': '52494045', 'average_hashrate_henry147': '25849340', 'reported_hashrate_henry147': '41509445', 'time_stamp': '1620751678', 'eth': '0.258827011171008999485617836284', 'zil': '4.654624711084'}

I parse this file which contains tens of thousands of these fileslines to calculate how much 'work' each miner has done in the time between payouts and then calculate their take of the change in balance. Each line is a separate dictionary in a list.

from ast import osliteral_eval

PATH = "A:\\Python Project\\ezil_api\\Data\\" # Pathpath of savedata filesfile
WORKER_SPLIT = 0.50  # used if start balance is not 0


def read_configmake_file(type_configname, path=PATH):
   config_dict, deftype_conf, get_data(file_namepath=PATH):
        return_dict = {}
  with open(path + name + "." with+ open(file_nametype_conf, "r+""a+") as configfile:
          for keys, linesvalues =in configzip(config_dict.readlineskeys()
            for line in lines:
                if "\n" in line:
                    line = line[:-1]
                key, value = lineconfig_dict.splitvalues("=")):
                return_dict[key] file.write(f"{keys}= value
        return return_dict{values}\n")

    file_list = []
    
    _, _, filenames = next(os.walk(path))
    filenames.sort()
    for files in filenames:
        index = -1 * len(type_config)
        if files[index:] == type_config:
            local_path = path + files[:index] + type_config
            file_list.append(get_data(local_path))
    return file_list

def read_data(path, file_name):
    data = []
    with open(path + file_name, "r+") as config:
        lines = config.readlines()
        for line in lines:
            line = line[line.find("=") + 1:]
            line_data = literal_eval(line)
            data.append(line_data)
    return data


def eval_data():
    workers = []
    start_balance_eth = 0
    start_balance_zil = 0
    balance_eth = []
    balance_zil = []
    balance_delta_eth = []
    balance_delta_zil = []
    delta_eth_range = [0]
    time = []
    time_delta = []
    balance_workers_eth = {}
    balance_workers_zil = {}
    hashrate_workers = {}
    integral_worker = {}
    worker_percentage = {}
    b = {} # used for ploting, contains balance of a worker with respect to time
    odd = 0
    even = 0
    hashrate_pool = []
    balance_eth_delta = []
    total_integral = []
    temp_integral = 0
    files_workers = read_config(".Workers", path=PATH)

    forfiles_workers worker_data= inread_data(path=PATH, files_workers:file_name="Worker_Data.Data")

    from time import time as t

    for worker_data in files_workers:
        index = files_workers.index(worker_data)
        for keys in worker_data.keys():
            if "average_hashrate_" in keys:
                worker = keys[17:]
                if worker not in workers:
                    workers.append(worker)
                    hashrate_workers[worker] = []
                    balance_workers_eth[worker] = 0
                    balance_workers_zil[worker] = 0
                    integral_worker[worker] = []
                    worker_percentage[worker] = []
                    b[worker] = []

        worker_list_temp = [worker_temp[17:] for worker_temp in worker_data.keys() if "average_hashrate_" in worker_temp]

        for worker in worker_list_temp:
            if worker not in workers:
                workers.append(worker)
                hashrate_workers[worker] = []
                balance_workers_eth[worker] = 0
                balance_workers_zil[worker] = 0
                integral_worker[worker] = []
                worker_percentage[worker] = []
                b[worker] = []

        current_balance_eth = float(worker_data["eth"])
        current_balance_zil = float(worker_data["zil"])
        current_time = int(worker_data["time_stamp"])

        for worker in workers:
            current_worker_in_keys = False
            for keys in worker_data.keys():
                if worker in keys:
                    current_worker_in_keys = True
            if current_worker_in_keys:
                worker_hashrate = worker_data[f"current_hashrate_{worker}"]
                hashrate_workers[worker].append(int(worker_hashrate))
            else:
                hashrate_workers[worker].append(0)

        hashrate_pool.append(float(worker_data["pool_current_hashrate"]))

        if index > 0:
            if current_balance_eth > balance_eth[-1]:
                delta_eth = current_balance_eth - balance_eth[-1]
                balance_delta_eth.append(delta_eth)
            else:
                balance_delta_eth.append(0)
            if current_balance_zil > balance_zil[-1]:
                delta_zil = current_balance_zil - balance_zil[-1]
                balance_delta_zil.append(delta_zil)
            else:
                balance_delta_zil.append(0)

            delta_time = current_time - time[-1]
            time_delta.append(delta_time)

        else:
            start_balance_eth = current_balance_eth
            start_balance_zil = current_balance_zil
            balance_delta_eth.append(0)
            balance_delta_zil.append(0)

        balance_eth.append(current_balance_eth)
        balance_zil.append(current_balance_zil)
        time.append(current_time)

    for current_delta_ethd_eth, current_indexindex_temp in zip(balance_delta_eth, range(len(balance_delta_eth))):
        if current_delta_ethd_eth != 0:
            delta_eth_range.append(current_indexindex_temp)

    for worker in workers:
        # if it doesn't have data for balances, it splits it between workers
        if start_balance_zil > 0:
            balance_workers_zil[worker] += start_balance_zil * WORKER_SPLIT
        if start_balance_eth > 0:
            balance_workers_eth[worker] += start_balance_eth * WORKER_SPLIT

        for index in range(len(delta_eth_range)):
            # integral of hashrate
            if index !=> 0:
                temp_time_delta_list = time_delta[delta_eth_range[index - 1]:delta_eth_range[index]]
                temp_hashrate_list = [hashrate_workers[worker][delta_eth_range[index - 1]:delta_eth_range[index]],
                                      temp_time_delta_list]
                while len(temp_hashrate_list[0]) < len(temp_hashrate_list[1]):
                    temp_hashrate_list[0].append(0)

                temp_hashrate_len = len(temp_hashrate_list[0])
                x = temp_hashrate_list[0]
                y = temp_hashrate_list[1]
                if temp_hashrate_len > 4:
                    # do simpsons integration:
                    # start = (delta x * h[0] + delta x * h[-1])/3
                    # odd = (delta x * h[1] + delta x * h[3]...) * (4/3)
                    # evens = (delta x * h[2] + delta x h[4]...) * (2/3)
                    start = (x[0] * y[0] + x[-1] * y[-1]) * (4 / 3)

                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            if i % 2:
                                odd += (x[i] * y[i]) * (4 / 3)
                            else:
                                even += (x[i] * y[i]) * (2 / 3)

                    integral = start + even + odd
                    integral_worker[worker].append(integral)
                    even = 0
                    odd = 0

                elif temp_hashrate_len > 1:
                    # do trapezoid integration
                    # delta x/2(h[0] + 2*h[1] + 2*h[2]... + h[-1])
                    trap_integral = ((x[0] * y[0]) + (x[-1] * y[-1]))
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            trap_integral += (x[i] * y[i])
                    integral_worker[worker].append(trap_integral)

                elif temp_hashrate_len == 1:
                    # do riemann sum integration
                    # y * delta x
                    riemann_integral = y[0] * x[0]
                    integral_worker[worker].append(riemann_integral)

    for index in range(len(integral_worker[workers[0]])):
        for worker in integral_worker.keys():
            temp_integral += integral_worker[worker][index]
        total_integral.append(temp_integral)
        temp_integral = 0

    for worker in workers:
        for integral_t, worker_integral in zip(total_integral, integral_worker[worker]):
            percentagetry:
 = worker_integral / integral_t
            worker_percentage[worker].append(percentageworker_integral / integral_t)
            except ZeroDivisionError:
    balance_eth_delta = []          pass

    for delta in balance_delta_eth:
        if delta != 0:
            balance_eth_delta.append(delta)

    for worker in workers:
        for percentage, delta in zip(worker_percentage[worker], balance_eth_delta):
            balance_workers_eth[worker] += percentage * delta
            b[worker].append(balance_workers_eth[worker])

    def plot():
        import matplotlib.pyplot as plt
        time.sort(reverse=True)
        plt.xlabel = "Time"Delta inBalance Minutes"index"
        plt.ylabel = "Balance in"ETH ETH"Balance"
        plt.title("Time"Index Vs Balance"ETH")

        for worker_nameworker_d in workers:
            temp_x = []
            for x_valueindex_d in range(len(b[worker_name]worker_percentage[worker_d])):
                temp_x.append(x_valueindex_d + 1)
            x_listx_d = temp_x
            y_listy_d = b[worker_name]b[worker_d]
            plt.plot(x_listx_d, y_listy_d, "-", label=f"{worker_nameworker_d}")

        def plot_ddx():
            d_list = []
            for local_index in range(len(delta_eth_range)):
                if local_index > 0:
                    temp_index = delta_eth_range[local_index]
                    prev_temp_index = delta_eth_range[local_index - 1]

                    delta_eth_temp = balance_eth[temp_index] - balance_eth[prev_temp_index]
                    delta_time_temp = time[temp_index] - time[prev_temp_index]
                    average_hashrate_temp = (sum(hashrate_pool[prev_temp_index:temp_index])) / (
                            temp_index - prev_temp_index)

                    d_list.append(
                        ((delta_eth_temp / delta_time_temp) / average_hashrate_temp) * 1000000 * 60 * 60 * 24 * 10)
                    # magic numbers are as follows:
                    # 1000000, convert to per mh/s,
                    # 60*60*24, convert from seconds to days,

            plt.plot(x_d, d_list, "-", label="ETH per 10 Mh/s per day")

        plt.legend()
        plt.show()

    for keys in balance_workers_eth.keys():
        print(balance_workers_ethkeys, balance_workers_eth[keys])

    plot()


if __name__ == "__main__":
    eval_data()
 

Is there any way I can speed this up? Currently, at 422,200 files000 lines it takes about 2040 seconds to execute this program. I feel like this could be done more efficiently, becauseThe main section of code that takes up the most time is the part in which I iterate through files_workers in the same list multiple times throughoutline: for worker_data in files_workers: This takes up a large percentage of the codetime needed based on my testing and peg's a core on my cpu. AnyIs there a more efficient approach to this problem? I appreciate any help is appreciated, thanks in advance/constructive criticism.

Optimizing Parsing Files, List, and Dictionaries, Python

This project's goal is to parse through many small files containing data. I then do calculations based on that data and optionally plot it for visualization purposes. I use the data for simple calculations to the rewards based on their performance. I wrote this so that each worker mining to a pool will be rewarded fairly. This allows multiple people to mine on the same account for quicker payout times.

<utc_time>.Workers:

pool_current_hashrate=787410673
pool_average_hashrate=882854395
pool_reported_hashrate=765620611
current_hashrate_rig0=381774870
average_hashrate_rig0=449380846
reported_hashrate_rig0=353983862
current_hashrate_rig1=405635805
average_hashrate_rig1=433473558
reported_hashrate_rig1=410862369
time_stamp=1621017328
eth=0.19041033273478156
zil=109.258827011171

I parse thousands of these files to calculate how much 'work' each miner has done in the time between payouts and then calculate their take of the change in balance.

import os

PATH = "A:\\Python Project\\ezil_api\\Data\\" # Path of save files
WORKER_SPLIT = 0.50  # used if start balance is not 0


def read_config(type_config, path=PATH):
    """Read every file in *path* whose name ends with *type_config*.

    Each matching file holds one ``key=value`` pair per line; each file is
    parsed into a dict and the dicts are returned as a list ordered by the
    sorted file names.

    :param type_config: file-name suffix to match, e.g. ``".Workers"``.
    :param path: directory to scan; must end with a path separator because
        file paths are built by plain string concatenation.
    :return: list of dicts, one per matching file.
    """
    def get_data(file_name):
        """Parse one key=value file into a dict of strings."""
        return_dict = {}
        # "r" would suffice; "r+" kept to match the rest of the project.
        with open(file_name, "r+") as config:
            # iterate lazily instead of materialising all lines at once
            for line in config:
                line = line.rstrip("\n")
                if not line:
                    continue  # tolerate blank/trailing empty lines
                # split on the FIRST "=" only, so values may contain "="
                key, value = line.split("=", 1)
                return_dict[key] = value
        return return_dict

    file_list = []
    # next(os.walk(path)) yields only the top directory level (no recursion)
    _, _, filenames = next(os.walk(path))
    filenames.sort()
    for name in filenames:
        # endswith() replaces the error-prone negative-slice comparison
        if name.endswith(type_config):
            file_list.append(get_data(path + name))
    return file_list


def eval_data():
    """Read all saved ".Workers" snapshots, apportion the pool's ETH/ZIL
    balance increases between workers by integrating each worker's hashrate
    over the time between payouts, print the per-worker ETH balances, and
    plot them.

    NOTE(review): the Simpson/trapezoid loops iterate
    ``range(len(temp_hashrate_list))`` where ``temp_hashrate_list`` is a
    2-element list of lists, so ``i`` only takes the values 0 and 1 —
    ``range(temp_hashrate_len)`` was presumably intended; confirm before
    relying on the integration results.
    """
    workers = []                # worker names, in order of first appearance
    start_balance_eth = 0       # balances seen in the very first snapshot
    start_balance_zil = 0
    balance_eth = []            # balance per snapshot, in file order
    balance_zil = []
    balance_delta_eth = []      # positive balance increase per snapshot, else 0
    balance_delta_zil = []
    delta_eth_range = [0]       # snapshot indices where the ETH balance rose
    time = []                   # time_stamp per snapshot (shadows the time module name)
    time_delta = []             # seconds between consecutive snapshots
    balance_workers_eth = {}    # worker -> apportioned ETH balance
    balance_workers_zil = {}
    hashrate_workers = {}       # worker -> hashrate per snapshot (0 when absent)
    integral_worker = {}        # worker -> integrated hashrate per payout interval
    worker_percentage = {}      # worker -> share of the pool's work per interval
    b = {} # used for plotting, contains balance of a worker with respect to time
    odd = 0                     # Simpson's-rule accumulators, reset after each interval
    even = 0
    hashrate_pool = []
    total_integral = []         # sum of all workers' integrals per interval
    temp_integral = 0
    files_workers = read_config(".Workers", path=PATH)

    for worker_data in files_workers:

        # NOTE(review): list.index() rescans the list each iteration, making
        # this loop O(n^2) — enumerate() would give the index in O(1).
        index = files_workers.index(worker_data)
        # register any worker seen for the first time in this snapshot
        for keys in worker_data.keys():
            if "average_hashrate_" in keys:
                worker = keys[17:]  # strip the "average_hashrate_" prefix (17 chars)
                if worker not in workers:
                    workers.append(worker)
                    hashrate_workers[worker] = []
                    balance_workers_eth[worker] = 0
                    balance_workers_zil[worker] = 0
                    integral_worker[worker] = []
                    worker_percentage[worker] = []
                    b[worker] = []

        current_balance_eth = float(worker_data["eth"])
        current_balance_zil = float(worker_data["zil"])
        current_time = int(worker_data["time_stamp"])

        # record each known worker's hashrate for this snapshot
        # (0 when the worker does not appear in the snapshot)
        for worker in workers:
            current_worker_in_keys = False
            for keys in worker_data.keys():
                if worker in keys:
                    current_worker_in_keys = True
            if current_worker_in_keys:
                worker_hashrate = worker_data[f"current_hashrate_{worker}"]
                hashrate_workers[worker].append(int(worker_hashrate))
            else:
                hashrate_workers[worker].append(0)

        # NOTE(review): this appends the raw string, not a number — convert
        # with float() if hashrate_pool is ever used arithmetically.
        hashrate_pool.append(worker_data["pool_current_hashrate"])

        if index > 0:
            # a payout resets the balance, so only increases are counted
            if current_balance_eth > balance_eth[-1]:
                delta_eth = current_balance_eth - balance_eth[-1]
                balance_delta_eth.append(delta_eth)
            else:
                balance_delta_eth.append(0)
            if current_balance_zil > balance_zil[-1]:
                delta_zil = current_balance_zil - balance_zil[-1]
                balance_delta_zil.append(delta_zil)
            else:
                balance_delta_zil.append(0)

            delta_time = current_time - time[-1]
            time_delta.append(delta_time)

        else:
            # first snapshot: remember the starting balances
            start_balance_eth = current_balance_eth
            start_balance_zil = current_balance_zil
            balance_delta_eth.append(0)
            balance_delta_zil.append(0)

        balance_eth.append(current_balance_eth)
        balance_zil.append(current_balance_zil)
        time.append(current_time)

    # collect the indices of the snapshots where the ETH balance increased
    for current_delta_eth, current_index in zip(balance_delta_eth, range(len(balance_delta_eth))):
        if current_delta_eth != 0:
            delta_eth_range.append(current_index)

    for worker in workers:
        # if it doesn't have data for balances, it splits it between workers
        if start_balance_zil > 0:
            balance_workers_zil[worker] += start_balance_zil * WORKER_SPLIT
        if start_balance_eth > 0:
            balance_workers_eth[worker] += start_balance_eth * WORKER_SPLIT

        # integrate this worker's hashrate over each payout interval
        for index in range(len(delta_eth_range)):
            if index != 0:
                # slices of the per-snapshot time deltas and hashrates
                # between two consecutive balance increases
                temp_time_delta_list = time_delta[delta_eth_range[index - 1]:delta_eth_range[index]]
                temp_hashrate_list = [hashrate_workers[worker][delta_eth_range[index - 1]:delta_eth_range[index]],
                                      temp_time_delta_list]
                # zero-pad the hashrate slice so both slices align
                while len(temp_hashrate_list[0]) < len(temp_hashrate_list[1]):
                    temp_hashrate_list[0].append(0)

                temp_hashrate_len = len(temp_hashrate_list[0])
                x = temp_hashrate_list[0]  # hashrates
                y = temp_hashrate_list[1]  # time deltas
                if temp_hashrate_len > 4:
                    # do simpsons integration:
                    # start = (delta x * h[0] + delta x * h[-1])/3
                    # odd = (delta x * h[1] + delta x * h[3]...) * (4/3)
                    # evens = (delta x * h[2] + delta x h[4]...) * (2/3)
                    # NOTE(review): the endpoints are weighted 4/3 here but
                    # 1/3 in the comment above — confirm which is intended.
                    start = (x[0] * y[0] + x[-1] * y[-1]) * (4 / 3)

                    # NOTE(review): len(temp_hashrate_list) == 2, so only
                    # i == 1 can contribute — range(temp_hashrate_len) was
                    # presumably intended.
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            if i % 2:
                                odd += (x[i] * y[i]) * (4 / 3)
                            else:
                                even += (x[i] * y[i]) * (2 / 3)

                    integral = start + even + odd
                    integral_worker[worker].append(integral)
                    even = 0
                    odd = 0

                elif temp_hashrate_len > 1:
                    # do trapezoid integration
                    # delta x/2(h[0] + 2*h[1] + 2*h[2]... + h[-1])
                    trap_integral = ((x[0] * y[0]) + (x[-1] * y[-1]))
                    # NOTE(review): same range(len(temp_hashrate_list)) issue
                    # as above — only i == 1 is visited.
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            trap_integral += (x[i] * y[i])
                    integral_worker[worker].append(trap_integral)

                elif temp_hashrate_len == 1:
                    # do riemann sum integration
                    # y * delta x
                    riemann_integral = y[0] * x[0]
                    integral_worker[worker].append(riemann_integral)

    # total integrated hashrate across all workers, per interval
    for index in range(len(integral_worker[workers[0]])):
        for worker in integral_worker.keys():
            temp_integral += integral_worker[worker][index]
        total_integral.append(temp_integral)
        temp_integral = 0

    # each worker's fractional share of the work per interval
    # NOTE(review): raises ZeroDivisionError if an interval's total is 0.
    for worker in workers:
        for integral_t, worker_integral in zip(total_integral, integral_worker[worker]):
            percentage = worker_integral / integral_t
            worker_percentage[worker].append(percentage)

    balance_eth_delta = []

    # the non-zero balance increases, aligned with the intervals above
    for delta in balance_delta_eth:
        if delta != 0:
            balance_eth_delta.append(delta)

    # apportion each balance increase by the worker's share of the work,
    # recording the running balance for plotting
    for worker in workers:
        for percentage, delta in zip(worker_percentage[worker], balance_eth_delta):
            balance_workers_eth[worker] += percentage * delta
            b[worker].append(balance_workers_eth[worker])

    def plot():
        """Plot each worker's cumulative ETH balance per payout interval."""
        import matplotlib.pyplot as plt
        # NOTE(review): plt.xlabel/plt.ylabel are functions; assigning to
        # them replaces the functions instead of setting axis labels — use
        # plt.xlabel("...") / plt.ylabel("...") instead.
        plt.xlabel = "Time in Minutes"
        plt.ylabel = "Balance in ETH"
        plt.title("Time Vs Balance")

        for worker_name in workers:
            # x axis is simply 1..len(series)
            temp_x = []
            for x_value in range(len(b[worker_name])):
                temp_x.append(x_value + 1)
            x_list = temp_x
            y_list = b[worker_name]
            plt.plot(x_list, y_list, "-", label=f"{worker_name}")

        plt.legend()
        plt.show()

    print(balance_workers_eth)

    plot()


# Script entry point: run the full analysis when executed directly.
if __name__ == "__main__":
    eval_data()

Is there any way I can speed this up? Currently, at 4,200 files it takes about 20 seconds to execute this program. I feel like this could be done more efficiently, because I iterate through the same list multiple times throughout the code. Any help is appreciated, thanks in advance.

Optimizing Iteration of List and Dictionaries Python

This project's goal is to parse through a large file containing data. I parse that data into a list containing a dictionary. I then do calculations based on that data and optionally plot it for visualization purposes. I use the data for simple calculations to the rewards based on their performance. I wrote this so that each worker mining to a pool will be rewarded fairly. This allows multiple people to mine on the same account for quicker payout times.

Worker_Data.Data:

Data={'pool_current_hashrate': '100215904', 'pool_average_hashrate': '61640734', 'pool_reported_hashrate': '78165786', 'current_hashrate_alex147': '47721859', 'average_hashrate_alex147': '35791394', 'reported_hashrate_alex147': '36895352', 'current_hashrate_henry147': '52494045', 'average_hashrate_henry147': '25849340', 'reported_hashrate_henry147': '41354162', 'time_stamp': '1620751617', 'eth': '0.008999485617836284', 'zil': '4.654624711084'}
Data={'pool_current_hashrate': '100215904', 'pool_average_hashrate': '61640734', 'pool_reported_hashrate': '78337185', 'current_hashrate_alex147': '47721859', 'average_hashrate_alex147': '35791394', 'reported_hashrate_alex147': '36890956', 'current_hashrate_henry147': '52494045', 'average_hashrate_henry147': '25849340', 'reported_hashrate_henry147': '41509445', 'time_stamp': '1620751678', 'eth': '0.008999485617836284', 'zil': '4.654624711084'}

I parse this file which contains tens of thousands of these lines to calculate how much 'work' each miner has done in the time between payouts and then calculate their take of the change in balance. Each line is a separate dictionary in a list.

from ast import literal_eval

PATH = "A:\\Python Project\\ezil_api\\Data\\" # path of data file
WORKER_SPLIT = 0.50  # used if start balance is not 0


def make_file(name, config_dict, type_conf, path=PATH):
    """Append *config_dict* to ``<path><name>.<type_conf>`` as key=value lines.

    :param name: base file name (without extension).
    :param config_dict: mapping written one ``key=value`` pair per line.
    :param type_conf: file extension (without the leading dot).
    :param path: directory prefix; must end with a path separator because
        the file path is built by plain string concatenation.
    """
    # "a+" appends, so repeated calls accumulate lines in the same file.
    with open(path + name + "." + type_conf, "a+") as file:
        # items() is the idiomatic (and cheaper) way to pair keys with
        # values, rather than zip(config_dict.keys(), config_dict.values()).
        for key, value in config_dict.items():
            file.write(f"{key}={value}\n")


def read_data(path, file_name):
    """Parse the consolidated data file into a list of dicts.

    Each non-blank line looks like ``Data={'key': 'value', ...}``; the text
    after the first "=" is evaluated with :func:`ast.literal_eval`, which
    safely accepts only Python literals.

    :param path: directory containing the file; must end with a path
        separator because the path is built by string concatenation.
    :param file_name: name of the data file, e.g. ``"Worker_Data.Data"``.
    :return: list of dicts, one per non-blank line, in file order.
    """
    data = []
    # "r" would suffice; "r+" kept to match the rest of the project.
    with open(path + file_name, "r+") as config:
        # iterate the file lazily instead of materialising every line
        # with readlines() first
        for line in config:
            if not line.strip():
                continue  # tolerate blank/trailing empty lines
            # keep everything after the first "=" (the dict literal);
            # literal_eval tolerates the trailing newline
            data.append(literal_eval(line[line.find("=") + 1:]))
    return data


def eval_data():
    """Parse the consolidated data file, apportion the pool's ETH balance
    increases between workers by integrating each worker's hashrate over
    the time between payouts, print the per-worker ETH balances, and plot
    them.

    NOTE(review): the Simpson/trapezoid loops iterate
    ``range(len(temp_hashrate_list))`` where ``temp_hashrate_list`` is a
    2-element list of lists, so ``i`` only takes the values 0 and 1 —
    ``range(temp_hashrate_len)`` was presumably intended; confirm before
    relying on the integration results.
    """
    workers = []                # worker names, in order of first appearance
    start_balance_eth = 0       # balances seen in the very first snapshot
    start_balance_zil = 0
    balance_eth = []            # balance per snapshot, in file order
    balance_zil = []
    balance_delta_eth = []      # positive balance increase per snapshot, else 0
    balance_delta_zil = []
    delta_eth_range = [0]       # snapshot indices where the ETH balance rose
    time = []                   # time_stamp per snapshot (shadows the time module name)
    time_delta = []             # seconds between consecutive snapshots
    balance_workers_eth = {}    # worker -> apportioned ETH balance
    balance_workers_zil = {}
    hashrate_workers = {}       # worker -> hashrate per snapshot (0 when absent)
    integral_worker = {}        # worker -> integrated hashrate per payout interval
    worker_percentage = {}      # worker -> share of the pool's work per interval
    b = {}                      # worker -> running balance series, for plotting
    odd = 0                     # Simpson's-rule accumulators, reset per interval
    even = 0
    hashrate_pool = []
    balance_eth_delta = []      # the non-zero entries of balance_delta_eth
    total_integral = []         # sum of all workers' integrals per interval
    temp_integral = 0

    files_workers = read_data(path=PATH, file_name="Worker_Data.Data")

    # NOTE(review): unused import — `t` is never referenced; remove.
    from time import time as t

    for worker_data in files_workers:
        # NOTE(review): this is the O(n^2) hot spot — list.index() rescans
        # files_workers on every iteration; use enumerate() instead.
        index = files_workers.index(worker_data)

        # worker names present in this snapshot ("average_hashrate_" is 17 chars)
        worker_list_temp = [worker_temp[17:] for worker_temp in worker_data.keys() if "average_hashrate_" in worker_temp]

        # register any worker seen for the first time
        for worker in worker_list_temp:
            if worker not in workers:
                workers.append(worker)
                hashrate_workers[worker] = []
                balance_workers_eth[worker] = 0
                balance_workers_zil[worker] = 0
                integral_worker[worker] = []
                worker_percentage[worker] = []
                b[worker] = []

        current_balance_eth = float(worker_data["eth"])
        current_balance_zil = float(worker_data["zil"])
        current_time = int(worker_data["time_stamp"])

        # record each known worker's hashrate for this snapshot
        # (0 when the worker does not appear in the snapshot)
        for worker in workers:
            current_worker_in_keys = False
            for keys in worker_data.keys():
                if worker in keys:
                    current_worker_in_keys = True
            if current_worker_in_keys:
                worker_hashrate = worker_data[f"current_hashrate_{worker}"]
                hashrate_workers[worker].append(int(worker_hashrate))
            else:
                hashrate_workers[worker].append(0)

        hashrate_pool.append(float(worker_data["pool_current_hashrate"]))

        if index > 0:
            # a payout resets the balance, so only increases are counted
            if current_balance_eth > balance_eth[-1]:
                delta_eth = current_balance_eth - balance_eth[-1]
                balance_delta_eth.append(delta_eth)
            else:
                balance_delta_eth.append(0)
            if current_balance_zil > balance_zil[-1]:
                delta_zil = current_balance_zil - balance_zil[-1]
                balance_delta_zil.append(delta_zil)
            else:
                balance_delta_zil.append(0)

            delta_time = current_time - time[-1]
            time_delta.append(delta_time)

        else:
            # first snapshot: remember the starting balances
            start_balance_eth = current_balance_eth
            start_balance_zil = current_balance_zil
            balance_delta_eth.append(0)
            balance_delta_zil.append(0)

        balance_eth.append(current_balance_eth)
        balance_zil.append(current_balance_zil)
        time.append(current_time)

    # collect the indices of the snapshots where the ETH balance increased
    for d_eth, index_temp in zip(balance_delta_eth, range(len(balance_delta_eth))):
        if d_eth != 0:
            delta_eth_range.append(index_temp)

    for worker in workers:
        # if it doesn't have data for balances, it splits it between workers
        if start_balance_zil > 0:
            balance_workers_zil[worker] += start_balance_zil * WORKER_SPLIT
        if start_balance_eth > 0:
            balance_workers_eth[worker] += start_balance_eth * WORKER_SPLIT

        for index in range(len(delta_eth_range)):
            # integral of hashrate
            if index > 0:
                # slices of per-snapshot time deltas and hashrates between
                # two consecutive balance increases
                temp_time_delta_list = time_delta[delta_eth_range[index - 1]:delta_eth_range[index]]
                temp_hashrate_list = [hashrate_workers[worker][delta_eth_range[index - 1]:delta_eth_range[index]],
                                      temp_time_delta_list]
                # zero-pad the hashrate slice so both slices align
                while len(temp_hashrate_list[0]) < len(temp_hashrate_list[1]):
                    temp_hashrate_list[0].append(0)

                temp_hashrate_len = len(temp_hashrate_list[0])
                x = temp_hashrate_list[0]  # hashrates
                y = temp_hashrate_list[1]  # time deltas
                if temp_hashrate_len > 4:
                    # do simpsons integration:
                    # start = (delta x * h[0] + delta x * h[-1])/3
                    # odd = (delta x * h[1] + delta x * h[3]...) * (4/3)
                    # evens = (delta x * h[2] + delta x h[4]...) * (2/3)
                    # NOTE(review): the endpoints are weighted 4/3 here but
                    # 1/3 in the comment above — confirm which is intended.
                    start = (x[0] * y[0] + x[-1] * y[-1]) * (4 / 3)

                    # NOTE(review): len(temp_hashrate_list) == 2, so only
                    # i == 1 can contribute — range(temp_hashrate_len) was
                    # presumably intended.
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            if i % 2:
                                odd += (x[i] * y[i]) * (4 / 3)
                            else:
                                even += (x[i] * y[i]) * (2 / 3)

                    integral = start + even + odd
                    integral_worker[worker].append(integral)
                    even = 0
                    odd = 0

                elif temp_hashrate_len > 1:
                    # do trapezoid integration
                    # delta x/2(h[0] + 2*h[1] + 2*h[2]... + h[-1])
                    trap_integral = ((x[0] * y[0]) + (x[-1] * y[-1]))
                    # NOTE(review): same range(len(temp_hashrate_list)) issue
                    # as above — only i == 1 is visited.
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            trap_integral += (x[i] * y[i])
                    integral_worker[worker].append(trap_integral)

                elif temp_hashrate_len == 1:
                    # do riemann sum integration
                    # y * delta x
                    riemann_integral = y[0] * x[0]
                    integral_worker[worker].append(riemann_integral)

    # total integrated hashrate across all workers, per interval
    for index in range(len(integral_worker[workers[0]])):
        for worker in integral_worker.keys():
            temp_integral += integral_worker[worker][index]
        total_integral.append(temp_integral)
        temp_integral = 0

    # each worker's fractional share of the work per interval; intervals
    # with a zero total are silently skipped, which can desynchronise the
    # percentage list from balance_eth_delta below — NOTE(review): confirm.
    for worker in workers:
        for integral_t, worker_integral in zip(total_integral, integral_worker[worker]):
            try:
                worker_percentage[worker].append(worker_integral / integral_t)
            except ZeroDivisionError:
                pass

    # the non-zero balance increases, aligned with the intervals above
    for delta in balance_delta_eth:
        if delta != 0:
            balance_eth_delta.append(delta)

    # apportion each balance increase by the worker's share of the work,
    # recording the running balance for plotting
    for worker in workers:
        for percentage, delta in zip(worker_percentage[worker], balance_eth_delta):
            balance_workers_eth[worker] += percentage * delta
            b[worker].append(balance_workers_eth[worker])

    def plot():
        """Plot each worker's cumulative ETH balance per payout interval."""
        import matplotlib.pyplot as plt
        # NOTE(review): this sorts the shared `time` list in place; if
        # plot_ddx were ever called, its time deltas would be corrupted.
        time.sort(reverse=True)
        # NOTE(review): plt.xlabel/plt.ylabel are functions; assigning to
        # them replaces the functions instead of setting axis labels — use
        # plt.xlabel("...") / plt.ylabel("...") instead.
        plt.xlabel = "Delta Balance index"
        plt.ylabel = "ETH Balance"
        plt.title("Index Vs ETH")

        for worker_d in workers:
            # x axis is simply 1..len(series)
            temp_x = []
            for index_d in range(len(worker_percentage[worker_d])):
                temp_x.append(index_d + 1)
            x_d = temp_x
            y_d = b[worker_d]
            plt.plot(x_d, y_d, "-", label=f"{worker_d}")

        # NOTE(review): plot_ddx is defined but never called; it also reads
        # x_d, which after the loop above holds the LAST worker's x values.
        def plot_ddx():
            """Plot the pool's ETH yield per 10 MH/s per day, per interval."""
            d_list = []
            for local_index in range(len(delta_eth_range)):
                if local_index > 0:
                    temp_index = delta_eth_range[local_index]
                    prev_temp_index = delta_eth_range[local_index - 1]

                    delta_eth_temp = balance_eth[temp_index] - balance_eth[prev_temp_index]
                    delta_time_temp = time[temp_index] - time[prev_temp_index]
                    average_hashrate_temp = (sum(hashrate_pool[prev_temp_index:temp_index])) / (
                            temp_index - prev_temp_index)

                    d_list.append(
                        ((delta_eth_temp / delta_time_temp) / average_hashrate_temp) * 1000000 * 60 * 60 * 24 * 10)
                    # magic numbers are as follows:
                    # 1000000, convert to per mh/s,
                    # 60*60*24, convert from seconds to days,

            plt.plot(x_d, d_list, "-", label="ETH per 10 Mh/s per day")

        plt.legend()
        plt.show()

    # final report: one line per worker with its apportioned ETH balance
    for keys in balance_workers_eth.keys():
        print(keys, balance_workers_eth[keys])

    plot()


# Script entry point: run the full analysis when executed directly.
if __name__ == "__main__":
    eval_data()
 

Is there any way I can speed this up? Currently, at 22,000 lines it takes about 40 seconds to execute this program. The main section of code that takes up the most time is the loop beginning with for worker_data in files_workers: — it accounts for a large percentage of the run time based on my testing and pegs a core on my CPU. Is there a more efficient approach to this problem? I appreciate any help/constructive criticism.

edited title
Link

Python Optimizing Parsing Many Files, List, and Iteration of Dictionaries and Lists, Python

Source Link

Python Parsing Many Files and Iteration of Dictionaries and Lists

This project's goal is to parse through many small files containing data. I then do calculations based on that data and optionally plot it for visualization purposes. I use the data for simple calculations that distribute the rewards among workers based on their performance. I wrote this so that each worker mining to a pool will be rewarded fairly. This allows multiple people to mine on the same account for quicker payout times.

Saved data file example:

<utc_time>.Workers:

pool_current_hashrate=787410673
pool_average_hashrate=882854395
pool_reported_hashrate=765620611
current_hashrate_rig0=381774870
average_hashrate_rig0=449380846
reported_hashrate_rig0=353983862
current_hashrate_rig1=405635805
average_hashrate_rig1=433473558
reported_hashrate_rig1=410862369
time_stamp=1621017328
eth=0.19041033273478156
zil=109.258827011171

Note: there can be as few as 1 miner or as many as the pool allows, each one specifies the current, average, and reported hash rate of the mining rig.

I parse thousands of these files to calculate how much 'work' each miner has done in the time between payouts and then calculate their take of the change in balance.

Code:

import os

PATH = "A:\\Python Project\\ezil_api\\Data\\" # directory holding the save files; the trailing separator matters because read_config builds paths by string concatenation
WORKER_SPLIT = 0.50  # fraction of any pre-existing account balance credited to each worker (0.50 appears to assume two workers -- confirm)


def read_config(type_config, path=None):
    """Load every save file in *path* whose name ends with *type_config*.

    Each save file holds one ``key=value`` pair per line.  Every matching
    file is parsed into a ``{key: value}`` dict of strings, and the dicts
    are returned in sorted-filename order.

    :param type_config: filename suffix to match, e.g. ``".Workers"``.
    :param path: directory containing the save files; defaults to the
        module-level PATH.  Resolved at call time (sentinel-None default)
        rather than frozen at import time.
    :return: list of dicts, one per matching file.
    """
    if path is None:
        path = PATH

    def get_data(file_name):
        # Values stay strings; callers convert them as needed.
        # split("=", 1) tolerates '=' inside a value, and lines without
        # '=' (e.g. trailing blank lines) are skipped instead of crashing.
        # Opened read-only: the original "r+" needlessly required write
        # permission on the data files.
        with open(file_name, "r") as config:
            return dict(
                line.rstrip("\n").split("=", 1)
                for line in config
                if "=" in line
            )

    _, _, filenames = next(os.walk(path))
    filenames.sort()
    # str.endswith replaces the manual negative-index slice comparison;
    # os.path.join works with or without a trailing separator on *path*.
    return [
        get_data(os.path.join(path, name))
        for name in filenames
        if name.endswith(type_config)
    ]


def eval_data():
    """Split the pool's earnings between workers according to work done.

    Pipeline:
      1. Load every ``.Workers`` sample file (one dict per sample).
      2. Track each worker's current hashrate and the account balance per
         sample; detect payout points where the ETH balance jumps upward.
      3. Numerically integrate each worker's hashrate over each interval
         between payouts (Simpson / trapezoid / Riemann depending on how
         many samples the interval holds).
      4. Credit each payout to the workers in proportion to their share
         of the total integral for that interval.
      5. Print the per-worker ETH balances and plot them.
    """
    workers = []                # worker names, in order of first appearance
    start_balance_eth = 0       # account balances at the very first sample
    start_balance_zil = 0
    balance_eth = []            # account ETH balance, one entry per sample
    balance_zil = []            # account ZIL balance, one entry per sample
    balance_delta_eth = []      # per-sample ETH increase (0 when flat or decreasing)
    balance_delta_zil = []
    delta_eth_range = [0]       # sample indices where an ETH payout landed
    time = []                   # unix timestamps per sample (name would shadow the stdlib `time` module if it were imported)
    time_delta = []             # seconds between consecutive samples
    balance_workers_eth = {}    # worker -> ETH owed so far
    balance_workers_zil = {}    # worker -> ZIL owed so far
    hashrate_workers = {}       # worker -> per-sample hashrates (0 when the worker is absent)
    integral_worker = {}        # worker -> hashrate integral per payout interval
    worker_percentage = {}      # worker -> share of total work per payout interval
    b = {}  # worker -> running ETH balance over time; used only for plotting
    odd = 0                     # Simpson's-rule accumulators, reset after each interval
    even = 0
    hashrate_pool = []          # per-sample pool hashrate (collected but never read below)
    total_integral = []         # per-interval sum of every worker's integral
    temp_integral = 0
    files_workers = read_config(".Workers", path=PATH)

    # --- Pass 1: walk the samples, collecting hashrates and balance deltas.
    for worker_data in files_workers:

        # NOTE(review): list.index() rescans the list from the front on
        # every iteration, making this pass O(n^2) in the number of
        # samples -- this is the main hot spot; enumerate(files_workers)
        # yields the same index in O(1).
        index = files_workers.index(worker_data)
        # Register any worker we have not seen before.
        for keys in worker_data.keys():
            if "average_hashrate_" in keys:
                worker = keys[17:]  # drop the "average_hashrate_" prefix (17 chars)
                if worker not in workers:
                    workers.append(worker)
                    hashrate_workers[worker] = []
                    balance_workers_eth[worker] = 0
                    balance_workers_zil[worker] = 0
                    integral_worker[worker] = []
                    worker_percentage[worker] = []
                    b[worker] = []

        current_balance_eth = float(worker_data["eth"])
        current_balance_zil = float(worker_data["zil"])
        current_time = int(worker_data["time_stamp"])

        # Record this sample's hashrate for every known worker (0 when the
        # worker is missing from this sample).
        for worker in workers:
            current_worker_in_keys = False
            # NOTE(review): substring test -- a worker name that is a
            # prefix of another (e.g. "rig1" / "rig10") would match here;
            # confirm names cannot collide like that.
            for keys in worker_data.keys():
                if worker in keys:
                    current_worker_in_keys = True
            if current_worker_in_keys:
                worker_hashrate = worker_data[f"current_hashrate_{worker}"]
                hashrate_workers[worker].append(int(worker_hashrate))
            else:
                hashrate_workers[worker].append(0)

        hashrate_pool.append(worker_data["pool_current_hashrate"])

        if index > 0:
            # Only upward balance moves count as earnings; a drop (payout
            # withdrawal or reset) records a delta of 0.
            if current_balance_eth > balance_eth[-1]:
                delta_eth = current_balance_eth - balance_eth[-1]
                balance_delta_eth.append(delta_eth)
            else:
                balance_delta_eth.append(0)
            if current_balance_zil > balance_zil[-1]:
                delta_zil = current_balance_zil - balance_zil[-1]
                balance_delta_zil.append(delta_zil)
            else:
                balance_delta_zil.append(0)

            delta_time = current_time - time[-1]
            time_delta.append(delta_time)

        else:
            # First sample: whatever is already on the account becomes the
            # "start balance", split between workers later.
            start_balance_eth = current_balance_eth
            start_balance_zil = current_balance_zil
            balance_delta_eth.append(0)
            balance_delta_zil.append(0)

        balance_eth.append(current_balance_eth)
        balance_zil.append(current_balance_zil)
        time.append(current_time)

    # Indices of the samples at which an ETH payout was observed.
    for current_delta_eth, current_index in zip(balance_delta_eth, range(len(balance_delta_eth))):
        if current_delta_eth != 0:
            delta_eth_range.append(current_index)

    # --- Pass 2: integrate each worker's hashrate over each payout interval.
    for worker in workers:
        # If the account already held a balance before data collection
        # began, split it between workers using WORKER_SPLIT.
        if start_balance_zil > 0:
            balance_workers_zil[worker] += start_balance_zil * WORKER_SPLIT
        if start_balance_eth > 0:
            balance_workers_eth[worker] += start_balance_eth * WORKER_SPLIT

        for index in range(len(delta_eth_range)):
            if index != 0:
                temp_time_delta_list = time_delta[delta_eth_range[index - 1]:delta_eth_range[index]]
                temp_hashrate_list = [hashrate_workers[worker][delta_eth_range[index - 1]:delta_eth_range[index]],
                                      temp_time_delta_list]
                # Pad the hashrate series with zeros so it is as long as
                # the time-delta series.
                while len(temp_hashrate_list[0]) < len(temp_hashrate_list[1]):
                    temp_hashrate_list[0].append(0)

                temp_hashrate_len = len(temp_hashrate_list[0])
                x = temp_hashrate_list[0]  # hashrate samples
                y = temp_hashrate_list[1]  # matching time deltas
                if temp_hashrate_len > 4:
                    # do simpsons integration:
                    # start = (delta x * h[0] + delta x * h[-1])/3
                    # odd = (delta x * h[1] + delta x * h[3]...) * (4/3)
                    # evens = (delta x * h[2] + delta x h[4]...) * (2/3)
                    # NOTE(review): the comment above weights the endpoints
                    # by 1/3, but the code multiplies them by 4/3 -- one of
                    # the two looks wrong; verify against Simpson's rule.
                    start = (x[0] * y[0] + x[-1] * y[-1]) * (4 / 3)

                    # NOTE(review): temp_hashrate_list always holds exactly
                    # 2 elements, so this loop is range(2) and, with the
                    # guard below, only i == 1 ever contributes.  It looks
                    # like range(temp_hashrate_len) was intended -- confirm.
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            if i % 2:
                                odd += (x[i] * y[i]) * (4 / 3)
                            else:
                                even += (x[i] * y[i]) * (2 / 3)

                    integral = start + even + odd
                    integral_worker[worker].append(integral)
                    even = 0
                    odd = 0

                elif temp_hashrate_len > 1:
                    # do trapezoid integration
                    # delta x/2(h[0] + 2*h[1] + 2*h[2]... + h[-1])
                    trap_integral = ((x[0] * y[0]) + (x[-1] * y[-1]))
                    # NOTE(review): same range(len(temp_hashrate_list)) ==
                    # range(2) issue as above -- interior points beyond
                    # i == 1 are never added.
                    for i in range(len(temp_hashrate_list)):
                        if ((temp_hashrate_len - 1) > i) and (i > 0):
                            trap_integral += (x[i] * y[i])
                    integral_worker[worker].append(trap_integral)

                elif temp_hashrate_len == 1:
                    # do riemann sum integration
                    # y * delta x
                    riemann_integral = y[0] * x[0]
                    integral_worker[worker].append(riemann_integral)

    # Total work across all workers, per payout interval.
    for index in range(len(integral_worker[workers[0]])):
        for worker in integral_worker.keys():
            temp_integral += integral_worker[worker][index]
        total_integral.append(temp_integral)
        temp_integral = 0

    # Each worker's fractional share of the work in each interval.
    for worker in workers:
        for integral_t, worker_integral in zip(total_integral, integral_worker[worker]):
            percentage = worker_integral / integral_t
            worker_percentage[worker].append(percentage)

    # Non-zero ETH payouts, aligned with the intervals above.
    balance_eth_delta = []

    for delta in balance_delta_eth:
        if delta != 0:
            balance_eth_delta.append(delta)

    # Credit each payout proportionally; keep a running balance for plotting.
    for worker in workers:
        for percentage, delta in zip(worker_percentage[worker], balance_eth_delta):
            balance_workers_eth[worker] += percentage * delta
            b[worker].append(balance_workers_eth[worker])

    def plot():
        """Plot every worker's running ETH balance against sample number."""
        import matplotlib.pyplot as plt
        # NOTE(review): plt.xlabel / plt.ylabel are functions; assigning to
        # them replaces the functions instead of labelling the axes -- use
        # plt.xlabel("...") / plt.ylabel("...") instead.
        plt.xlabel = "Time in Minutes"
        plt.ylabel = "Balance in ETH"
        plt.title("Time Vs Balance")

        for worker_name in workers:
            temp_x = []
            for x_value in range(len(b[worker_name])):
                temp_x.append(x_value + 1)
            x_list = temp_x
            y_list = b[worker_name]
            plt.plot(x_list, y_list, "-", label=f"{worker_name}")

        plt.legend()
        plt.show()

    print(balance_workers_eth)

    plot()


if __name__ == "__main__":
    # Script entry point: parse the saved samples, compute per-worker
    # balances, print them, and plot the result.
    eval_data()

Is there any way I can speed this up? Currently, at 4,200 files it takes about 20 seconds to execute this program. I feel like this could be done more efficiently, because I iterate through the same list multiple times throughout the code. Any help is appreciated, thanks in advance.