This project's goal is to parse through a large file containing data. I parse that data into a list of dictionaries. I then do calculations based on that data and optionally plot it for visualization purposes. I use the data to calculate each worker's rewards based on their performance. I wrote this so that each worker mining to a pool will be rewarded fairly. This allows multiple people to mine on the same account for quicker payout times.
Example of a `<utc_time>.Workers` config file:
pool_current_hashrate=787410673
pool_average_hashrate=882854395
pool_reported_hashrate=765620611
current_hashrate_rig0=381774870
average_hashrate_rig0=449380846
reported_hashrate_rig0=353983862
current_hashrate_rig1=405635805
average_hashrate_rig1=433473558
reported_hashrate_rig1=410862369
time_stamp=1621017328
eth=0
zil=109

Example of the `Worker_Data.Data` log file (one line per snapshot):
Data={'pool_current_hashrate': '100215904', 'pool_average_hashrate': '61640734', 'pool_reported_hashrate': '78165786', 'current_hashrate_alex147': '47721859', 'average_hashrate_alex147': '35791394', 'reported_hashrate_alex147': '36895352', 'current_hashrate_henry147': '52494045', 'average_hashrate_henry147': '25849340', 'reported_hashrate_henry147': '41354162', 'time_stamp': '1620751617', 'eth': '0.19041033273478156008999485617836284', 'zil': '4.654624711084'}
Data={'pool_current_hashrate': '100215904', 'pool_average_hashrate': '61640734', 'pool_reported_hashrate': '78337185', 'current_hashrate_alex147': '47721859', 'average_hashrate_alex147': '35791394', 'reported_hashrate_alex147': '36890956', 'current_hashrate_henry147': '52494045', 'average_hashrate_henry147': '25849340', 'reported_hashrate_henry147': '41509445', 'time_stamp': '1620751678', 'eth': '0.258827011171008999485617836284', 'zil': '4.654624711084'}
I parse this file, which contains tens of thousands of these lines, to calculate how much 'work' each miner has done in the time between payouts, and then calculate their take of the change in balance. Each line becomes a separate dictionary in a list.
import os
from ast import literal_eval

# Directory containing the saved data file(s).
PATH = "A:\\Python Project\\ezil_api\\Data\\"
# Share of any pre-existing (start) balance credited to each worker;
# used only if the starting balance is not 0.
WORKER_SPLIT = 0.50
def make_file(name, config_dict, type_conf, path=PATH):
    """Append config_dict to <path><name>.<type_conf> as key=value lines.

    NOTE(review): this region of the source was garbled by a merged edit; the
    parameter order (name, config_dict, type_conf) is reconstructed from the
    fragments — confirm against any callers.
    """
    with open(path + name + "." + type_conf, "a+") as file:
        for keys, values in zip(config_dict.keys(), config_dict.values()):
            file.write(f"{keys}={values}\n")


def read_config(type_config, path=PATH):
    """Read every file directly under path whose name ends with type_config.

    Each matching file holds key=value lines (as written by make_file).
    Returns a list of dicts, one per file, in sorted filename order; values
    are kept as strings.
    """
    def get_data(file_name):
        # Parse one key=value-per-line file into a dict.
        return_dict = {}
        with open(file_name, "r+") as config:
            for line in config.readlines():
                if "\n" in line:
                    line = line[:-1]
                # maxsplit=1 so a value containing "=" does not break unpacking
                key, value = line.split("=", 1)
                return_dict[key] = value
        return return_dict

    file_list = []
    # The first next() of os.walk yields (dirpath, dirnames, filenames)
    # for path itself — subdirectories are not descended into.
    _, _, filenames = next(os.walk(path))
    filenames.sort()
    for files in filenames:
        index = -1 * len(type_config)
        if files[index:] == type_config:
            local_path = path + files[:index] + type_config
            file_list.append(get_data(local_path))
    return file_list
def read_data(path, file_name):
    """Parse one snapshot log into a list of dicts.

    Each line of the file looks like ``<label>=<python-dict-literal>``; the
    text up to and including the first ``=`` is discarded and the remainder is
    parsed with ast.literal_eval (safe: literals only, no code execution).

    path: directory prefix (expected to already end with the path separator).
    file_name: name of the log file under path.
    Returns a list with one dict per line, in file order.
    """
    data = []
    # Open read-only ("r", not "r+": nothing is written here) and stream the
    # file line by line instead of readlines() — avoids holding the raw text
    # of a multi-hundred-thousand-line file in memory next to the parsed dicts.
    with open(path + file_name, "r") as log:
        for line in log:
            # Drop everything through the first "=" (the label/timestamp
            # prefix); find() returning -1 keeps the whole line, as before.
            data.append(literal_eval(line[line.find("=") + 1:]))
    return data
def eval_data():
    """Attribute pool balance gains to individual workers.

    Reads the ``Worker_Data.Data`` log via read_data(), integrates each
    worker's hashrate over every window between ETH balance increases, splits
    each balance increase among the workers in proportion to their share of
    the integrated hashrate, prints the per-worker ETH balances and plots
    them. No arguments, no return value.
    """
    workers = []
    start_balance_eth = 0
    start_balance_zil = 0
    balance_eth = []
    balance_zil = []
    balance_delta_eth = []  # per-sample increase in pool ETH balance (0 if none)
    balance_delta_zil = []
    delta_eth_range = [0]   # sample indices at which the ETH balance increased
    time = []
    time_delta = []
    balance_workers_eth = {}
    balance_workers_zil = {}
    hashrate_workers = {}   # worker -> one current-hashrate reading per sample
    integral_worker = {}    # worker -> hashrate integral per payout window
    worker_percentage = {}  # worker -> share of pool integral per payout window
    b = {}  # used for plotting, contains balance of a worker with respect to time
    odd = 0
    even = 0
    hashrate_pool = []
    balance_eth_delta = []  # non-zero ETH deltas only (one per payout window)
    total_integral = []

    files_workers = read_data(path=PATH, file_name="Worker_Data.Data")
    # PERFORMANCE FIX: the old loop recomputed files_workers.index(worker_data)
    # on every iteration — a linear scan per sample, i.e. O(n^2) over hundreds
    # of thousands of samples. enumerate() makes the whole pass O(n); this was
    # the reported hot spot.
    for index, worker_data in enumerate(files_workers):
        # Discover workers from "average_hashrate_<name>" keys
        # (the prefix "average_hashrate_" is 17 characters long).
        for worker in (key[17:] for key in worker_data.keys()
                       if "average_hashrate_" in key):
            if worker not in workers:
                workers.append(worker)
                hashrate_workers[worker] = []
                balance_workers_eth[worker] = 0
                balance_workers_zil[worker] = 0
                integral_worker[worker] = []
                worker_percentage[worker] = []
                b[worker] = []
        current_balance_eth = float(worker_data["eth"])
        current_balance_zil = float(worker_data["zil"])
        current_time = int(worker_data["time_stamp"])
        for worker in workers:
            # A worker absent from this snapshot contributes 0 hashrate.
            if any(worker in key for key in worker_data.keys()):
                hashrate_workers[worker].append(
                    int(worker_data[f"current_hashrate_{worker}"]))
            else:
                hashrate_workers[worker].append(0)
        hashrate_pool.append(float(worker_data["pool_current_hashrate"]))
        if index > 0:
            # Only count increases; a drop (payout/reset) counts as 0.
            if current_balance_eth > balance_eth[-1]:
                balance_delta_eth.append(current_balance_eth - balance_eth[-1])
            else:
                balance_delta_eth.append(0)
            if current_balance_zil > balance_zil[-1]:
                balance_delta_zil.append(current_balance_zil - balance_zil[-1])
            else:
                balance_delta_zil.append(0)
            time_delta.append(current_time - time[-1])
        else:
            start_balance_eth = current_balance_eth
            start_balance_zil = current_balance_zil
            balance_delta_eth.append(0)
            balance_delta_zil.append(0)
        balance_eth.append(current_balance_eth)
        balance_zil.append(current_balance_zil)
        time.append(current_time)

    # Record the sample index of every ETH balance increase.
    for index_temp, d_eth in enumerate(balance_delta_eth):
        if d_eth != 0:
            delta_eth_range.append(index_temp)

    for worker in workers:
        # if it doesn't have data for balances, it splits it between workers
        if start_balance_zil > 0:
            balance_workers_zil[worker] += start_balance_zil * WORKER_SPLIT
        if start_balance_eth > 0:
            balance_workers_eth[worker] += start_balance_eth * WORKER_SPLIT
        # Integrate this worker's hashrate over each payout window.
        for index in range(1, len(delta_eth_range)):
            lo = delta_eth_range[index - 1]
            hi = delta_eth_range[index]
            x = hashrate_workers[worker][lo:hi]  # hashrate samples
            y = time_delta[lo:hi]                # seconds between samples
            # Pad with zero hashrate so the two slices line up.
            while len(x) < len(y):
                x.append(0)
            temp_hashrate_len = len(x)
            if temp_hashrate_len > 4:
                # Simpson's rule: (h0 + 4*h1 + 2*h2 + ... + hn) * dx / 3.
                # BUGFIX: endpoints carry weight 1/3 (the old code used 4/3,
                # contradicting its own rule comment), and the interior loop
                # must run over the samples — the old code iterated
                # range(len([x, y])) == 2, so only index 1 was ever summed.
                start = (x[0] * y[0] + x[-1] * y[-1]) / 3
                for i in range(1, temp_hashrate_len - 1):
                    if i % 2:
                        odd += (x[i] * y[i]) * (4 / 3)
                    else:
                        even += (x[i] * y[i]) * (2 / 3)
                integral_worker[worker].append(start + even + odd)
                even = 0
                odd = 0
            elif temp_hashrate_len > 1:
                # Trapezoid rule: dx/2 * (h0 + 2*h1 + ... + 2*h(n-1) + hn).
                # BUGFIX: endpoints are halved (old code gave them full
                # weight) and the interior loop runs over the samples.
                trap_integral = (x[0] * y[0] + x[-1] * y[-1]) / 2
                for i in range(1, temp_hashrate_len - 1):
                    trap_integral += x[i] * y[i]
                integral_worker[worker].append(trap_integral)
            elif temp_hashrate_len == 1:
                # Riemann sum: single sample -> hashrate * dt.
                integral_worker[worker].append(x[0] * y[0])

    # Pool-wide integral per window = sum of the per-worker integrals.
    # (Guarded: the old code indexed workers[0] and crashed on an empty log.)
    if workers:
        for index in range(len(integral_worker[workers[0]])):
            total_integral.append(
                sum(integral_worker[worker][index] for worker in integral_worker))

    for worker in workers:
        for integral_t, worker_integral in zip(total_integral,
                                               integral_worker[worker]):
            try:
                worker_percentage[worker].append(worker_integral / integral_t)
            except ZeroDivisionError:
                pass  # window with zero total hashrate contributes no share

    for delta in balance_delta_eth:
        if delta != 0:
            balance_eth_delta.append(delta)
    for worker in workers:
        for percentage, delta in zip(worker_percentage[worker],
                                     balance_eth_delta):
            balance_workers_eth[worker] += percentage * delta
            b[worker].append(balance_workers_eth[worker])

    def plot():
        """Plot each worker's attributed ETH balance per payout window."""
        import matplotlib.pyplot as plt
        # NOTE(review): this mutates the shared `time` list and is only read
        # by plot_ddx (which is never called) — confirm the intent.
        time.sort(reverse=True)
        # BUGFIX: plt.xlabel / plt.ylabel are functions; the old code
        # assigned strings to them, which set no label and shadowed the API.
        plt.xlabel("Delta Balance index")
        plt.ylabel("ETH Balance")
        plt.title("Index Vs ETH")
        x_d = []
        for worker_d in workers:
            x_d = [index_d + 1
                   for index_d in range(len(worker_percentage[worker_d]))]
            y_d = b[worker_d]
            plt.plot(x_d, y_d, "-", label=f"{worker_d}")

        def plot_ddx():
            """Overlay ETH earned per 10 Mh/s per day for each payout window."""
            d_list = []
            for local_index in range(1, len(delta_eth_range)):
                temp_index = delta_eth_range[local_index]
                prev_temp_index = delta_eth_range[local_index - 1]
                delta_eth_temp = (balance_eth[temp_index]
                                  - balance_eth[prev_temp_index])
                delta_time_temp = time[temp_index] - time[prev_temp_index]
                average_hashrate_temp = (
                    sum(hashrate_pool[prev_temp_index:temp_index])
                    / (temp_index - prev_temp_index))
                # magic numbers are as follows:
                # 1000000, convert to per mh/s,
                # 60*60*24, convert from seconds to days,
                # * 10 -> per 10 Mh/s
                d_list.append(
                    ((delta_eth_temp / delta_time_temp)
                     / average_hashrate_temp) * 1000000 * 60 * 60 * 24 * 10)
            plt.plot(x_d, d_list, "-", label="ETH per 10 Mh/s per day")

        plt.legend()
        plt.show()

    for keys, worker_balance in balance_workers_eth.items():
        print(keys, worker_balance)
    plot()
# Entry point: run the full reward evaluation when executed as a script.
if __name__ == "__main__":
    eval_data()
Is there any way I can speed this up? Currently, at 422,000 lines, it takes about 40 seconds to execute this program. The section of code that takes the most time is the loop starting at `for worker_data in files_workers:` — based on my testing it takes up a large percentage of the run time and pegs a core on my CPU. Is there a more efficient approach to this problem? I appreciate any help/constructive criticism.