I have been researching but couldn't solve this one with the lack of documentation on multiprocessing.Manager().dict(). I'm trying to download tickers information from yahoo finance using multiprocessing and have constructed the class below:
# Import packages
import json
import math
import multiprocessing as mp
import os
import pandas as pd
import yfinance as yf
def get_ticker_data_object(tickers: list, mp_dict: mp.Manager().dict):
"""
Function to get a single ticker info from yahoo finance API
:param tickers: list of tickers of a stock
:param mp_dict: mp.Manager().dict object for multiprocess
:return: A dictionary to q
"""
for ticker in tickers:
ticker_info = yf.Ticker(ticker)
mp_dict[ticker] = {
f'yf_ticker': ticker_info,
f'{ticker}_info': ticker_info.info
}
class data_manager:
"""
A class to manage data.
...
Attributes
----------
indexes : list
dictionary of
Methods
-------
read_index_tickers():
Read stock tickers.
download_data():
Downloads data for each key (ticker) in self.data and then updates a dictionary of historical data and company KPI.
"""
def __init__(self):
"""
Constructs all the necessary attributes for data_manager.
"""
with open(os.path.join(os.getcwd(), 'model\configurations\index_data_config.json')) as f:
self.indexes = json.load(f)
self.data = {}
def get_tickers(self):
"""
Get all tickers.
"""
self.data = [*A list of n tickers*]
def get_tickers_data_objects(self):
"""
Get ticker objects for tickers.
"""
get_ticker_info_processes = []
mp_dict = mp.Manager().dict()
for ticker in self.data.keys():
mp_dict[ticker] = {}
list_of_tickers = list(self.data.keys())
n = (mp.cpu_count()-2)
size = math.ceil(len(list_of_tickers) / n)
broken_down_list = list(
map(lambda x: list_of_tickers[x * size:x * size + size],
list(range(n)))
)
for tickers in broken_down_list:
process = mp.Process(
target=get_ticker_data_object,
args=(tickers, mp_dict,),
)
get_ticker_info_processes.append(process)
process.start()
for process in get_ticker_info_processes:
process.join()
print('Get info out of process result:')
print(mp_dict)
And this class is used in main.py as below:
if __name__ == '__main__':
from model.download_data import *
data_manager = data_manager()
data_manager.get_tickers()
data_manager.get_tickers_data_objects()
By the looks of it I'm able to start all the processes but each of them are giving me a runtime error as below:
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
I'm trying to get a dictionary in below format:
{
'ticker_1': {
'yf_ticker': ticker_1_info,
'ticker_info': ticker_1_info.info
},
'ticker_2': {
'yf_ticker': ticker_2_info,
'ticker_info': ticker_2_info.info
},
...
}
Am I missing something here? Thanks.
if __name__ ...look like ? can you move theimportabove theif __name__ ....mainbeing loaded by the actual script?