I an trying to create a plot for all csvs in a directory. When I run the script below, my RAM memory consumption just goes up monotonically. The code is simple albeit a bit longer:
import multiprocessing
import os
from glob import glob
import pandas as pd
from matplotlib import pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
root_data_dir = '/home/user1/data/20191121'
root_img_dir = os.path.join(root_data_dir, 'figures')
if not os.path.exists(root_img_dir):
os.mkdir(root_img_dir)
def plot_file(file):
print("Processing {}".format(file))
df = pd.read_csv(file, parse_dates=['date'], index_col='date', compression='xz')
plt.plot(df)
base_file = os.path.splitext(os.path.basename(file))[0]
img_file = os.path.join(root_img_dir, base_file + '.png')
plt.title(base_file)
plt.savefig(img_file, dpi=300)
print("Saved {}".format(img_file))
plt.close()
multiprocessing.Pool(16).map(plot_file, sorted(glob(os.path.join(root_data_dir, '*.csv.xz'))))
del dfafterplt.close()but didn't work. The system has 32GB of RAM and I ended up consuming all of it within a short period of time after which the machine became unresponsive (I am running a recent version of linux for reference)