My code down below generates entries for my program, but it's veryyy slow. I'm looking to generate about 10 million, is there any way to speed it up?
FirstNames, LastNames, and Objects (.txt) are all files with one entry per line
TempList=[]
maxData=10000000 #The maximum amount of entries that can be produced
import random,pickle,time,math,statistics
FirstNames = './FirstNames.txt'
LastNames = './LastNames.txt'
Objects = './Objects.txt'
def rawCount(filename):
with open(filename, 'rb') as f:
lines = 1
buf_size = 1024 * 1024
read_f = f.raw.read
buf = read_f(buf_size)
while buf:
lines += buf.count(b'\n')
buf = read_f(buf_size)
return lines
def randomLine(filename):
num = int(random.uniform(0, rawCount(filename)))
with open(filename, 'r') as f:
for i, line in enumerate(f, 1):
if i == num:
break
return line.strip('\n')
def str_time_prop(start, end, format, prop):
stime = time.mktime(time.strptime(start, format))
etime = time.mktime(time.strptime(end, format))
ptime = stime + prop * (etime - stime)
return time.strftime(format, time.localtime(ptime))
def random_date(start, end, prop):
return str_time_prop(start, end, '%m/%d/%Y', prop)
def numCheck(question,low,high):
global errorState
errorState = True
while errorState == True:
checkString = input(question)
if len(checkString) == 0:
print("\nYou have to enter something!\n")
elif not checkString.isdigit():
print("\nThat's not a number!\n")
elif not low <= int(checkString) <= high:
print("\nThe number must be between "+str(low)+" and "+str(high)+"!\n")
else:
errorState = False
return checkString
def yesNoCheck(question):
while True:
sel = input("> ")
if sel.lower() == "y":
return True
elif sel.lower() == "n":
return False
else:
print("\nPlease type either 'y' or 'n'.\n")
last_times = []
def get_remaining_time(i, total, time):
last_times.append(time)
len_last_t = len(last_times)
if len_last_t > 500:
last_times.pop(0)
mean_t = statistics.median(last_times)
remain_s_tot = mean_t * (int(total) - i + 1)
remain_m = round(remain_s_tot / 60)
remain_s = round(remain_s_tot % 60)
#return "Time left: "+str(remain_m)+"m "+str(remain_s)+"s"
return "Time left: "+str(remain_m)+"m "+str(remain_s)+"s."
#Ordered
MainList=[]
RaffleList=[]
TempList=[]
def addstuff():
global TempList,MainList
Name = str(randomLine(FirstNames)+" "+randomLine(LastNames))
Amount = random.choice(range(1,500))
Datehire = random_date("1/1/2008", "1/1/2030", random.random())
Datereturn = random_date("2/1/2030", "1/1/2060", random.random())
RandomObject = str(randomLine(Objects))
TempList.append(Name) #Customer name
TempList.append(str(random.choice(range(10000000,99999999)))) #Reciept number
TempList.append(RandomObject) #Item hired
TempList.append(str(Amount)) #Item Amount
TempList.append(Datehire) #Date hired
TempList.append(Datereturn) #Date returned
TempList.append(str(math.ceil(int(Amount) / 25))) #Boxes needed
raffle=str(random.choice(range(1,1000)))
RaffleList.append(raffle)
MainList+=[TempList]
lista=TempList
TempList=[]
return lista,raffle
print("Random data generator\nHow many entries do you want?")
copies = numCheck("> ",1,maxData)
last_t = 0
print("Generating entries...\n")
for x in range(1,int(copies)):
t = time.time()
lista = addstuff()
last_t = time.time() - t
remain = get_remaining_time(x, copies, last_t)
if x % 250 == 0:
print(str(x)+")\t"+str(remain))
print("\nGeneration done.\n\nDo you want to save? (y/n)")
sel = yesNoCheck("> ")
if sel == True:
with open('data1.dat', 'wb') as x:
pickle.dump(MainList, x)
with open('data2.dat', 'wb') as x:
pickle.dump(RaffleList, x)
print("\nSaved.")
time.sleep(2)
else:
print("Okay, don't know why you generated but cya!")
time.sleep(2)