This took 0.23 seconds on my machine: an HP EliteBook with an i5 processor, 8 GB RAM, and a 500 GB SSD.
A dictionary data structure is used to reach O(n) time complexity.
I noticed from the other entries here that Python is far from the fastest.
import pandas as pd
import time
import statistics
class Mode:
    """Compute the mode (most frequent value) of a list of numbers.

    Counting is done in a single pass over the data using a dict of
    occurrence counts, giving O(n) time overall.
    """

    def __init__(self):
        # Raw values loaded by read_data().
        self.data = []
        # Maps each value -> number of occurrences seen so far.
        self.numbers = {}
        # Most frequent value seen so far; None until count() runs.
        self.mode1 = None
        # Occurrence count of self.mode1.
        self.largest_count = 0

    def read_data(self):
        """Load whitespace-separated numbers from '1M_random_numbers.txt'.

        header=None: the file has no header row; all rows are data.
        Only the first column is kept.
        """
        df = pd.read_csv('1M_random_numbers.txt', header=None, sep=r'\s+')
        self.data = df[0].to_list()

    def count(self):
        """Tally occurrences and track the running mode in one O(n) pass."""
        # print('mode statistics function: ', statistics.mode(self.data))
        for x in self.data:
            # dict.get handles first occurrence and updates uniformly, and
            # the max-check runs on every occurrence — so the mode is set
            # correctly even when the winning count is 1 (the original
            # skipped the check on a value's first occurrence, leaving
            # mode1 as None for all-distinct data).
            c = self.numbers.get(x, 0) + 1
            self.numbers[x] = c
            if c > self.largest_count:
                self.largest_count = c
                self.mode1 = x

    def mode(self):
        """Return the mode, or None if count() has not been run."""
        return self.mode1
if __name__ == "__main__":
m = Mode()
m.read_data()
start=time.time()
m.count()
end=time.time()
print(m.mode()) # 4
print('Time in seconds: ', round(end-start,2))