This took 0.23 seconds on my machine: an HP EliteBook with an i5 processor, 8 GB RAM, and a 500 GB SSD.
A dictionary data structure is used to reach O(n) time complexity.
I noticed from the other entries here that Python is far from the fastest.
import pandas as pd
import time
import statistics
class Mode:
    """Compute the mode (most frequent value) of a list of numbers.

    Counting is done in a single pass over the data using a dict of
    occurrence counts, giving O(n) time overall.
    """

    def __init__(self):
        # Raw values loaded by read_data().
        self.data = []
        # Maps each value -> number of occurrences seen so far.
        self.numbers = {}
        # Most frequent value seen so far; None until count() runs.
        self.mode1 = None
        # Occurrence count of self.mode1.
        self.largest_count = 0

    def read_data(self):
        """Load whitespace-separated numbers from '1M_random_numbers.txt'.

        header=None: the file has no header row; all rows are data.
        Only the first column is kept.
        """
        df = pd.read_csv('1M_random_numbers.txt', header=None, sep=r'\s+')
        self.data = df[0].to_list()

    def count(self):
        """Tally occurrences and track the running mode in one O(n) pass."""
        # print('mode statistics function: ', statistics.mode(self.data))
        for x in self.data:
            # dict.get handles first occurrence and updates uniformly, and
            # the max-check runs on every occurrence — so the mode is set
            # correctly even when the winning count is 1 (the original
            # skipped the check on a value's first occurrence, leaving
            # mode1 as None for all-distinct data).
            c = self.numbers.get(x, 0) + 1
            self.numbers[x] = c
            if c > self.largest_count:
                self.largest_count = c
                self.mode1 = x

    def mode(self):
        """Return the mode, or None if count() has not been run."""
        return self.mode1
if __name__ == "__main__":
m = Mode()
m.read_data()
start=time.time()
m.count()
end=time.time()
print(m.mode()) # 4
print('Time in seconds: ', round(end-start,2))