1. Home
2. Questions
3. Unanswered
4. AI Assist Labs
5. Tags
7. Chat
8. Users
10. Companies
Stack Internal

Stack Overflow for Teams is now called Stack Internal. Bring the best of human thought and AI automation together at your work.
Try for free Learn more
Stack Internal
Bring the best of human thought and AI automation together at your work. Learn more

Return to Question

deleted 11 characters in body

Source Link

edited Aug 11, 2015 at 3:21

dermen

353
2
3
9

# Compare the three formatters on the same frame.  Each call returns a list
# of strings whose first entry is the column-title line, so [:5] keeps the
# title line plus the first four data rows.  The output below is shown as
# captured (the u'' prefixes indicate it was captured under Python 2).
df.get_lines_standard()[:5] # first five lines: title line + first four rows
#[u'   A         B       C',
# u' 504  0.924385      <3',
# u' 388  0.285854    0.16',
# u' 984  0.254156    0.16',
# u' 446  0.472621  PANDAS']

# Fixed-width variant: columns are right-aligned, values are not rounded.
df.get_lines_fast_struct()[:5] 
#['   A                 B      C',
# ' 504      0.9243853594     <3',
# ' 388    0.285854082778   0.16',
# ' 984    0.254155910401   0.16',
# ' 446    0.472621088021 PANDAS']

# Unaligned variant: cells are separated by a single space.
df.get_lines_fast_unstruct()[:5]
#['A B C',
# '504 0.9243853594 <3',
# '388 0.285854082778 0.16',
# '984 0.254155910401 0.16',
# '446 0.472621088021 PANDAS']

# Each call returns a list of strings whose first entry is the column-title
# line, so [:5] keeps the title line plus the first four data rows.
df.get_lines_standard()[:5] # first five lines: title line + first four rows
#[u'   A         B       C',
# u' 504  0.924385      <3',
# u' 388  0.285854    0.16',
# u' 984  0.254156    0.16',
# u' 446  0.472621  PANDAS']

# Fixed-width variant: columns are right-aligned, values are not rounded.
df.get_lines_fast_struct()[:5] 
#['   A                 B      C',
# ' 504      0.9243853594     <3',
# ' 388    0.285854082778   0.16',
# ' 984    0.254155910401   0.16',
# ' 446    0.472621088021 PANDAS']

# Unaligned variant: cells are separated by a single space.
df.get_lines_fast_unstruct()[:5]
#['A B C',
# '504 0.9243853594 <3',
# '388 0.285854082778 0.16',
# '984 0.254155910401 0.16',
# '446 0.472621088021 PANDAS']

# Each call returns the whole frame as a list of strings (title line first).
# Only the first five entries of each result are reproduced below; the
# trailing "# ..." marks the elided remainder.
df.get_lines_standard()
#[u'   A         B       C',
# u' 504  0.924385      <3',
# u' 388  0.285854    0.16',
# u' 984  0.254156    0.16',
# u' 446  0.472621  PANDAS']
# ...

# Fixed-width variant: columns are right-aligned, values are not rounded.
df.get_lines_fast_struct()
#['   A                 B      C',
# ' 504      0.9243853594     <3',
# ' 388    0.285854082778   0.16',
# ' 984    0.254155910401   0.16',
# ' 446    0.472621088021 PANDAS']
# ...

# Unaligned variant: cells are separated by a single space.
df.get_lines_fast_unstruct()
#['A B C',
# '504 0.9243853594 <3',
# '388 0.285854082778 0.16',
# '984 0.254155910401 0.16',
# '446 0.472621088021 PANDAS']
# ...

deleted 13 characters in body

Source Link

edited Aug 11, 2015 at 2:52

dermen

353
2
3
9

import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings whose first entry
    is the column-title line, followed by one entry per row.  The index is
    never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return ``to_string(index=False)`` split into one string per line."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return the frame as lines with cells joined by a single space.

        Lighter version of ``pandas.DataFrame.to_string()`` that applies no
        column alignment.
        """
        records = self.to_records(index=False)
        # str() the labels so non-string column names can be joined too.
        header = ' '.join(str(label) for label in self)
        # Build a real list: a lazy ``map`` cannot be concatenated to a list
        # on Python 3.
        body = [' '.join(map(str, rec)) for rec in records.tolist()]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Return the frame as lines with right-aligned, fixed-width columns.

        Lighter version of ``pandas.DataFrame.to_string()``.  Each column is
        as wide as its longest cell or its title, whichever is longer, plus
        ``col_space`` blank characters.

        col_space: number of padding characters added to every column.
        """
        records = self.to_records(index=False)
        titles = [str(label) for label in self]
        # Materialise the stringified cells: they are walked twice (once to
        # measure, once to format), which a one-shot iterator cannot support.
        str_rows = [[str(cell) for cell in rec] for rec in records]
        all_rows = [titles] + str_rows
        # Measure the titles together with the data so that a long title
        # stays aligned and a frame without rows still gets a width per
        # column.
        widths = [max(map(len, column)) + col_space
                  for column in zip(*all_rows)]
        # The widths are passed along explicitly rather than stored as
        # ``self.space``: attribute assignment on a DataFrame overwrites a
        # column of that name and otherwise triggers a pandas UserWarning.
        return [self._format_line(row, widths) for row in all_rows]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry in its column and return one string.

        row_vals: list of strings, one per column.
        widths: field width per column; when omitted, ``self.space`` is used
            so callers that set that attribute themselves keep working.
        """
        if widths is None:
            widths = self.space
        return ''.join('{0: >{1}}'.format(entry, width)
                       for entry, width in zip(row_vals, widths))

import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings whose first entry
    is the column-title line, followed by one entry per row.  The index is
    never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return ``to_string(index=False)`` split into one string per line."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return the frame as lines with cells joined by a single space.

        Lighter version of ``pandas.DataFrame.to_string()`` that applies no
        column alignment.
        """
        records = self.to_records(index=False)
        # str() the labels so non-string column names can be joined too.
        header = ' '.join(str(label) for label in self)
        # Build a real list: a lazy ``map`` cannot be concatenated to a list
        # on Python 3.
        body = [' '.join(map(str, rec)) for rec in records.tolist()]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Return the frame as lines with right-aligned, fixed-width columns.

        Lighter version of ``pandas.DataFrame.to_string()``.  Each column is
        as wide as its longest cell or its title, whichever is longer, plus
        ``col_space`` blank characters.

        col_space: number of padding characters added to every column.
        """
        records = self.to_records(index=False)
        titles = [str(label) for label in self]
        # Materialise the stringified cells: they are walked twice (once to
        # measure, once to format), which a one-shot iterator cannot support.
        str_rows = [[str(cell) for cell in rec] for rec in records]
        all_rows = [titles] + str_rows
        # Measure the titles together with the data so that a long title
        # stays aligned and a frame without rows still gets a width per
        # column.
        widths = [max(map(len, column)) + col_space
                  for column in zip(*all_rows)]
        # The widths are passed along explicitly rather than stored as
        # ``self.space``: attribute assignment on a DataFrame overwrites a
        # column of that name and otherwise triggers a pandas UserWarning.
        return [self._format_line(row, widths) for row in all_rows]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry in its column and return one string.

        row_vals: list of strings, one per column.
        widths: field width per column; when omitted, ``self.space`` is used
            so callers that set that attribute themselves keep working.
        """
        if widths is None:
            widths = self.space
        return ''.join('{0: >{1}}'.format(entry, width)
                       for entry, width in zip(row_vals, widths))

import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings whose first entry
    is the column-title line, followed by one entry per row.  The index is
    never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return ``to_string(index=False)`` split into one string per line."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return the frame as lines with cells joined by a single space.

        Lighter version of ``pandas.DataFrame.to_string()`` that applies no
        column alignment.
        """
        records = self.to_records(index=False)
        # str() the labels so non-string column names can be joined too.
        header = ' '.join(str(label) for label in self)
        # Build a real list: a lazy ``map`` cannot be concatenated to a list
        # on Python 3.
        body = [' '.join(map(str, rec)) for rec in records.tolist()]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Return the frame as lines with right-aligned, fixed-width columns.

        Lighter version of ``pandas.DataFrame.to_string()``.  Each column is
        as wide as its longest cell or its title, whichever is longer, plus
        ``col_space`` blank characters.

        col_space: number of padding characters added to every column.
        """
        records = self.to_records(index=False)
        titles = [str(label) for label in self]
        # Materialise the stringified cells: they are walked twice (once to
        # measure, once to format), which a one-shot iterator cannot support.
        str_rows = [[str(cell) for cell in rec] for rec in records]
        all_rows = [titles] + str_rows
        # Measure the titles together with the data so that a long title
        # stays aligned and a frame without rows still gets a width per
        # column.
        widths = [max(map(len, column)) + col_space
                  for column in zip(*all_rows)]
        # The widths are passed along explicitly rather than stored as
        # ``self.space``: attribute assignment on a DataFrame overwrites a
        # column of that name and otherwise triggers a pandas UserWarning.
        return [self._format_line(row, widths) for row in all_rows]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry in its column and return one string.

        row_vals: list of strings, one per column.
        widths: field width per column; when omitted, ``self.space`` is used
            so callers that set that attribute themselves keep working.
        """
        if widths is None:
            widths = self.space
        return ''.join('{0: >{1}}'.format(entry, width)
                       for entry, width in zip(row_vals, widths))

added 4 characters in body

Source Link

edited Aug 11, 2015 at 2:41

dermen

353
2
3
9

import pandas
# ``pandas.np`` was deprecated in pandas 1.0 and removed in pandas 2.0;
# import numpy directly to bind the same module to the same name.
import numpy as np


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings whose first entry
    is the column-title line, followed by one entry per row.  The index is
    never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return ``to_string(index=False)`` split into one string per line."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return the frame as lines with cells joined by a single space.

        Lighter version of ``pandas.DataFrame.to_string()`` that applies no
        column alignment.
        """
        records = self.to_records(index=False)
        # str() the labels so non-string column names can be joined too.
        header = ' '.join(str(label) for label in self)
        # Build a real list: a lazy ``map`` cannot be concatenated to a list
        # on Python 3.
        body = [' '.join(map(str, rec)) for rec in records.tolist()]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Return the frame as lines with right-aligned, fixed-width columns.

        Lighter version of ``pandas.DataFrame.to_string()``.  Each column is
        as wide as its longest cell or its title, whichever is longer, plus
        ``col_space`` blank characters.

        col_space: number of padding characters added to every column.
        """
        records = self.to_records(index=False)
        titles = [str(label) for label in self]
        # Materialise the stringified cells: they are walked twice (once to
        # measure, once to format), which a one-shot iterator cannot support.
        str_rows = [[str(cell) for cell in rec] for rec in records]
        all_rows = [titles] + str_rows
        # Measure the titles together with the data so that a long title
        # stays aligned and a frame without rows still gets a width per
        # column.
        widths = [max(map(len, column)) + col_space
                  for column in zip(*all_rows)]
        # The widths are passed along explicitly rather than stored as
        # ``self.space``: attribute assignment on a DataFrame overwrites a
        # column of that name and otherwise triggers a pandas UserWarning.
        return [self._format_line(row, widths) for row in all_rows]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry in its column and return one string.

        row_vals: list of strings, one per column.
        widths: field width per column; when omitted, ``self.space`` is used
            so callers that set that attribute themselves keep working.
        """
        if widths is None:
            widths = self.space
        return ''.join('{0: >{1}}'.format(entry, width)
                       for entry, width in zip(row_vals, widths))

import random
import numpy

# SOME TEST DATA
# 1000 rows: 'A' and 'B' are random numeric columns generated by numpy,
# 'C' is a random pick from four short labels.
# (The previous text read ``npnumpy.random``, which is an undefined name.)
df = DataFrame2({'A': numpy.random.randint(0, 1000, 1000),
                 'B': numpy.random.random(1000),
                 'C': [random.choice(['EYE', '<3', 'PANDAS', '0.16'])
                       for _ in range(1000)]})

import pandas
# ``pandas.np`` was deprecated in pandas 1.0 and removed in pandas 2.0;
# import numpy directly to bind the same module to the same name.
import numpy as np


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings whose first entry
    is the column-title line, followed by one entry per row.  The index is
    never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return ``to_string(index=False)`` split into one string per line."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return the frame as lines with cells joined by a single space.

        Lighter version of ``pandas.DataFrame.to_string()`` that applies no
        column alignment.
        """
        records = self.to_records(index=False)
        # str() the labels so non-string column names can be joined too.
        header = ' '.join(str(label) for label in self)
        # Build a real list: a lazy ``map`` cannot be concatenated to a list
        # on Python 3.
        body = [' '.join(map(str, rec)) for rec in records.tolist()]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Return the frame as lines with right-aligned, fixed-width columns.

        Lighter version of ``pandas.DataFrame.to_string()``.  Each column is
        as wide as its longest cell or its title, whichever is longer, plus
        ``col_space`` blank characters.

        col_space: number of padding characters added to every column.
        """
        records = self.to_records(index=False)
        titles = [str(label) for label in self]
        # Materialise the stringified cells: they are walked twice (once to
        # measure, once to format), which a one-shot iterator cannot support.
        str_rows = [[str(cell) for cell in rec] for rec in records]
        all_rows = [titles] + str_rows
        # Measure the titles together with the data so that a long title
        # stays aligned and a frame without rows still gets a width per
        # column.
        widths = [max(map(len, column)) + col_space
                  for column in zip(*all_rows)]
        # The widths are passed along explicitly rather than stored as
        # ``self.space``: attribute assignment on a DataFrame overwrites a
        # column of that name and otherwise triggers a pandas UserWarning.
        return [self._format_line(row, widths) for row in all_rows]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry in its column and return one string.

        row_vals: list of strings, one per column.
        widths: field width per column; when omitted, ``self.space`` is used
            so callers that set that attribute themselves keep working.
        """
        if widths is None:
            widths = self.space
        return ''.join('{0: >{1}}'.format(entry, width)
                       for entry, width in zip(row_vals, widths))

import random

# Sample frame used by the examples: 1000 rows, three columns.
df = DataFrame2(dict(
    A=np.random.randint(0, 1000, size=1000),
    B=np.random.random(size=1000),
    C=[random.choice(['EYE', '<3', 'PANDAS', '0.16']) for _ in range(1000)],
))

import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings whose first entry
    is the column-title line, followed by one entry per row.  The index is
    never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return ``to_string(index=False)`` split into one string per line."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return the frame as lines with cells joined by a single space.

        Lighter version of ``pandas.DataFrame.to_string()`` that applies no
        column alignment.
        """
        records = self.to_records(index=False)
        # str() the labels so non-string column names can be joined too.
        header = ' '.join(str(label) for label in self)
        # Build a real list: a lazy ``map`` cannot be concatenated to a list
        # on Python 3.
        body = [' '.join(map(str, rec)) for rec in records.tolist()]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Return the frame as lines with right-aligned, fixed-width columns.

        Lighter version of ``pandas.DataFrame.to_string()``.  Each column is
        as wide as its longest cell or its title, whichever is longer, plus
        ``col_space`` blank characters.

        col_space: number of padding characters added to every column.
        """
        records = self.to_records(index=False)
        titles = [str(label) for label in self]
        # Materialise the stringified cells: they are walked twice (once to
        # measure, once to format), which a one-shot iterator cannot support.
        str_rows = [[str(cell) for cell in rec] for rec in records]
        all_rows = [titles] + str_rows
        # Measure the titles together with the data so that a long title
        # stays aligned and a frame without rows still gets a width per
        # column.
        widths = [max(map(len, column)) + col_space
                  for column in zip(*all_rows)]
        # The widths are passed along explicitly rather than stored as
        # ``self.space``: attribute assignment on a DataFrame overwrites a
        # column of that name and otherwise triggers a pandas UserWarning.
        return [self._format_line(row, widths) for row in all_rows]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry in its column and return one string.

        row_vals: list of strings, one per column.
        widths: field width per column; when omitted, ``self.space`` is used
            so callers that set that attribute themselves keep working.
        """
        if widths is None:
            widths = self.space
        return ''.join('{0: >{1}}'.format(entry, width)
                       for entry, width in zip(row_vals, widths))

import random
import numpy

# Sample frame used by the examples: 1000 rows, three columns.
df = DataFrame2(dict(
    A=numpy.random.randint(0, 1000, size=1000),
    B=numpy.random.random(size=1000),
    C=[random.choice(['EYE', '<3', 'PANDAS', '0.16']) for _ in range(1000)],
))

added 51 characters in body

Source Link

edited Aug 11, 2015 at 2:32

dermen

353
2
3
9

Loading

Tweeted twitter.com/#!/StackCodeReview/status/629489535961972736

occurred Aug 7, 2015 at 3:09

edited title

Link

edited Aug 5, 2015 at 22:01

dermen

353
2
3
9

Loading

added 116 characters in body

Source Link

edited Aug 5, 2015 at 21:50

dermen

353
2
3
9

Loading

edited tags

Link

edited Aug 5, 2015 at 20:37

200_success

145.7k
22
191
481

Loading

added 4 characters in body

Source Link

edited Aug 5, 2015 at 19:49

dermen

353
2
3
9

Loading

deleted 42 characters in body; edited title

Source Link

edited Aug 5, 2015 at 0:53

Jamal

35.2k
13
134
238

Loading

Source Link

asked Aug 5, 2015 at 0:47

dermen

353
2
3
9

Loading