I have a DataFrame named `df`, and I want to split my processing steps into separate functions so that I can reuse those functions in future programs.
# Drop rows duplicated on ('datetime', 'tagname'), then remove the 'tagname' column
def duplicate_check(frame=None):
    """Drop duplicate (datetime, tagname) rows and remove the tagname column.

    Args:
        frame: DataFrame to process. It must contain the columns
            ``'datetime'`` and ``'tagname'``. When omitted (``None``), the
            module-level ``df`` is used and rebound to the result, which
            keeps existing zero-argument callers working.

    Returns:
        A new DataFrame holding the first row of every
        ``('datetime', 'tagname')`` combination, without the ``'tagname'``
        column. A frame passed in explicitly is never modified.
    """
    global df
    use_global = frame is None
    source = df if use_global else frame

    # Chain the two steps instead of using ``inplace=True`` so that the
    # input frame is left untouched and no intermediate object is mutated.
    result = (
        source
        .drop_duplicates(['datetime', 'tagname'])
        .drop(columns=['tagname'])
    )

    if use_global:
        # Legacy behaviour: the zero-argument form updates the global frame.
        df = result
    return result
# Run the de-duplication step and rebind the module-level `df` to its result.
# NOTE(review): `df` must already be defined before this line executes.
df = duplicate_check()
# Split the tab-separated 'value' column into individual numeric columns
def array_split(frame=None):
    """Expand the tab-separated ``'value'`` column into numeric columns.

    Args:
        frame: DataFrame to process. It must contain a string column
            ``'value'`` and a column ``'datetime'``. When omitted
            (``None``), the module-level ``df`` is used and rebound to the
            result, which keeps existing zero-argument callers working.

    Returns:
        A new DataFrame with one numeric column per tab-separated field
        (labelled 0, 1, 2, ...) plus the original ``'datetime'`` column.
        Rows with fewer fields than the widest row are padded with 0.
        A frame passed in explicitly is never modified.

    Raises:
        ValueError: propagated from ``pd.to_numeric`` when a field cannot
            be parsed as a number.
    """
    global df
    use_global = frame is None
    source = df if use_global else frame

    values = (
        source['value']
        .str.split('\t', expand=True)
        # Missing trailing fields come back as NaN/None; treat them as zero.
        .fillna('0')
        # Collapse whitespace runs and literal "\n" sequences to one space
        # before numeric parsing.
        .replace(r'\s+|\\n', ' ', regex=True)
        .apply(pd.to_numeric)
    )
    # Re-attach the timestamps; assignment aligns on the shared index.
    values['datetime'] = source['datetime']

    if use_global:
        # Legacy behaviour: the zero-argument form updates the global frame.
        df = values
    return values
# Replace the module-level `df` with the expanded numeric frame returned by
# array_split().
df = array_split()
# Split df into df (rows where column 123 is 0) and df_spec (rows where column 123 is 1, de-duplicated on column 119)
def remove_duplicate_spec(frame=None, flag_col=123, key_col=119):
    """Split a frame on a 0/1 flag column and de-duplicate the flagged part.

    Args:
        frame: DataFrame to process. When omitted (``None``), the
            module-level ``df`` is used, and the globals ``df`` and
            ``df_spec`` are rebound to the two results, which keeps
            existing zero-argument callers working.
        flag_col: Label of the column whose value selects the partition
            (rows equal to 0 vs. rows equal to 1). Defaults to ``123``.
        key_col: Label of the column used to drop duplicates from the
            flagged partition. Defaults to ``119``.

    Returns:
        A tuple ``(regular, spec)``: ``regular`` holds the rows whose
        ``flag_col`` is 0; ``spec`` holds the rows whose ``flag_col`` is 1,
        keeping only the first row for each distinct ``key_col`` value.
        Rows with any other flag value appear in neither result.
    """
    global df, df_spec
    use_global = frame is None
    source = df if use_global else frame

    flags = source[flag_col]
    regular = source.loc[flags.isin([0])]
    spec = source.loc[flags.isin([1])].drop_duplicates(subset=key_col)

    if use_global:
        # Legacy behaviour: the zero-argument form updates both globals.
        df, df_spec = regular, spec
    return regular, spec
# Unpack the two partitions returned by remove_duplicate_spec() into the
# module-level names `df` and `df_spec`.
df, df_spec = remove_duplicate_spec()
Question: Should I declare `global df` / `global df_spec` inside each function? Is this the best practice? If not, how can I improve the code further?