You can append a pandas DataFrame to an existing sheet with openpyxl, or overwrite the sheet.
However, you need to be careful about how to handle the following cases:
- The file (.xlsx) does not exist
- The sheet does not exist
- Whether or not to include the header of the df in each append.
That is why it is better to create a custom function that takes care of those cases.
The custom function handles:
- creating the file if it does not exist
- creating the sheet if it does not exist
- not repeating the header of the df for each append
- loading the existing Excel file into memory
- selecting the target sheet to modify
- append the df content
- save the Excel file after the changes
Creating the custom function "df2xlsx". One drawback of this function is that it has a long execution time.
# stdlib imports ------------
import os
# Third-party imports --------
import pandas as pd
import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows
df2xlsx function
def df2xlsx(df: pd.DataFrame, file: str, sheet_name: str, append: bool = True) -> None:
    '''
    Write a DataFrame to a sheet of an .xlsx workbook, creating the file
    and/or the sheet when they do not exist yet.

    The whole workbook is loaded into memory with openpyxl, modified and
    saved back to ``file`` on every call.

    Parameters
    ----------
    df : pd.DataFrame
        Source dataframe. Its index is not written.
    file : str
        Path of the .xlsx file. A new workbook is created if no file
        exists at this path.
    sheet_name : str
        Target sheet. It is created if the workbook does not contain it.
    append : bool, optional
        [True] Append the rows of ``df`` below the existing content of the
        sheet, without repeating the header (the sheet is assumed to already
        hold one). [False] Replace the existing sheet with the content of
        ``df`` (header included), keeping the sheet at its original position
        in the workbook. Only relevant when the sheet already exists.
        The default is True.

    Returns
    -------
    None
    '''
    if os.path.isfile(file):
        # Existing file: load the whole workbook into memory.
        wb = openpyxl.load_workbook(file)
        if sheet_name not in wb.sheetnames:
            # New sheet in an existing workbook: it needs a header row.
            ws = wb.create_sheet(sheet_name)
            header = True
        elif append:
            # The sheet already exists: do not repeat the header.
            ws = wb[sheet_name]
            header = False
        else:
            # Overwrite: drop the old sheet and recreate it at the same
            # index so the order of the sheets in the workbook is preserved.
            position = wb.index(wb[sheet_name])
            wb.remove(wb[sheet_name])
            ws = wb.create_sheet(sheet_name, position)
            header = True
    else:
        # The file does not exist: start from a fresh in-memory workbook and
        # reuse its default sheet instead of leaving an empty extra one.
        wb = openpyxl.Workbook()
        ws = wb.active
        if ws.title != sheet_name:
            ws.title = sheet_name
        header = True

    # Write the dataframe row by row below the current content of the sheet.
    for row in dataframe_to_rows(df, index=False, header=header):
        ws.append(row)

    # Persist the changes to disk.
    wb.save(file)
Code case #1 [appending data to the existing sheet]
# ... some modifications of df_latency, or adding data from other sources ...

# Append df_latency to the "Latency" sheet of the existing xlsx file.
# The path is a raw string so that the backslashes of the Windows path are
# never interpreted as escape sequences.
df2xlsx(
    df=df_latency,
    file=r"C:\Claro\Pre-Sales\E2E Optimization\Transport\Transport Network Dashboard.xlsx",
    sheet_name="Latency",
    append=True,
)
Code case #2 [overwriting the existing sheet]
# ... some modifications of df_latency, or adding data from other sources ...

# Overwrite the "Latency" sheet of the existing xlsx file with df_latency.
# The path is a raw string so that the backslashes of the Windows path are
# never interpreted as escape sequences.
df2xlsx(
    df=df_latency,
    file=r"C:\Claro\Pre-Sales\E2E Optimization\Transport\Transport Network Dashboard.xlsx",
    sheet_name="Latency",
    append=False,
)