I'm trying to do zonal statistics with the Earth Engine Python API when I encounter this error message:
EEException: Computation timed out
while running this cell of code, which converts the feature collection into a data frame:
df_asc = geemap.ee_to_df(zonal_asc)
df_des = geemap.ee_to_df(zonal_des)
My overall code looks something like this:
import ee
import geemap
import pandas as pd
import matplotlib.pyplot as plt
import os
ee.Authenticate()
ee.Initialize()
fc = ee.FeatureCollection('projects/project-id/assets/assets-name')
bounds = fc.geometry().bounds()
# start = '2024-07-01'
start = '2024-01-01'
end = '2024-06-30'
s1 = (ee.ImageCollection('COPERNICUS/S1_GRD')
.filterDate(start, end)
.filterBounds(bounds)
.filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
.filter(ee.Filter.eq('instrumentMode', 'IW'))
.select('VV'))
ascending = s1.filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))
descending = s1.filter(ee.Filter.eq('orbitProperties_pass', 'DESCENDING'))
def process_sar(image):
sigma = ee.Image(10).pow(image.divide(10)).rename('sigma')
filtered = sigma.focalMean(30, 'square', 'meters')
return filtered.copyProperties(image, image.propertyNames())
sigma_asc = ascending.map(process_sar)
sigma_des = descending.map(process_sar)
def normalize_to_moisture(sigma_collection, band_name):
min_img = sigma_collection.min()
max_img = sigma_collection.max()
def normalize(image):
moisture = image.subtract(min_img).divide(max_img.subtract(min_img))
return moisture.rename(band_name).copyProperties(image, ['system:time_start'])
return sigma_collection.map(normalize)
soil_moisture_asc = normalize_to_moisture(sigma_asc, 'SoilMoisture_ASC')
soil_moisture_des = normalize_to_moisture(sigma_des, 'SoilMoisture_DES')
def zonal_stats_per_image(image, band_name):
date = image.date()
doy = date.getRelative('day', 'year')
stats = image.reduceRegions(
collection=fc,
reducer=ee.Reducer.median(),
scale=30,
tileScale=16
)
return stats.map(lambda f: f.set('DOY', doy).set('date', date.format('YYYY-MM-dd')))
zonal_asc = soil_moisture_asc.map(lambda img: zonal_stats_per_image(img, 'SoilMoisture_ASC')).flatten()
zonal_des = soil_moisture_des.map(lambda img: zonal_stats_per_image(img, 'SoilMoisture_DES')).flatten()
df_asc = geemap.ee_to_df(zonal_asc)
df_des = geemap.ee_to_df(zonal_des)
df_asc['date'] = pd.to_datetime(df_asc['date'])
df_des['date'] = pd.to_datetime(df_des['date'])
df_asc = df_asc.sort_values('DOY')
df_des = df_des.sort_values('DOY')
agg_asc = df_asc.groupby(['code', 'öko', 'DOY'])['median'].agg(['median', 'std']).reset_index()
agg_des = df_des.groupby(['code', 'öko', 'DOY'])['median'].agg(['median', 'std']).reset_index()
def plot_soil_moisture(agg_asc, agg_des, save_plots=False, output_dir=None):
unique_codes = sorted(agg_asc['code'].unique())
unique_okos = sorted(agg_asc['öko'].unique())
fig, axes = plt.subplots(len(unique_codes), len(unique_okos), figsize=(15, len(unique_codes) * 5))
for i, code in enumerate(unique_codes):
for j, oko in enumerate(unique_okos):
subset_asc = agg_asc[(agg_asc['code'] == code) & (agg_asc['öko'] == oko)].sort_values('DOY')
subset_des = agg_des[(agg_des['code'] == code) & (agg_des['öko'] == oko)].sort_values('DOY')
ax = axes[i, j] if len(unique_codes) > 1 or len(unique_okos) > 1 else axes
if not subset_asc.empty:
ax.plot(subset_asc['DOY'], subset_asc['median'], color='blue', linewidth=2, label='ASC Median')
ax.fill_between(subset_asc['DOY'],
subset_asc['median'] - subset_asc['std'].fillna(0),
subset_asc['median'] + subset_asc['std'].fillna(0),
color='blue', alpha=0.2, label='ASC Std')
if not subset_des.empty:
ax.plot(subset_des['DOY'], subset_des['median'], color='orange', linewidth=2, label='DES Median')
ax.fill_between(subset_des['DOY'],
subset_des['median'] - subset_des['std'].fillna(0),
subset_des['median'] + subset_des['std'].fillna(0),
color='orange', alpha=0.2, label='DES Std')
ax.set_title(f'{oko}: CODE {code} Soil Moisture', fontsize=13)
ax.set_xlabel('Day of Year', fontsize=12)
ax.set_ylabel('Soil Moisture Index (0-1)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=10)
ax.set_xlim(0, 366)
ax.set_ylim(0, 1)
plt.suptitle('Temporal Soil Moisture by Crop Code and Öko (2024)', fontsize=14, y=1)
plt.tight_layout()
if save_plots:
if not os.path.exists(output_dir):
os.makedirs(output_dir)
plt.savefig(os.path.join(output_dir, f's1_sm_plot.png'),
dpi=300, bbox_inches='tight')
plt.show()
# plot_soil_moisture(agg_asc, agg_des)
I have tried changing the reducer, increasing tileScale, reducing the scale, and setting the bestEffort parameter to true, but it took ages and always ended up showing the error message. However, when I shorten the timespan to just 6 months or drop some of the columns it works, though it still takes ages to process — and I need the whole year as well as all of the columns.
My feature collection has around 8000 polygons.
Is there any way I can overcome this problem and reduce the computation time?