alternative without merge_asof
Since merge_asof apparently doesn't work as well with duplicate data, here is a variant with a loop. If there are a lot of weekends, this might be slower, but I reckon it will still be faster than the original code.
def mark_runin(time, week_endpoints, run_in, direction='backward'):
    """Flag the entries of ``time`` that fall in a run-in window at an endpoint.

    For every ``point`` in ``week_endpoints`` the window is
    ``[point - run_in, point]`` when ``direction`` is ``'backward'`` and
    ``[point, point + run_in]`` when it is ``'forward'``. Both bounds are
    inclusive because ``between`` is called with its default arguments.

    Parameters
    ----------
    time
        Object exposing a pandas-``Series``-style ``between`` method whose
        result has a ``.values`` attribute.
    week_endpoints
        Iterable of reference points; each must support ``+`` / ``-`` with
        ``run_in`` and be comparable with the entries of ``time``.
    run_in
        Width of the window added to / subtracted from each endpoint.
    direction : {'backward', 'forward'}
        Side of each endpoint on which the window lies.

    Returns
    -------
    numpy.ndarray
        Boolean array shaped like ``time``; ``True`` where the entry lies in
        at least one window. All ``False`` when ``week_endpoints`` is empty.

    Raises
    ------
    ValueError
        If ``direction`` is neither ``'forward'`` nor ``'backward'``.
    """
    # Reject unknown values up front: previously anything other than
    # 'forward' (e.g. a typo) was silently treated as 'backward'.
    if direction not in ('forward', 'backward'):
        raise ValueError(
            "direction must be 'forward' or 'backward', got %r" % (direction,)
        )
    forward = direction == 'forward'  # loop-invariant, so decide it once

    mask = np.zeros_like(time, dtype=bool)
    for point in week_endpoints:
        if forward:
            lower, upper = point, point + run_in
        else:
            lower, upper = point - run_in, point
        # Windows may overlap, so accumulate them with a logical OR.
        mask |= time.between(lower, upper).values
    return mask
# Example call: direction is left at its default 'backward', so each window
# is [point - run_in, point] for every point in weekend_start.
mark_runin(time, weekend_start, run_in)
array([False, True, True, True, True, True, True, False, False, False, False, False], dtype=bool)
def drop_irregular_gaps2(data, gap_max, run_in, time_label='time'):
    """Return ``data`` without the rows that lie in a run-in window around a gap.

    The column ``data[time_label]`` is handed to ``find_weekend`` together
    with ``gap_max``; the two results are used as the reference points for a
    backward and a forward call to ``mark_runin``. Rows flagged by either
    call are removed.

    Parameters
    ----------
    data
        Table indexable by ``time_label`` and by a boolean mask
        (presumably a pandas ``DataFrame`` -- confirm against callers).
    gap_max
        Passed straight through to ``find_weekend``; presumably the largest
        spacing still regarded as regular -- confirm in ``find_weekend``.
    run_in
        Window width forwarded to ``mark_runin``.
    time_label
        Name of the column holding the time values.

    Returns
    -------
    The rows of ``data`` that were not flagged.
    """
    timestamps = data[time_label]
    gap_starts, gap_ends = find_weekend(timestamps, gap_max)

    # Window leading up to each gap start, then window following each gap end.
    leading = mark_runin(timestamps, gap_starts, run_in, direction='backward')
    trailing = mark_runin(timestamps, gap_ends, run_in, direction='forward')

    keep = ~(leading | trailing)
    return data[keep]
# Example call using the default time_label='time'; the value shown below is
# what remains of data after the flagged rows are dropped.
drop_irregular_gaps2(data, gap_max, run_in)
         time              values
0  2018-01-01   0.417022004702574
9  2018-01-17   0.538816734003357
10 2018-01-18  0.4191945144032948
11 2018-01-19  0.6852195003967595