1

I have created the following pandas dataframe:

# Sample input: 25 consecutive one-minute OHLCV bars (16:00-16:24 on 2025-08-22).
# Every list has one entry per bar, in chronological order.
# 'Regime' labels each bar with 1, 2 or 3 (first 11 bars -> 1, next 7 -> 2, last 7 -> 3).
ds = {
    'Date' : ['2025-08-22 16:00:00',    '2025-08-22 16:01:00',  '2025-08-22 16:02:00',  '2025-08-22 16:03:00',  '2025-08-22 16:04:00',  '2025-08-22 16:05:00',  '2025-08-22 16:06:00',  '2025-08-22 16:07:00',  '2025-08-22 16:08:00',  '2025-08-22 16:09:00',  '2025-08-22 16:10:00',  '2025-08-22 16:11:00',  '2025-08-22 16:12:00',  '2025-08-22 16:13:00',  '2025-08-22 16:14:00',  '2025-08-22 16:15:00',  '2025-08-22 16:16:00',  '2025-08-22 16:17:00',  '2025-08-22 16:18:00',  '2025-08-22 16:19:00',  '2025-08-22 16:20:00',  '2025-08-22 16:21:00',  '2025-08-22 16:22:00',  '2025-08-22 16:23:00',  '2025-08-22 16:24:00'],
    'Open': [   11717.9,    11717.95,   11716.6,    11717.4,    11719.5,    11727.25,   11725.55,   11724.35,   11725.45,   11724.15,   11728.2,    11726.6,    11727.6,    11729.1,    11724.1,    11722.8,    11721.8,    11720.8,    11718.8,    11716.7,    11716.9,    11722.5,    11721.6,    11727.8,    11728.1],
    'Low': [    11715.9,    11716,  11715.35,   11716.45,   11719.5,    11724.3,    11723.55,   11723.15,   11723.85,   11724.15,   11725.2,    11726.6,    11727.6,    11724.2,    11722.6,    11721.6,    11719.7,    11715.8,    11716.5,    11716,  11716.9,    11721.3,    11721.4,    11726.35,   11727],
    'High': [   11718.1,    11718.1,    11717.9,    11719.4,    11727.15,   11727.45,   11726,  11725.65,   11727.2,    11727.85,   11728.2,    11728.7,    11729.5,    11729.1,    11725.5,    11723.9,    11722,  11720.8,    11719.8,    11717.7,    11722.9,    11724.3,    11727.8,    11728.3,    11728.8],
    'Close' : [11718.05,    11716.5,    11717,  11719.3,    11727.15,   11725.65,   11724.15,   11725.35,   11724.05,   11727.65,   11726.7,    11727.8,    11729.2,    11724.2,    11722.6,    11721.7,    11721.2,    11718.7,    11716.6,    11716.8,    11722.6,    11721.5,    11727.6,    11728,  11727.2],
    'Volume': [ 130,    88, 125,    93, 154,    102,    118,    92, 105,    116,    84, 88, 108,    99, 82, 109,    98, 130,    71, 86, 96, 83, 80, 93, 73],
    'Regime': [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3],
}

# Build the working frame; 'Date' is still a plain string column at this point.
df = pd.DataFrame(data=ds)

The dataframe contains a field called Regime, which has three values: 1, 2 and 3.

I have created a volume profile plot for each of those Regimes after grouping the records by 5 minutes.

# Parse the timestamps and index the 1-minute bars by them so they can be resampled.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Resample to 5-minute OHLCV bars. Each bar inherits the Regime of its last
# 1-minute row, so a bar that straddles two regimes is assigned to the later one.
# '5min' replaces the '5T' alias, which is deprecated since pandas 2.2; both
# spellings mean the same frequency.
# NOTE: despite its name, df_30 holds 5-minute bars (the name is kept because
# the code further down refers to it).
df_30 = df.resample('5min').agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum',
    'Regime': 'last',
})
# Drop buckets that contain no 1-minute rows (their aggregates are NaN).
df_30 = df_30.dropna()

# Add date column for grouping
df_30['date'] = df_30.index.date

# For volume profile, use the original 1min df
df['date'] = df.index.date

# Group by regime
# Draw one figure per regime: 5-minute candlesticks on top of a volume profile
# computed from the 1-minute bars that carry the same regime label.
for regime in df['Regime'].unique():
    daily_30 = df_30[df_30['Regime'] == regime]
    daily_1 = df[df['Regime'] == regime]

    # A regime can own 1-minute rows but no resampled bar (the bar takes the
    # regime of its last row); there is nothing to draw in that case.
    if daily_30.empty:
        continue

    # For candlestick plotting: candles are placed at integer x positions.
    daily_30 = daily_30.reset_index()
    xdates = np.arange(len(daily_30))

    # For volume profile: split the regime's price range into equal-width bins.
    min_price = daily_1['Low'].min()
    max_price = daily_1['High'].max()
    num_bins = 200
    bins = np.linspace(min_price, max_price, num_bins + 1)
    bin_size = bins[1] - bins[0]
    bin_centers = (bins[:-1] + bins[1:]) / 2
    volume_profile = np.zeros(num_bins)

    # Debug output; `display` is not defined in this snippet (presumably the
    # IPython/Jupyter built-in).
    display(volume_profile)

    for low, high, vol in zip(daily_1['Low'], daily_1['High'], daily_1['Volume']):
        if high == low:
            # Zero-range bar: all of its volume goes into a single bin.
            # np.digitize returns num_bins + 1 for a price sitting exactly on
            # the top edge, so clamp into the last bin instead of silently
            # dropping that volume.
            bin_idx = min(np.digitize(low, bins) - 1, num_bins - 1)
            if bin_idx >= 0:
                volume_profile[bin_idx] += vol
            continue

        # Spread the volume uniformly over the bar's [low, high] range and
        # credit each bin with the part of the range that overlaps it.
        vol_per_unit = vol / (high - low)
        first_bin = max(np.digitize(low, bins), 1)
        last_bin = min(np.digitize(high, bins), num_bins)
        for b in range(first_bin, last_bin + 1):
            overlap_start = max(low, bins[b - 1])
            overlap_end = min(high, bins[b])
            volume_profile[b - 1] += (overlap_end - overlap_start) * vol_per_unit

    # Normalize for plotting: the busiest bin spans the full chart width.
    chart_width = len(daily_30)
    peak_volume = volume_profile.max()
    if peak_volume > 0:
        scaled_volume = (volume_profile / peak_volume) * chart_width
    else:
        scaled_volume = volume_profile

    display(scaled_volume)

    # POC (Point of Control): centre of the bin holding the most volume.
    poc_price = bin_centers[np.argmax(volume_profile)]

    # Plot
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot volume profile first (as background)
    ax.fill_betweenx(bin_centers, 0, scaled_volume, color='blue', alpha=0.3, step='mid')

    # Plot POC
    ax.axhline(poc_price, color='red', linestyle='-', linewidth=1)

    # Plot candlesticks on top
    candle_width = 0.6
    for x, bar in zip(xdates, daily_30.itertuples(index=False)):
        rising = bar.Close > bar.Open
        color = 'green' if rising else 'red'
        bottom = min(bar.Open, bar.Close)
        height = abs(bar.Close - bar.Open)
        # Wick
        ax.vlines(x, bar.Low, bar.High, color='black', linewidth=0.5)
        # Body
        ax.bar(x, height, candle_width, bottom, color=color, edgecolor='black')

    ax.set_xlim(-1, chart_width + 1)
    ax.set_ylim(min_price - bin_size, max_price + bin_size)
    ax.set_xticks(xdates)
    ax.set_xticklabels(daily_30['Date'].dt.strftime('%H:%M'), rotation=45)
    # NOTE(review): the title says "30-min" although the bars are resampled to
    # 5 minutes; the string is left unchanged here.
    ax.set_title(f'30-min Candlestick with Volume Profile - Regime: {regime}')
    ax.set_xlabel('Time')
    ax.set_ylabel('Price')
    plt.tight_layout()
    plt.show()

The code creates three separate volume profile plots.

VP for Regime 1 VP for Regime 2 VP for Regime 3

I need to combine the three plots into one single plot, such that on the x-axis Plot2 follows Plot 1, and Plot 3 follows Plot 2. Basically, I'd like to see a graph similar (but of course not exactly the same) to this.

Resulting plot format

Does someone know how to do it?

0

1 Answer 1

3

Here is the closest I have been able to get to your expected output:

enter image description here

To accomplish this, we need to make a few changes:

  • Create one master figure with a single row of subplots (instead of calling plt.subplots once per regime), and build those subplots from a gridspec so that there is no spacing between the charts:
  1. fig = plt.figure(figsize=(30, 6)) # (30, 6) is just what I think is a good size, feel free to change this.
    gs = fig.add_gridspec(nrows=1, ncols=3, hspace=0, wspace=0) # hspace and wspace removes all space from between two regimes
    
    axes = gs.subplots(sharey=True) # share the y axis
    

    and change your for loop to also get the index of regime so we can use that for our axes list:

    for index, regime in enumerate(df['Regime'].unique()):
    

    and

    axes[index]. # use instead of ax.
    
  • Instead of repeating the long title "30-min Candlestick with Volume Profile - Regime: {regime}" on all three charts, we can make the common part ("30-min Candlestick with Volume Profile") the master title of the figure, and use the regime as the title of each individual chart:

  3. fig.suptitle("30-min Candlestick with Volume Profile") # Assign master title
    

    and:

  4. axes[index].set_title(f'Regime: {regime}')
    
  • Since we don't want labels appearing for every chart, we can add a loop to the end which only keeps the needed labels (for shared axes):

    for ax in axes:
        ax.label_outer()
    
  • Instead of calling plt.show() for every regime, call it at the end.

Final Code:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sample input: 25 consecutive one-minute OHLCV bars (16:00-16:24 on 2025-08-22).
# Every list has one entry per bar, in chronological order.
# 'Regime' labels each bar with 1, 2 or 3 (first 11 bars -> 1, next 7 -> 2, last 7 -> 3).
ds = {
    'Date' : ['2025-08-22 16:00:00',    '2025-08-22 16:01:00',  '2025-08-22 16:02:00',  '2025-08-22 16:03:00',  '2025-08-22 16:04:00',  '2025-08-22 16:05:00',  '2025-08-22 16:06:00',  '2025-08-22 16:07:00',  '2025-08-22 16:08:00',  '2025-08-22 16:09:00',  '2025-08-22 16:10:00',  '2025-08-22 16:11:00',  '2025-08-22 16:12:00',  '2025-08-22 16:13:00',  '2025-08-22 16:14:00',  '2025-08-22 16:15:00',  '2025-08-22 16:16:00',  '2025-08-22 16:17:00',  '2025-08-22 16:18:00',  '2025-08-22 16:19:00',  '2025-08-22 16:20:00',  '2025-08-22 16:21:00',  '2025-08-22 16:22:00',  '2025-08-22 16:23:00',  '2025-08-22 16:24:00'],
    'Open': [   11717.9,    11717.95,   11716.6,    11717.4,    11719.5,    11727.25,   11725.55,   11724.35,   11725.45,   11724.15,   11728.2,    11726.6,    11727.6,    11729.1,    11724.1,    11722.8,    11721.8,    11720.8,    11718.8,    11716.7,    11716.9,    11722.5,    11721.6,    11727.8,    11728.1],
    'Low': [    11715.9,    11716,  11715.35,   11716.45,   11719.5,    11724.3,    11723.55,   11723.15,   11723.85,   11724.15,   11725.2,    11726.6,    11727.6,    11724.2,    11722.6,    11721.6,    11719.7,    11715.8,    11716.5,    11716,  11716.9,    11721.3,    11721.4,    11726.35,   11727],
    'High': [   11718.1,    11718.1,    11717.9,    11719.4,    11727.15,   11727.45,   11726,  11725.65,   11727.2,    11727.85,   11728.2,    11728.7,    11729.5,    11729.1,    11725.5,    11723.9,    11722,  11720.8,    11719.8,    11717.7,    11722.9,    11724.3,    11727.8,    11728.3,    11728.8],
    'Close' : [11718.05,    11716.5,    11717,  11719.3,    11727.15,   11725.65,   11724.15,   11725.35,   11724.05,   11727.65,   11726.7,    11727.8,    11729.2,    11724.2,    11722.6,    11721.7,    11721.2,    11718.7,    11716.6,    11716.8,    11722.6,    11721.5,    11727.6,    11728,  11727.2],
    'Volume': [ 130,    88, 125,    93, 154,    102,    118,    92, 105,    116,    84, 88, 108,    99, 82, 109,    98, 130,    71, 86, 96, 83, 80, 93, 73],
    'Regime': [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3],
}

# Build the working frame; 'Date' is still a plain string column at this point.
df = pd.DataFrame(data=ds)

# Parse the timestamps and index the 1-minute bars by them so they can be resampled.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Resample to 5-minute OHLCV bars. Each bar inherits the Regime of its last
# 1-minute row, so a bar that straddles two regimes is assigned to the later one.
# '5min' replaces the '5T' alias, which is deprecated since pandas 2.2; both
# spellings mean the same frequency.
# NOTE: despite its name, df_30 holds 5-minute bars (the name is kept because
# the plotting code below refers to it).
df_30 = df.resample('5min').agg(
    {'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum', 'Regime': 'last'})
# Drop buckets that contain no 1-minute rows (their aggregates are NaN).
df_30 = df_30.dropna()

# Add date column for grouping
df_30['date'] = df_30.index.date

# For volume profile, use the original 1min df
df['date'] = df.index.date

# Draw all regimes side by side in one figure: one panel per regime, sharing
# the price axis, each showing 5-minute candlesticks over a volume profile
# computed from that regime's 1-minute bars.

# Only regimes that own at least one resampled bar get a panel (a bar takes the
# regime of its last 1-minute row), so no blank panel is left in the figure.
regimes = [r for r in df['Regime'].unique() if (df_30['Regime'] == r).any()]
n_panels = max(len(regimes), 1)  # a gridspec needs at least one column

# 10 x 6 inches per panel, i.e. (30, 6) for three regimes; feel free to change.
fig = plt.figure(figsize=(10 * n_panels, 6))
# hspace/wspace = 0 removes the gaps between neighbouring panels.
gs = fig.add_gridspec(nrows=1, ncols=n_panels, hspace=0, wspace=0)

# squeeze=False always returns a 2-D array, so indexing also works with a
# single panel; [0] selects the only row.
axes = gs.subplots(sharey=True, squeeze=False)[0]
# NOTE(review): the title says "30-min" although the bars are resampled to
# 5 minutes; the string is left unchanged here.
fig.suptitle("30-min Candlestick with Volume Profile")

# Per-regime y-limits. The y axis is shared, so calling set_ylim inside the
# loop would let the last regime's range override (and clip) the others; the
# union of all ranges is applied once after the loop instead.
y_limits = []

# Group by regime
for index, regime in enumerate(regimes):
    panel = axes[index]
    daily_30 = df_30[df_30['Regime'] == regime].reset_index()
    daily_1 = df[df['Regime'] == regime]

    # For candlestick plotting: candles are placed at integer x positions.
    xdates = np.arange(len(daily_30))

    # For volume profile: split the regime's price range into equal-width bins.
    min_price = daily_1['Low'].min()
    max_price = daily_1['High'].max()
    num_bins = 200
    bins = np.linspace(min_price, max_price, num_bins + 1)
    bin_size = bins[1] - bins[0]
    bin_centers = (bins[:-1] + bins[1:]) / 2
    volume_profile = np.zeros(num_bins)

    for low, high, vol in zip(daily_1['Low'], daily_1['High'], daily_1['Volume']):
        if high == low:
            # Zero-range bar: all of its volume goes into a single bin.
            # np.digitize returns num_bins + 1 for a price sitting exactly on
            # the top edge, so clamp into the last bin instead of silently
            # dropping that volume.
            bin_idx = min(np.digitize(low, bins) - 1, num_bins - 1)
            if bin_idx >= 0:
                volume_profile[bin_idx] += vol
            continue

        # Spread the volume uniformly over the bar's [low, high] range and
        # credit each bin with the part of the range that overlaps it.
        vol_per_unit = vol / (high - low)
        first_bin = max(np.digitize(low, bins), 1)
        last_bin = min(np.digitize(high, bins), num_bins)
        for b in range(first_bin, last_bin + 1):
            overlap_start = max(low, bins[b - 1])
            overlap_end = min(high, bins[b])
            volume_profile[b - 1] += (overlap_end - overlap_start) * vol_per_unit

    # Normalize for plotting: the busiest bin spans the full panel width.
    chart_width = len(daily_30)
    peak_volume = volume_profile.max()
    if peak_volume > 0:
        scaled_volume = (volume_profile / peak_volume) * chart_width
    else:
        scaled_volume = volume_profile

    # POC (Point of Control): centre of the bin holding the most volume.
    poc_price = bin_centers[np.argmax(volume_profile)]

    # Plot volume profile first (as background)
    panel.fill_betweenx(bin_centers, 0, scaled_volume, color='blue', alpha=0.3, step='mid')

    # Plot POC
    panel.axhline(poc_price, color='red', linestyle='-', linewidth=1)

    # Plot candlesticks on top
    candle_width = 0.6
    for x, bar in zip(xdates, daily_30.itertuples(index=False)):
        rising = bar.Close > bar.Open
        color = 'green' if rising else 'red'
        bottom = min(bar.Open, bar.Close)
        height = abs(bar.Close - bar.Open)
        # Wick
        panel.vlines(x, bar.Low, bar.High, color='black', linewidth=0.5)
        # Body
        panel.bar(x, height, candle_width, bottom, color=color, edgecolor='black')

    panel.set_xlim(-1, chart_width + 1)
    y_limits.append((min_price - bin_size, max_price + bin_size))
    panel.set_xticks(xdates)
    panel.set_xticklabels(daily_30['Date'].dt.strftime('%H:%M'), rotation=45)
    panel.set_title(f'Regime: {regime}')
    panel.set_xlabel('Time')
    panel.set_ylabel('Price')

# Apply one common price range that covers every regime (the axis is shared,
# so setting it on the first panel updates all of them).
if y_limits:
    lows, highs = zip(*y_limits)
    axes[0].set_ylim(min(lows), max(highs))

# Keep tick labels and axis labels only on the outer edge of the grid.
for ax in axes:
    ax.label_outer()

plt.tight_layout()
plt.show()

If you want some modifications, I will be happy to help you with that!

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.