2

My post relates to this one here: Formatting a broken y axis in python matplotlib

I have borrowed code from this post and adapted it to what I am doing.

I am attempting to create a graph whereby I am maximizing the space where the majority of the data is by customising both the scales and positions of the x and dual y-axes.

The intended effect of this graph is to zoom in on the majority of the data and relegate what I am considering to be the outliers to a smaller space in the same image.

The outliers on the dual y-axes should "spill over" the axis breaks that I have created.

My synthetic data:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec

# Fix the global NumPy seed so the synthetic sample is reproducible.
np.random.seed(123)

# The columns are drawn in the order Age, Hours, Cost; the order matters
# because all three draws consume the same seeded random stream.
sample_size = 1000
age_values = np.random.poisson(lam=7, size=sample_size) + 1  # plus 1 so minimum age is 1
hours_values = np.random.normal(loc=2100, scale=100, size=sample_size)
cost_values = np.random.gamma(shape=1.5, scale=1, size=sample_size) * 1000

df = pd.DataFrame({"Age": age_values, "Hours": hours_values, "Cost": cost_values})

My current attempt:

figure = plt.figure(figsize=(10, 5))

# Subdivide the figure into a 2x2 grid. The bottom-left cell is the largest
# (height ratio 4, width ratio 4) and holds the bulk of the data.
full_grid = GridSpec(nrows=2, ncols=2, height_ratios=[1, 4], width_ratios=[4, 1])
ax1 = figure.add_subplot(full_grid[0, 0])  # first row, first column
ax2 = figure.add_subplot(full_grid[0, 1])  # first row, second column
ax3 = figure.add_subplot(full_grid[1, 0])  # second row, first column
ax4 = figure.add_subplot(full_grid[1, 1])  # second row, second column
# Intended layout:
#   subplots 1 and 3 share the same x-axis limits and scales
#   subplots 2 and 4 share the same x-axis limits and scales
#   subplots 3 and 4 share the same y-axis limits and scales
#   subplots 1 and 2 share the same y-axis limits and scales

# --- bottom half of the figure: values at or below the 90th percentile ---
ax3hrs = ax3.twinx()  # secondary y-axis to plot hours
ax4hrs = ax4.twinx()  # secondary y-axis to plot hours

# Compute the thresholds and row masks once instead of once per scatter call.
cost_q90 = df["Cost"].quantile(0.90)
hours_q90 = df["Hours"].quantile(0.90)
age_le_10 = df["Age"] <= 10
age_gt_10 = df["Age"] > 10
cost_main = df["Cost"] <= cost_q90
hours_main = df["Hours"] <= hours_q90
scatter_style = dict(color="grey", alpha=0.1)

# age range 1 to 10
sns.scatterplot(data=df[age_le_10 & cost_main], x="Age", y="Cost", ax=ax3, **scatter_style)
sns.scatterplot(data=df[age_le_10 & hours_main], x="Age", y="Hours", ax=ax3hrs, **scatter_style)
# age range > 10
sns.scatterplot(data=df[age_gt_10 & cost_main], x="Age", y="Cost", ax=ax4, **scatter_style)
sns.scatterplot(data=df[age_gt_10 & hours_main], x="Age", y="Hours", ax=ax4hrs, **scatter_style)

# The Cost tick labels of ax4 and the Hours tick labels of ax3hrs are hidden
# below, so points in those panels are read against the neighbouring panel's
# axis. Each pair autoscales independently, so give both members of a pair the
# same y-range (the union of their autoscaled ranges). Panels that received no
# data are skipped so their default limits do not stretch the shared range.
for pair in ((ax3, ax4), (ax3hrs, ax4hrs)):
    limits = [ax.get_ylim() for ax in pair if ax.has_data()]
    if limits:
        shared_ylim = (min(lo for lo, _ in limits), max(hi for _, hi in limits))
        for ax in pair:
            ax.set_ylim(shared_ylim)

# Bottom-left panel: keep the Cost axis on the left, hide the Hours labels and ticks.
ax3.set_ylabel(None)
ax3.set_xlabel(None)
ax3hrs.set_ylabel(None)
ax3hrs.set_yticklabels("")
ax3hrs.tick_params(axis="y", length=0)  # remove ticks entirely
# Turn off the right wall and 'roof' of the plot; both ax3 and its twin must
# be changed, otherwise the twin's spines remain visible.
ax3.spines[["right", "top"]].set_visible(False)
ax3hrs.spines[["right", "top"]].set_visible(False)

# Bottom-right panel: keep the Hours axis on the right, hide the Cost labels and ticks.
ax4.set_ylabel(None)
ax4.set_xlabel(None)
ax4.set_yticklabels("")
ax4hrs.set_ylabel(None)
ax4.tick_params(axis="y", length=0)  # turn off ticks entirely
# Turn off the left wall and 'roof' of the plot; both ax4 and its twin must
# be changed, otherwise the twin's spines remain visible.
ax4.spines[["left", "top"]].set_visible(False)
ax4hrs.spines[["left", "top"]].set_visible(False)

# age ticks from 1 to 10
ax3.set_xticks(range(1, 11))
ax3.set_xticklabels(range(1, 11))

# Now insert the x-axis break for age. The break is drawn as a custom
# two-vertex line marker; d controls the slope of the diagonal stroke.
d = 0.95
kwargs = dict(marker=[(-1, -d), (1, d)], markersize=20,
              linestyle="none", color="k", mec="k", mew=1, clip_on=False)
# Two markers just past the right edge of ax3, in ax3 axes-fraction
# coordinates; clip_on=False lets them draw outside the axes rectangle.
ax3.plot([1.015, 1.015], [0, 0.03], transform=ax3.transAxes, **kwargs)

# --- top half of the figure: outliers above the 90th percentile ---
ax1hrs = ax1.twinx()  # secondary y-axis to plot hours
ax2hrs = ax2.twinx()  # secondary y-axis to plot hours
# Align each top panel's x-range with the panel directly below it.
ax1.set_xlim(ax3.get_xlim())
ax2.set_xlim(ax4.get_xlim())

# Compute the thresholds and row masks once instead of once per scatter call.
cost_q90 = df["Cost"].quantile(0.90)
hours_q90 = df["Hours"].quantile(0.90)
age_le_10 = df["Age"] <= 10
age_gt_10 = df["Age"] > 10
cost_outlier = df["Cost"] > cost_q90
hours_outlier = df["Hours"] > hours_q90
outlier_style = dict(color="grey", alpha=0.1)

# plotting outliers
# age range 1 to 10
sns.scatterplot(data=df[age_le_10 & cost_outlier], x="Age", y="Cost", ax=ax1, **outlier_style)
sns.scatterplot(data=df[age_le_10 & hours_outlier], x="Age", y="Hours", ax=ax1hrs, **outlier_style)
# age range > 10
sns.scatterplot(data=df[age_gt_10 & cost_outlier], x="Age", y="Cost", ax=ax2, **outlier_style)
sns.scatterplot(data=df[age_gt_10 & hours_outlier], x="Age", y="Hours", ax=ax2hrs, **outlier_style)

# The Cost tick labels of ax2 and the Hours tick labels of ax1hrs are hidden
# below, so points in those panels are read against the neighbouring panel's
# axis. Each pair autoscales independently, so give both members of a pair the
# same y-range (the union of their autoscaled ranges). Panels that received no
# data are skipped so their default limits do not stretch the shared range.
for top_pair in ((ax1, ax2), (ax1hrs, ax2hrs)):
    top_limits = [ax.get_ylim() for ax in top_pair if ax.has_data()]
    if top_limits:
        top_ylim = (min(lo for lo, _ in top_limits), max(hi for _, hi in top_limits))
        for ax in top_pair:
            ax.set_ylim(top_ylim)

# Top-left panel: no x labels/ticks, Cost axis on the left, Hours labels hidden.
ax1.set_xlabel(None)
ax1.set_xticklabels("")
ax1.tick_params(axis="x", length=0)
ax1.set_ylabel(None)
ax1hrs.set_ylabel(None)
ax1hrs.set_yticklabels("")
ax1hrs.tick_params(axis="y", length=0)
# Hide the floor and right wall on both the panel and its twin.
ax1.spines[["bottom", "right"]].set_visible(False)
ax1hrs.spines[["bottom", "right"]].set_visible(False)

# Top-right panel: no x labels/ticks, Cost labels hidden, Hours axis on the right.
ax2.set_xlabel(None)
ax2.set_ylabel(None)
ax2.set_xticklabels("")
ax2.set_yticklabels("")
ax2.tick_params(axis="both", length=0)
ax2hrs.set_xlabel(None)
ax2hrs.set_ylabel(None)
#ax2hrs.set_xticklabels(""); ax2hrs.tick_params(axis = "both", length = 0)
# Hide the left wall and floor on both the panel and its twin.
ax2.spines[["left", "bottom"]].set_visible(False)
ax2hrs.spines[["left", "bottom"]].set_visible(False)

# Plot the y-axis break markers. Both calls use ax3's axes-fraction
# coordinates: y values above 1 are above the top edge of ax3, and x = 1.25
# is well to the right of ax3's right edge.
# NOTE(review): the second call draws on ax4 but positions via ax3.transAxes;
# this looks deliberate (one coordinate frame for both markers) - confirm.
ax3.plot([0.015, 0.015], [1.095, 1.05], transform=ax3.transAxes, **kwargs)
ax4.plot([1.25, 1.25], [1.025, 1.075], transform=ax3.transAxes, **kwargs)

# Narrow the horizontal gap between the left and right columns.
plt.subplots_adjust(wspace=0.025)

Using "sns.regplot()", how can I draw two continuous locally weighted trend lines (one for "Cost", one for "Hours") that run across the entire figure and respect the dimensions of the entire figure? That is to say, the lines should continue through every region of the figure, including the regions reserved for "Age > 10" and for the outliers of both "Cost" and "Hours".

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.