I have been looking into the method called Conflation which looks into combining several continuous distributions of some type but different shapes/arguments (mean and variance) into one continuous distribution. The question can be found at the following link: Conflation Method Question.
So, I wanted to look for a way to combine different types of continuous distributions instead of normal distribution but I have been struggling with how to form the conflated distribution array using an equation for continuous distributions (Conflation equation can be found in the link above) with norm distribution. I got the following error message:
Error Message:
Traceback (most recent call last):
File "D:\Anaconda\Projects\untitled63.py", line 34, in <module>
graph=conflate_pdf(dists,lb,ub)
File "D:\Anaconda\Projects\untitled63.py", line 20, in conflate_pdf
denom = quad(func=(prod_pdf), a=(lb), b=(ub), args=(dists))[0]
File "D:\Anaconda\lib\site-packages\scipy\integrate\quadpack.py", line 351, in quad
retval = _quad(func, a, b, args, full_output, epsabs, epsrel, limit,
File "D:\Anaconda\lib\site-packages\scipy\integrate\quadpack.py", line 463, in _quad
return _quadpack._qagse(func,a,b,args,full_output,epsabs,epsrel,limit)
TypeError: prod_pdf() takes 1 positional argument but 2 were given
How can I solve this problem and obtain conflated PDF array? Please find the code below:
Code:
from scipy.integrate import quad
from scipy import stats
import numpy as np
def prod_pdf(dists):
for dist in dists:
p_pdf=np.ones(dist.shape[0])
for x in dist:
p_pdf=p_pdf*x
return p_pdf
def conflate_pdf(dists,lb,ub):
denom = quad(func=(prod_pdf), a=(lb), b=(ub), args=(dists))[0]
return prod_pdf(dists)/denom
lb=-10
ub=10
domain=np.arange(lb,ub,.01)
dist_1 = stats.norm.pdf(domain, 2,1)
dist_2 = stats.norm.pdf(domain, 2.5,1.5)
dist_3 = stats.norm.pdf(domain, 2.2,1.6)
dist_4 = stats.norm.pdf(domain, 2.4,1.3)
dist_5 = stats.norm.pdf(domain, 2.7,1.5)
dists=[dist_1, dist_2, dist_3, dist_4, dist_5]
graph=conflate_pdf(dists,lb,ub)
from matplotlib import pyplot as plt
plt.plot(domain,graph)
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("Conflated PDF")
plt.show()
Edit 1:
I modified the function based on @MindTheBandGap and now I get a new error. I am thinking could it be because of the division, integrating using an array, or could it something else? Find the code and error message below:
Code:
def prod_pdf(x,pdfs):
prod=np.ones(np.array(pdfs)[0].shape)
for pdf in pdfs:
print(prod)
for c,y in enumerate(pdf):
prod[c]=prod[c]*y
print('final:', prod)
return prod
def conflate_pdf(x,dists,lb,ub):
denom = quad(prod_pdf, lb, ub, args=(dists))
print('Denom: ',denom)
print('product pdf: ', prod_pdf(x,dists)[0])
conflated_pdf=prod_pdf(x,dists)[0]/denom
print(conflated_pdf)
return conflated_pdf
lb=-10
ub=10
domain=np.arange(lb,ub,.01)
dist_1 = st.norm.pdf(domain, 2,1)
dist_2 = st.norm.pdf(domain, 2.5,1.5)
dist_3 = st.norm.pdf(domain, 2.2,1.6)
dist_4 = st.norm.pdf(domain, 2.4,1.3)
dist_5 = st.norm.pdf(domain, 2.7,1.5)
from matplotlib import pyplot as plt
plt.plot(domain, dist_1, 'r')
plt.plot(domain, dist_2, 'g')
plt.plot(domain, dist_3, 'b')
plt.plot(domain, dist_4, 'y')
plt.plot(domain, dist_5, 'c')
dists=[dist_1, dist_2, dist_3, dist_4, dist_5]
graph=conflate_pdf(domain, dists,lb,ub)
plt.plot(domain,graph, 'm')
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("Conflated PDF")
plt.show()
Error Message:
Traceback (most recent call last):
File "D:\Anaconda\Projects\untitled62.py", line 85, in <module>
graph=conflate_pdf(domain, dists,lb,ub)
File "D:\Anaconda\Projects\untitled62.py", line 60, in conflate_pdf
denom = quad(prod_pdf, lb, ub, args=(dists))[0]
File "D:\Anaconda\lib\site-packages\scipy\integrate\quadpack.py", line 351, in quad
retval = _quad(func, a, b, args, full_output, epsabs, epsrel, limit,
File "D:\Anaconda\lib\site-packages\scipy\integrate\quadpack.py", line 463, in _quad
return _quadpack._qagse(func,a,b,args,full_output,epsabs,epsrel,limit)
TypeError: only size-1 arrays can be converted to Python scalars
Edit 2:
Answer From this link Conflation method question, the code was able to run without the error with of size-1 array. Here is the code with output and plot:
Code:
from scipy.integrate import quad
from scipy import stats
import numpy as np
def prod_pdf(x,dists):
p_pdf=1
print('Incoming Array:', p_pdf)
for dist in dists:
p_pdf=p_pdf*dist.pdf(x)
print('final:', p_pdf)
return p_pdf
def conflate_pdf(x,dists,lb,ub):
print('Input product pdf: ', prod_pdf(x,dists))
denom = quad(prod_pdf, lb, ub, args=(dists,))[0]
print('Denom: ', denom)
conflated_pdf=prod_pdf(x,dists)/denom
print('Conflated PDF: ', conflated_pdf)
return conflated_pdf
lb=-10
ub=10
domain=np.arange(lb,ub,.01)
dist_1 = st.norm.pdf(domain, 2,1)
dist_2 = st.norm.pdf(domain, 2.5,1.5)
dist_3 = st.norm.pdf(domain, 2.2,1.6)
dist_4 = st.norm.pdf(domain, 2.4,1.3)
dist_5 = st.norm.pdf(domain, 2.7,1.5)
dists=[stats.norm(2,1), stats.norm(2.5,1.5), stats.norm(2.2,1.6), stats.norm(2.4,1.3), stats.norm(2.7,1.5)]
from matplotlib import pyplot as plt
plt.plot(domain, dist_1, 'r', label='Dist. 1')
plt.plot(domain, dist_2, 'g', label='Dist. 2')
plt.plot(domain, dist_3, 'b', label='Dist. 3')
plt.plot(domain, dist_4, 'y', label='Dist. 4')
plt.plot(domain, dist_5, 'c', label='Dist. 5')
graph=conflate_pdf(domain,dists,lb,ub)
plt.plot(domain,graph, 'm', label='Conflated Dist.')
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("Conflated PDF")
plt.legend()
plt.show()
Here is a small portion of the output:
Incoming Array: 1
final: 0.15352177537004433
final: 0.034348669264845304
final: 0.006519131844904635
final: 0.0015040030811035296
final: 0.0003607258742065213
Incoming Array: 1
final: 0.042345986284209325
final: 0.006294747321619583
final: 0.0007651214249593444
final: 9.805307029794648e-05
final: 1.668121592516301e-05
Denom: 0.0029066671327537714
Incoming Array: 1
final: [2.14638374e-32 2.41991991e-32 2.72804284e-32 ... 6.41980576e-15
5.92770938e-15 5.47278628e-15]
final: [4.75178372e-48 5.66328097e-48 6.74864868e-48 ... 7.03075979e-21
6.27970218e-21 5.60806584e-21]
final: [2.80912097e-61 3.51131870e-61 4.38823989e-61 ... 1.32670185e-26
1.14952951e-26 9.95834610e-27]
final: [1.51005552e-81 2.03116529e-81 2.73144352e-81 ... 1.76466623e-34
1.46198598e-34 1.21092834e-34]
final: [1.09076800e-97 1.55234627e-97 2.20861552e-97 ... 3.72095218e-40
2.98464396e-40 2.39335035e-40]
Conflated PDF: [3.75264162e-95 5.34063998e-95 7.59844666e-95 ... 1.28014389e-37
1.02682689e-37 8.23400219e-38]
Plot:
The way I see it, it takes each density value from one and multiplies it with other distributions. I tried to run the following code (based on an answer from the link), but, I keep on getting one variable it gets the whole array after product function and it produces the same error regarding the size-1 array. See the following code with a portion of the output:
Code:
from scipy.integrate import quad
from scipy import stats
import numpy as np
def prod_pdf(x,dists):
p_pdf=1
print('Incoming Array:', p_pdf)
for dist in dists:
p_pdf=p_pdf*dist
print('final:', p_pdf)
return p_pd
def conflate_pdf(x,dists,lb,ub):
print('Input product pdf: ', prod_pdf(x,dists))
denom = quad(prod_pdf, lb, ub, args=(dists,))[0]
# denom = simps(prod_pdf)
# denom = nquad(func=(prod_pdf), ranges=([lb, ub]), args=(dists,))[0]
print('Denom: ', denom)
conflated_pdf=prod_pdf(x,dists)/denom
print('Conflated PDF: ', conflated_pdf)
return conflated_pdf
lb=-10
ub=10
domain=np.arange(lb,ub,.01)
dist_1 = st.norm.pdf(domain, 2,1)
dist_2 = st.norm.pdf(domain, 2.5,1.5)
dist_3 = st.norm.pdf(domain, 2.2,1.6)
dist_4 = st.norm.pdf(domain, 2.4,1.3)
dist_5 = st.norm.pdf(domain, 2.7,1.5)
from matplotlib import pyplot as plt
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("Conflated PDF")
plt.legend()
plt.plot(domain, dist_1, 'r', label='Dist. 1')
plt.plot(domain, dist_2, 'g', label='Dist. 2')
plt.plot(domain, dist_3, 'b', label='Dist. 3')
plt.plot(domain, dist_4, 'y', label='Dist. 4')
plt.plot(domain, dist_5, 'c', label='Dist. 5')
dists=[dist_1, dist_2, dist_3, dist_4, dist_5]
print('distribution list: \n', dists)
graph=conflate_pdf(domain, dists,lb,ub)
plt.plot(domain,graph, 'm', label='Conflated Dist.')
plt.show()
Here is a small portion of the output:
Incoming Array: 1
final: [2.14638374e-32 2.41991991e-32 2.72804284e-32 ... 6.41980576e-15
5.92770938e-15 5.47278628e-15]
final: [4.75178372e-48 5.66328097e-48 6.74864868e-48 ... 7.03075979e-21
6.27970218e-21 5.60806584e-21]
final: [2.80912097e-61 3.51131870e-61 4.38823989e-61 ... 1.32670185e-26
1.14952951e-26 9.95834610e-27]
final: [1.51005552e-81 2.03116529e-81 2.73144352e-81 ... 1.76466623e-34
1.46198598e-34 1.21092834e-34]
final: [1.09076800e-97 1.55234627e-97 2.20861552e-97 ... 3.72095218e-40
2.98464396e-40 2.39335035e-40]
Input product pdf: [1.09076800e-97 1.55234627e-97 2.20861552e-97 ... 3.72095218e-40
2.98464396e-40 2.39335035e-40]
Incoming Array: 1
final: [2.14638374e-32 2.41991991e-32 2.72804284e-32 ... 6.41980576e-15
5.92770938e-15 5.47278628e-15]
final: [4.75178372e-48 5.66328097e-48 6.74864868e-48 ... 7.03075979e-21
6.27970218e-21 5.60806584e-21]
final: [2.80912097e-61 3.51131870e-61 4.38823989e-61 ... 1.32670185e-26
1.14952951e-26 9.95834610e-27]
final: [1.51005552e-81 2.03116529e-81 2.73144352e-81 ... 1.76466623e-34
1.46198598e-34 1.21092834e-34]
final: [1.09076800e-97 1.55234627e-97 2.20861552e-97 ... 3.72095218e-40
2.98464396e-40 2.39335035e-40]
I managed to look into the code to implement the same method in the link, I edited the code where it gets the first variables from each distribution however, for the rest of the loop it keeps on printing the same values, it does not go to the next values in the lists and Conflated distribution is a single variable. See the following code with a portion of the output:
Code:
from scipy.integrate import quad
from scipy import stats
import numpy as np
def prod_pdf(x,dists):
p_pdf=1
print('Incoming Array:', p_pdf)
for c,dist in enumerate(dists):
p_pdf=p_pdf*dist[c]
print('final:', p_pdf)
return p_pdf
def conflate_pdf(x,dists,lb,ub):
print('Input product pdf: ', prod_pdf(x,dists))
denom = quad(prod_pdf, lb, ub, args=(dists,))[0]
# denom = simps(prod_pdf)
# denom = nquad(func=(prod_pdf), ranges=([lb, ub]), args=(dists,))[0]
print('Denom: ', denom)
conflated_pdf=prod_pdf(x,dists)/denom
print('Conflated PDF: ', conflated_pdf)
return conflated_pdf
lb=-10
ub=10
domain=np.arange(lb,ub,.01)
dist_1 = st.norm.pdf(domain, 2,1)
dist_2 = st.norm.pdf(domain, 2.5,1.5)
dist_3 = st.norm.pdf(domain, 2.2,1.6)
dist_4 = st.norm.pdf(domain, 2.4,1.3)
dist_5 = st.norm.pdf(domain, 2.7,1.5)
from matplotlib import pyplot as plt
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("Conflated PDF")
plt.legend()
plt.plot(domain, dist_1, 'r', label='Dist. 1')
plt.plot(domain, dist_2, 'g', label='Dist. 2')
plt.plot(domain, dist_3, 'b', label='Dist. 3')
plt.plot(domain, dist_4, 'y', label='Dist. 4')
plt.plot(domain, dist_5, 'c', label='Dist. 5')
dists=[dist_1, dist_2, dist_3, dist_4, dist_5]
print('distribution list: \n', dists)
graph=conflate_pdf(domain, dists,lb,ub)
plt.plot(domain,graph, 'm', label='Conflated Dist.')
plt.show()
A portion of the output:
Incoming Array: 1
final: 2.1463837356630605e-32
final: 5.0231307782193034e-48
final: 3.266239495519432e-61
final: 2.187514996217005e-81
final: 1.979657878680375e-97
Incoming Array: 1
final: 2.1463837356630605e-32
final: 5.0231307782193034e-48
final: 3.266239495519432e-61
final: 2.187514996217005e-81
final: 1.979657878680375e-97
Denom: 3.95931575736075e-96
Incoming Array: 1
final: 2.1463837356630605e-32
final: 5.0231307782193034e-48
final: 3.266239495519432e-61
final: 2.187514996217005e-81
final: 1.979657878680375e-97
Conflated PDF: 0.049999999999999996
Edit 3:
I implemented the following code and it seems to work, also, I managed to sort out the problem with quad it seems if I changed the part that has quad into fixed_quad and normalise the pdf list. I will get the same result. Here is the following code:
import scipy.stats as st
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, Normalizer, normalize, StandardScaler
from scipy.integrate import quad, simps, quad_vec, nquad, cumulative_trapezoid
from scipy.integrate import romberg, trapezoid, simpson, romb
from scipy.integrate import fixed_quad, quadrature, quad_explain
from scipy import stats
import time
def user_prod_pdf(x,dists):
p_list=[]
p_pdf=1
print('Incoming Array:', p_pdf)
for dist in dists:
print('Incoming Distribution Array:', dist.pdf(x))
p_pdf=p_pdf*dist.pdf(x)
print('Product PDF:', p_pdf)
p_list.append(p_pdf)
print('final Product PDF:', p_pdf)
print('Product PDF list: ', p_list)
return p_pdf
def user_conflate_pdf(x,dists,lb,ub):
print('Input product pdf: ', user_prod_pdf(x,dists))
denom = quad(user_prod_pdf, lb, ub, args=(dists,))[0]
print('Denom: ', denom)
conflated_pdf=user_prod_pdf(x,dists)/denom
print('Conflated PDF: ', conflated_pdf)
return conflated_pdf
def user_conflate_pdf_2(pdfs):
"""
Compute conflation of given pdfs.
[ARGS]
- pdfs: PDFs numpy array of shape (n, x)
where n is the number of PDFs
and x is the variable space.
[RETURN]
A 1d-array of normalized conflated PDF.
"""
# conflate
conflation = np.array(pdfs).prod(axis=0)
# normalize
conflation /= conflation.sum()
return conflation
def my_product_pdf(x,dists):
p_list=[]
p_pdf=1
print('Incoming Array:', p_pdf)
list_full_size=np.array(dists).shape
print('Full list size: ', list_full_size)
print('list size: ', list_full_size[0])
for x in range(list_full_size[1]):
p_pdf=1
for y in range(list_full_size[0]):
p_pdf=float(p_pdf)*dists[y][x]
print('Product value: ', p_pdf)
print('Product PDF:', p_pdf)
p_list.append(p_pdf)
print('final Product PDF:', p_pdf)
print('Product PDF list: ', p_list)
# return p_pdf
return p_list
# return np.array(p_list)
def my_conflate_pdf(x,dists,lb,ub):
print('\n')
# print('product pdf: ', prod_pdf(x,dists))
print('product pdf: ', my_product_pdf(x,dists))
denom = fixed_quad(my_product_pdf, lb, ub, args=(dists,), n=1)[0]
print('Denom: ', denom)
# conflated_pdf=prod_pdf(x,dists)/denom
conflated_pdf=my_product_pdf(x,dists)/denom
# conflated_pdf=[i / j for i,j in zip(my_product_pdf(x,dists), denom)]
print('Conflated PDF: ', conflated_pdf)
return conflated_pdf
lb=-10
ub=10
domain=np.arange(lb,ub,.01)
# dist_1 = st.norm(2,1)
# dist_2 = st.norm(2.5,1.5)
# dist_3 = st.norm(2.2,1.6)
# dist_4 = st.norm(2.4,1.3)
# dist_5 = st.norm(2.7,1.5)
# dist_1_pdf = st.norm.pdf(domain, 2,1)
# dist_2_pdf = st.norm.pdf(domain, 2.5,1.5)
# dist_3_pdf = st.norm.pdf(domain, 2.2,1.6)
# dist_4_pdf = st.norm.pdf(domain, 2.4,1.3)
# dist_5_pdf = st.norm.pdf(domain, 2.7,1.5)
# dist_1_pdf /= dist_1_pdf.sum()
# dist_2_pdf /= dist_2_pdf.sum()
# dist_3_pdf /= dist_3_pdf.sum()
# dist_4_pdf /= dist_4_pdf.sum()
# dist_5_pdf /= dist_5_pdf.sum()
dist_1 = st.norm(2,1)
dist_2 = st.norm(4,2)
dist_3 = st.norm(7,4)
dist_4 = st.norm(2.4,1.3)
dist_5 = st.norm(2.7,1.5)
dist_1_pdf = st.norm.pdf(domain, 2,1)
dist_2_pdf = st.norm.pdf(domain, 4,2)
dist_3_pdf = st.norm.pdf(domain, 7,4)
dist_4_pdf = st.norm.pdf(domain, 2.4,1.3)
dist_5_pdf = st.norm.pdf(domain, 2.7,1.5)
# dist_1_pdf /= dist_1_pdf.sum()
# dist_2_pdf /= dist_2_pdf.sum()
# dist_3_pdf /= dist_3_pdf.sum()
# dist_4_pdf /= dist_4_pdf.sum()
# dist_5_pdf /= dist_5_pdf.sum()
# User:
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("User Conflated PDF")
plt.plot(domain, dist_1_pdf, 'r', label='Dist. 1')
plt.plot(domain, dist_2_pdf, 'g', label='Dist. 2')
plt.plot(domain, dist_3_pdf, 'b', label='Dist. 3')
plt.plot(domain, dist_4_pdf, 'y', label='Dist. 4')
plt.plot(domain, dist_5_pdf, 'c', label='Dist. 5')
dists=[dist_1, dist_2, dist_3, dist_4, dist_5]
user_graph=user_conflate_pdf(domain,dists,lb,ub)
print('Final Conflated PDF: ', user_graph)
# user_graph /= user_graph.sum()
plt.plot(domain, user_graph, 'm', label='Conflated PDF')
plt.legend()
plt.show()
# User 2:
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("User Conflated PDF 2")
plt.plot(domain, dist_1_pdf, 'r', label='Dist. 1')
plt.plot(domain, dist_2_pdf, 'g', label='Dist. 2')
plt.plot(domain, dist_3_pdf, 'b', label='Dist. 3')
plt.plot(domain, dist_4_pdf, 'y', label='Dist. 4')
plt.plot(domain, dist_5_pdf, 'c', label='Dist. 5')
dists=[dist_1_pdf, dist_2_pdf, dist_3_pdf, dist_4_pdf, dist_5_pdf]
user_graph=user_conflate_pdf_2(dists)
print('Final User Conflated PDF 2 : ', user_graph)
# user_graph /= user_graph.sum()
plt.plot(domain, user_graph, 'm', label='Conflated PDF')
plt.legend()
plt.show()
# My Code:
# from matplotlib import pyplot as plt
plt.xlabel("domain")
plt.ylabel("pdf")
plt.title("My Conflated PDF Code")
plt.plot(domain, dist_1_pdf, 'r', label='Dist. 1')
plt.plot(domain, dist_2_pdf, 'g', label='Dist. 2')
plt.plot(domain, dist_3_pdf, 'b', label='Dist. 3')
plt.plot(domain, dist_4_pdf, 'y', label='Dist. 4')
plt.plot(domain, dist_5_pdf, 'c', label='Dist. 5')
dists=[dist_1_pdf, dist_2_pdf, dist_3_pdf, dist_4_pdf, dist_5_pdf]
my_graph=my_conflate_pdf(domain,dists,lb,ub)
print('Final Conflated PDF: ', my_graph)
my_graph /= np.array(my_graph).sum()
# my_graph = inverse_normalise(my_graph)
plt.plot(domain, my_graph, 'm', label='Conflated PDF')
plt.legend()
plt.show()
# Conflated PDF:
print('User Conflated PDF: ', user_graph)
print('My Conflated PDF: ', np.array(my_graph))
Here is the output:
My question here, I understand that I would need to normalise the PDF lists. But, say I did not normalise the PDF, how can I modify my conflation code to get the following plot?
To get the plot above and my conflated code:
# user_graph /= user_graph.sum()
# dist_1_pdf /= dist_1_pdf.sum()
# dist_2_pdf /= dist_2_pdf.sum()
# dist_3_pdf /= dist_3_pdf.sum()
# dist_4_pdf /= dist_4_pdf.sum()
# dist_5_pdf /= dist_5_pdf.sum()
My conflated code plot with no normalisation:





