In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import pymc3 as pm
from pymc3 import Model, Normal, Slice
from pymc3 import sample
from pymc3 import traceplot
from pymc3.distributions import Interpolated
from theano import as_op
import theano.tensor as tt
import numpy as np
from scipy import stats
import theano
plt.style.use('seaborn-darkgrid')
print('Running on PyMC3 v{}'.format(pm.__version__))
Running on PyMC3 v3.5
In [20]:
# Initialize random number generator
np.random.seed(123)
# True parameter values
alpha_true = 5
beta0_true = 7
beta1_true = 13
beta2_true = 10
# Size of dataset
size = 100
# Predictor variables
X1 = np.random.randn(size)
X2 = np.random.randn(size) * 0.2
X3 = np.random.randn(size) * 0.3
# Simulate outcome variable
Y = alpha_true + beta0_true * X1 + beta1_true * X2 + beta2_true * X3 + np.random.randn(size)
In [22]:
# Pack the predictors into a Theano shared variable so that new data can be
# swapped in later with x.set_value(...) without rebuilding the model
x_all = np.array([X1, X2, X3])
x = theano.shared(x_all)
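A shared variable keeps its identity while its contents are replaced in place, which is what lets the updating loop and the posterior predictive cells below reuse the compiled model on fresh data. A tiny sketch of the mechanics (added here, not part of the original session):

In [ ]:
# Sketch: a shared variable is mutated in place rather than rebuilt.
print(x.get_value().shape)             # (3, 100)
x.set_value(np.array([X1, X2, X3]))    # same Theano object, new contents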
In [24]:
basic_model = Model()

with basic_model:
    # Priors for unknown model parameters
    alpha = Normal('alpha', mu=0, sd=1)
    beta0 = Normal('beta0', mu=12, sd=1)
    beta1 = Normal('beta1', mu=18, sd=1)
    beta2 = Normal('beta2', mu=15, sd=1)

    # Expected value of outcome; index the shared variable x rather than
    # x_all, so the data behind the model can be replaced later
    mu = alpha + beta0 * x[0] + beta1 * x[1] + beta2 * x[2]

    # Likelihood (sampling distribution) of observations
    Y_obs = Normal('Y_obs', mu=mu, sd=1, observed=Y)

    # draw 1000 posterior samples
    trace = sample(1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:02<00:00, 2931.73draws/s]
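Before recycling this posterior as a prior, it is worth checking that the chains recovered sensible values. A minimal check, added here as a sketch (pm.summary is the stock PyMC3 summary table):

In [ ]:
# The posterior means should sit between the deliberately off-center
# priors and the true values (5, 7, 13, 10); the weakly identified
# beta1 and beta2 will still be pulled somewhat toward their priors.
pm.summary(trace).round(2)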
In [15]:
# pm.traceplot(trace);
In [25]:
def from_posterior(param, samples):
    smin, smax = np.min(samples), np.max(samples)
    width = smax - smin
    x = np.linspace(smin, smax, 100)
    y = stats.gaussian_kde(samples)(x)

    # Values that were never sampled should get a small but nonzero prior
    # probability, so extend the domain on both sides and let the density
    # fall off linearly to zero there.
    x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    y = np.concatenate([[0], y, [0]])
    return Interpolated(param, x, y)
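To see what such a prior looks like, a quick visual check helps; this cell is a sketch added here, overlaying the KDE that from_posterior interpolates on a histogram of the alpha samples:

In [ ]:
# Sketch: the KDE of the alpha samples is exactly the curve that
# Interpolated will turn into the next round's prior.
samples = trace['alpha']
x_plot = np.linspace(np.min(samples), np.max(samples), 100)
plt.hist(samples, bins=30, density=True, alpha=0.5, label='posterior samples')
plt.plot(x_plot, stats.gaussian_kde(samples)(x_plot), label='KDE (new prior)')
plt.legend();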
In [26]:
traces = [trace]
In [27]:
for _ in range(10):
    # generate more data
    X1 = np.random.randn(size)
    X2 = np.random.randn(size) * 0.2
    X3 = np.random.randn(size) * 0.3
    Y = alpha_true + beta0_true * X1 + beta1_true * X2 + beta2_true * X3 + np.random.randn(size)
    x_temp = np.array([X1, X2, X3])
    x.set_value(x_temp)

    model = Model()
    with model:
        # Priors are the posteriors from the previous iteration
        alpha = from_posterior('alpha', trace['alpha'])
        beta0 = from_posterior('beta0', trace['beta0'])
        beta1 = from_posterior('beta1', trace['beta1'])
        beta2 = from_posterior('beta2', trace['beta2'])

        # Expected value of outcome
        mu = alpha + beta0 * x[0] + beta1 * x[1] + beta2 * x[2]

        # Likelihood (sampling distribution) of observations
        Y_obs = Normal('Y_obs', mu=mu, sd=1, observed=Y)

        # draw 1000 posterior samples
        trace = sample(1000)

    traces.append(trace)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:03<00:00, 1623.02draws/s]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:03<00:00, 1531.26draws/s]
The number of effective samples is smaller than 25% for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:03<00:00, 1617.53draws/s]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:03<00:00, 1581.78draws/s]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:03<00:00, 1562.58draws/s]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta2, beta1, beta0, alpha]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:04<00:00, 1225.35draws/s]
The gelman-rubin statistic is larger than 1.05 for some parameters. This indicates slight problems during sampling.
The estimated number of effective samples is smaller than 200 for some parameters.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-27-760206cdef8e> in <module>
---> 26         trace = sample(1000)

[traceback truncated: the interrupt was raised while Theano was compiling
the C modules for the next model, inside pymc3 init_nuts -> NUTS ->
model.logp_dlogp_function -> theano.function -> theano.gof.cmodule]

KeyboardInterrupt:
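The loop was stopped by hand during the seventh round, but traces already holds the completed rounds, which is enough to see the updating at work. A plotting sketch (added here, not part of the original session) showing each marginal posterior tightening round by round, with the true value marked:

In [ ]:
# Sketch: one panel per parameter; earlier rounds are drawn in lighter
# colors, the true parameter value as a vertical black line.
cmap = mpl.cm.autumn
true_vals = {'alpha': alpha_true, 'beta0': beta0_true,
             'beta1': beta1_true, 'beta2': beta2_true}
for param, true_val in true_vals.items():
    plt.figure(figsize=(8, 2))
    for update_i, trace_i in enumerate(traces):
        samples = trace_i[param]
        x_plot = np.linspace(np.min(samples), np.max(samples), 100)
        plt.plot(x_plot, stats.gaussian_kde(samples)(x_plot),
                 color=cmap(1 - update_i / len(traces)))
    plt.axvline(true_val, c='k')
    plt.ylabel('Frequency')
    plt.title(param)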
In [30]:
X1 = np.random.randn(size)
X2 = np.random.randn(size) * 0.2
X3 = np.random.randn(size) * 0.3
# the model expects all three predictor rows
x_temp = np.array([X1, X2, X3])
x.set_value(x_temp)
In [31]:
ppc = pm.sample_ppc(trace, samples=50, model=basic_model)
y_obs = np.array(ppc['Y_obs'])
0%|          | 0/50 [00:00<?, ?it/s]
INFO (theano.gof.compilelock): Refreshing lock /home/pdguest/.theano/compiledir_Linux-4.13--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-3.5.2-64/lock_dir/lock
100%|██████████| 50/50 [00:00<00:00, 93.06it/s]
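With the shared variable now pointing at the fresh predictors, the 50 posterior predictive draws (y_obs has shape (50, 100)) can be compared with what the true parameters would predict for the same design points. A sketch (the name mu_true is ad hoc, introduced only for this check):

In [ ]:
# Sketch: posterior predictive means vs. the true linear predictor;
# points should scatter around the dashed identity line.
mu_true = alpha_true + beta0_true * X1 + beta1_true * X2 + beta2_true * X3
plt.scatter(mu_true, y_obs.mean(axis=0), alpha=0.6)
lims = [mu_true.min(), mu_true.max()]
plt.plot(lims, lims, 'k--')
plt.xlabel('true mean')
plt.ylabel('posterior predictive mean');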