July, 2020

Multi-Regimes Fundamental Diagram – New Calibration Approach ¶

Emmanuel Kidando, Alican Karaer, Boniphace Kutela, Angela E. Kitali, Ren Moses, Eren E. Ozguven & Thobias Sando

Highlights ¶

For almost a century, several models have been developed to calibrate the pairwise relationship among traffic flow variables, i.e., speed, density, and flow.

Multi-regime models are well known for being superior in fitting the speed-density relationship over single-regime models.

However, in modeling multi-regimes models, breakpoints separating the regimes are generally estimated by a model developer based on the subjective judgment of visual data characteristics.

A new calibration approach based on Bayesian inference is introduced for fitting the multi-regimes models.

The superiority of the new calibration approach is the ability to locate the breakpoints in the multi-regime considering the specific data characteristics.

More information: ¶

https://journals.sagepub.com/doi/10.1177/0361198120930221

Python Code ¶

If you want to replicate tha analysis feel free to use the code.

import pandas as pd, matplotlib.pyplot as plt, seaborn as sns
%pylab inline
%config InlineBackend.figure_format = 'retina'

Populating the interactive namespace from numpy and matplotlib

from rethinking import MAP, coef, extract_samples, link, precis, sim, vcov

# Import the "Default" dataset.
data = pd.read_csv("2262-3.csv")[['occupancy','speed']]
data.head()

import time
import torch
import numpy as onp

import jax.numpy as np
from jax import jit, random
# NB: replace gpu by cpu to run this notebook in cpu
from jax.config import config; config.update("jax_platform_name", "cpu")

import numpyro.distributions as dist
from numpyro.diagnostics import summary
import arviz as az
from numpyro.handlers import sample
from numpyro.hmc_util import initialize_model
from numpyro.mcmc import hmc, mcmc
from numpyro.util import fori_collect

%config InlineBackend.figure_format = 'retina'

occupancy = data.occupancy
speed = data.speed

f, ax = plt.subplots(1, 1, figsize=(10, 5))

plt.scatter(occupancy,speed, color='g', s=50)

<matplotlib.collections.PathCollection at 0x7ff3ecd3b668>

def sigmoid_f(x):
    return 1 / (1 + np.exp(-x))


def weight_f(w, mu1, mu2):
                 
    mu = (1.0 - w) * mu1 +  w * mu2              
    return mu

def weight_three(w1, w2, mu1, mu2, mu3):
    out_ = (1.0 - w1) * mu1 +  w1 * mu2              
    mu = (1.0 - w2) * out_ +  w2 * mu3              
    return mu

from jax.scipy.special import erf

def cumulative_normal(x):
    'Cumulative distribution function for the standard normal distribution'
    return  0.5 + 0.5 * erf(x / math.sqrt(2.0))

Two-regime model¶

def model_twoRegime(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
    α2 = sample('α2', dist.Normal(0, 10))
    
    β1 = sample('β1', dist.Normal(0, 10))
    β2 = sample('β2', dist.Normal(0, 10))
    
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
    
    switch_point = sample("switch_point", dist.Uniform(occupancy.min(), occupancy.max()))

    mu1 = α1 + β1 * occupancy
    mu2 = α2 + β2 * occupancy
    
    
    w = cumulative_normal(occupancy - switch_point)
    
    mu = weight_f(w, mu1, mu2)
    sigma = weight_f(w, σ1, σ2) 
    
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

init_params, potential_fn, constrain_fn = initialize_model(random.PRNGKey(1), model_twoRegime,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
samples_twoRegime = mcmc(num_warmup, num_samples, init_params,
                         potential_fn=potential_fn,
                         constrain_fn=constrain_fn,
                         print_summary=False)

warmup: 100%|██████████| 1000/1000 [01:49<00:00, 25.77it/s, 31 steps of size 8.42e-02. acc. prob=0.80] 
sample: 100%|██████████| 1000/1000 [00:23<00:00, 42.72it/s, 63 steps of size 8.42e-02. acc. prob=0.86]

summary(samples_twoRegime, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
        switch_point      20.62       0.11      20.44      20.86     263.91       1.00
                  α1      88.38       0.45      87.52      89.26     260.35       1.00
                  α2      75.65       0.74      74.30      77.09     277.86       1.00
                  β1      -1.69       0.03      -1.74      -1.63     253.94       1.00
                  β2      -1.59       0.03      -1.64      -1.54     283.63       1.00
                  σ1       4.82       0.05       4.73       4.92     641.28       1.00
                  σ2       4.33       0.08       4.19       4.50     553.21       1.00

Three-regime model¶

def model_threeRegime(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
    α2 = sample('α2', dist.Normal(0, 10))
    α3 = sample('α3', dist.Normal(0, 10))
    
    β1 = sample('β1', dist.Normal(0, 10))
    β2 = sample('β2', dist.Normal(0, 10))
    β3 = sample('β3', dist.Normal(0, 10))
    
    
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
    σ3  = sample('σ3', dist.HalfCauchy(0.5))
    
    switch_point1 = sample("switch_point1", dist.Uniform(occupancy.min(), occupancy.max()))
    switch_point2 = sample("switch_point2", dist.Uniform(17, occupancy.max()))
    
    mu1 = α1 + β1 * occupancy
    mu2 = α2 + β2 * occupancy
    mu3 = α3 + β3 * occupancy
    
    w1 =cumulative_normal(occupancy - switch_point1)
    w2 = cumulative_normal(occupancy - switch_point2)
    
    mu = weight_three(w1, w2, mu1, mu2, mu3)
    sigma = weight_three(w1, w2, σ1, σ2, σ3)
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

init_params, potential_fn, constrain_fn = initialize_model(random.PRNGKey(5), model_threeRegime,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
samples_threeRegime = mcmc(num_warmup, num_samples, init_params,
                           potential_fn=potential_fn,
                           constrain_fn=constrain_fn,
                           print_summary=False)

warmup: 100%|██████████| 1000/1000 [06:00<00:00,  2.77it/s, 63 steps of size 6.41e-02. acc. prob=0.80] 
sample: 100%|██████████| 1000/1000 [01:01<00:00, 16.24it/s, 31 steps of size 6.41e-02. acc. prob=0.87]

summary(samples_threeRegime, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
       switch_point1      21.01       0.12      20.78      21.25     327.97       1.00
       switch_point2      18.53       0.11      18.29      18.75     251.91       1.00
                  α1      83.09       0.54      82.03      84.13     321.89       1.00
                  α2       1.43       9.27     -14.66      20.50     953.43       1.00
                  α3      77.44       0.78      75.98      79.01     306.45       1.00
                  β1      -1.27       0.04      -1.34      -1.19     305.69       1.00
                  β2      31.32       4.30      22.94      39.77     699.94       1.00
                  β3      -1.64       0.03      -1.69      -1.59     321.75       1.00
                  σ1       4.34       0.07       4.21       4.48     364.16       1.00
                  σ2     121.80      22.28      76.19     165.30     554.30       1.00
                  σ3       4.54       0.08       4.38       4.71     692.78       1.00

Constant - Exponential model¶

def ConstantExponential(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
       
    β1 = sample('β1', dist.Normal(0, 10))
    theta = sample('theta', dist.Normal(0, 10))
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
   
    switch_point1 = sample("switch_point1", dist.Uniform(occupancy.min(), occupancy.max()))
    
    mu1 = β1
    mu2 = α1 * np.exp(-occupancy/theta) 
    
    w = cumulative_normal(occupancy - switch_point1)
    
    mu = weight_f(w, mu1, mu2)
    sigma = weight_f(w, σ1, σ2) 
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

init_params, potential_fn, constrain_fn = initialize_model(random.PRNGKey(10), ConstantExponential,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
ConstantExponentials = mcmc(num_warmup, num_samples, init_params,
                sampler='hmc',
                potential_fn=potential_fn,
                constrain_fn=constrain_fn,
               #target_accept_prob=0.9, max_tree_depth=12,
                print_summary=False)

warmup: 100%|██████████| 1000/1000 [01:21<00:00, 12.23it/s, 127 steps of size 1.23e-01. acc. prob=0.80]
sample: 100%|██████████| 1000/1000 [00:22<00:00, 44.31it/s, 31 steps of size 1.23e-01. acc. prob=0.87]

summary(ConstantExponentials, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
       switch_point1      14.91       0.10      14.71      15.09     671.87       1.00
               theta      16.94       0.11      16.74      17.17     449.12       1.00
                  α1     161.77       1.24     159.44     164.02     439.07       1.00
                  β1      67.95       0.13      67.72      68.20     550.50       1.00
                  σ1       4.01       0.08       3.88       4.17     634.06       1.00
                  σ2       4.95       0.05       4.85       5.03     696.36       1.00

Linear Exponential¶

def LinearExponential(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
       
    β1 = sample('β1', dist.Normal(0, 10))
    β2 = sample('β2', dist.Normal(0, 10))
    theta = sample('theta', dist.Normal(0, 10))
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
   
    switch_point1 = sample("switch_point1", dist.Uniform(occupancy.min(), occupancy.max()))
    
    mu1 = β1 + β2 * occupancy
    mu2 = α1 * np.exp(-occupancy/theta) 
    
    w = cumulative_normal(occupancy - switch_point1)
    
    mu = weight_f(w, mu1, mu2)
    sigma = weight_f(w, σ1, σ2) 
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

init_params, potential_fn, constrain_fn = initialize_model(random.PRNGKey(10), LinearExponential,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
LinearExponentialS = mcmc(num_warmup, num_samples, init_params,
                sampler='hmc',
                potential_fn=potential_fn,
                constrain_fn=constrain_fn,
               #target_accept_prob=0.9, max_tree_depth=12,
                print_summary=False)

warmup: 100%|██████████| 1000/1000 [05:26<00:00,  3.06it/s, 63 steps of size 9.73e-03. acc. prob=0.80] 
sample: 100%|██████████| 1000/1000 [00:35<00:00, 28.36it/s, 63 steps of size 9.73e-03. acc. prob=0.87]

summary(LinearExponentialS, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
       switch_point1      16.09       0.11      15.88      16.29     905.71       1.00
               theta      16.19       0.12      15.95      16.40     456.70       1.00
                  α1     171.84       1.52     168.90     174.78     437.30       1.00
                  β1      82.15       0.44      81.36      83.01     589.86       1.00
                  β2      -1.19       0.03      -1.25      -1.12     566.66       1.00
                  σ1       3.49       0.06       3.37       3.62     599.73       1.00
                  σ2       4.99       0.05       4.90       5.09     632.50       1.00

Modified Greenberg model¶

def ModifiedGreenbergmodel(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
       
    β1 = sample('β1', dist.Normal(0, 10))
    β2 = sample('β2', dist.Normal(0, 10))
    
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
   
    switch_point1 = sample("switch_point1", dist.Uniform(occupancy.min(), occupancy.max()))
    
    mu1 = α1 
    mu2 = β1 + β2 * np.log(occupancy)
    
    w = cumulative_normal(occupancy - switch_point1)
    
    mu = weight_f(w, mu1, mu2)
    sigma = weight_f(w, σ1, σ2) 
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

# Start from this source of randomness. We will split keys for subsequent operations.
rng = random.PRNGKey(5)
rng_, rng = random.split(rng)
init_params, potential_fn, constrain_fn = initialize_model(rng_, ModifiedGreenbergmodel,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
ModifiedGreenbergsamples = mcmc(num_warmup, num_samples, init_params,
                sampler='hmc',
                potential_fn=potential_fn,
                constrain_fn=constrain_fn,
               #target_accept_prob=0.9, max_tree_depth=12,
                print_summary=False)

warmup: 100%|██████████| 1000/1000 [02:01<00:00,  8.24it/s, 223 steps of size 9.43e-03. acc. prob=0.81]
sample: 100%|██████████| 1000/1000 [00:47<00:00, 21.21it/s, 31 steps of size 9.43e-03. acc. prob=0.87]

summary(ModifiedGreenbergsamples, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
       switch_point1      14.80       0.11      14.61      15.02     449.76       1.00
                  α1      68.15       0.12      67.92      68.39     876.46       1.00
                  β1     216.83       0.88     215.08     218.46     238.50       1.00
                  β2     -55.69       0.29     -56.23     -55.10     238.42       1.00
                  σ1       3.94       0.08       3.80       4.09     784.13       1.00
                  σ2       4.98       0.04       4.90       5.06     110.32       1.00

Eddies Model¶

def Eddiesmodel(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
    theta = sample('theta', dist.Normal(0, 10))   
    β1 = sample('β1', dist.Normal(0, 10))
    β2 = sample('β2', dist.Normal(0, 10))
    
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
   
    switch_point1 = sample("switch_point1", dist.Uniform(occupancy.min(), occupancy.max()))
    
    mu1 = α1 * np.exp(-occupancy/theta)
    mu2 = β1 + β2 * np.log(occupancy)
    
    w = sigmoid_f(occupancy - switch_point1)
    
    mu = weight_f(w, mu1, mu2)
    sigma = weight_f(w, σ1, σ2) 
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

# Start from this source of randomness. We will split keys for subsequent operations.
rng = random.PRNGKey(2)
rng_, rng = random.split(rng)
init_params, potential_fn, constrain_fn = initialize_model(rng_, Eddiesmodel,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
Eddies_samples = mcmc(num_warmup, num_samples, init_params,
                sampler='hmc',
                potential_fn=potential_fn,
                constrain_fn=constrain_fn,
               #target_accept_prob=0.9, max_tree_depth=12,
                print_summary=False)

warmup: 100%|██████████| 1000/1000 [02:25<00:00, 18.89it/s, 255 steps of size 1.17e-02. acc. prob=0.80] 
sample: 100%|██████████| 1000/1000 [00:57<00:00, 19.68it/s, 31 steps of size 1.17e-02. acc. prob=0.87]

summary(Eddies_samples, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
       switch_point1      16.67       0.20      16.32      17.11     217.92       1.01
               theta      63.43       1.94      59.30      66.94     495.05       1.00
                  α1      82.06       0.51      81.15      83.11     533.52       1.00
                  β1     213.68       1.91     209.72     217.19     116.87       1.01
                  β2     -54.82       0.59     -55.89     -53.57     116.76       1.01
                  σ1       3.60       0.08       3.42       3.75     487.77       1.00
                  σ2       5.16       0.06       5.05       5.28     247.78       1.01

Linear logarithm Model¶

def Linearlogarithm(occupancy, speed):
      
    α1 = sample('α1', dist.Normal(0, 10))
    α2 = sample('α2', dist.Normal(0, 10))
    
    β1 = sample('β1', dist.Normal(0, 10))
    β2 = sample('β2', dist.Normal(0, 10))
    
    σ1  = sample('σ1', dist.HalfCauchy(0.5)) #Noise
    σ2  = sample('σ2', dist.HalfCauchy(0.5))
   
    switch_point1 = sample("switch_point1", dist.Uniform(occupancy.min(), occupancy.max()))
    
    mu1 = α1 + α2 * occupancy
    mu2 = β1 + β2 * np.log(occupancy)
    
    w = cumulative_normal(occupancy - switch_point1)
    
    mu = weight_f(w, mu1, mu2)
    sigma = weight_f(w, σ1, σ2) 
    
    return sample("obs", dist.Normal(mu, sigma), obs = speed)

'''`hmc` is implemented (default).'''

# Start from this source of randomness. We will split keys for subsequent operations.
rng = random.PRNGKey(4)
rng_, rng = random.split(rng)
init_params, potential_fn, constrain_fn = initialize_model(rng_, Linearlogarithm,
                                                           occupancy.values, speed.values)
num_warmup, num_samples = 1000, 1000
Linearlogarithmsamples = mcmc(num_warmup, num_samples, init_params,
                sampler='hmc',
                potential_fn=potential_fn,
                constrain_fn=constrain_fn,
               #target_accept_prob=0.9, max_tree_depth=12,
                print_summary=False)

warmup: 100%|██████████| 1000/1000 [01:19<00:00, 29.73it/s, 31 steps of size 4.96e-02. acc. prob=0.80] 
sample: 100%|██████████| 1000/1000 [00:46<00:00, 21.50it/s, 63 steps of size 4.96e-02. acc. prob=0.86]

summary(Linearlogarithmsamples, prob=0.95)


                           mean         sd       2.5%      97.5%      n_eff       Rhat
       switch_point1      16.17       0.12      15.93      16.40     784.00       1.00
                  α1      81.24       0.44      80.39      82.05     389.33       1.00
                  α2      -1.11       0.03      -1.17      -1.04     354.98       1.00
                  β1     220.36       1.11     218.34     222.40     357.82       1.00
                  β2     -56.82       0.36     -57.48     -56.17     362.75       1.00
                  σ1       3.47       0.06       3.35       3.59     807.37       1.00
                  σ2       5.09       0.05       4.98       5.18     643.51       1.00

Posterior Predictive Distribution¶

def LinearExponential_predi(switch_point, α1,theta, β1, β2, occupancy):
    
    mu1 = β1 + β2 * occupancy
    mu2 = α1 * np.exp(-occupancy/theta) 
    w = cumulative_normal(occupancy - switch_point)
    mu = weight_f(w, mu1, mu2)      
    
    return mu

def TwoRegime_predi(switch_point, α1, α2, β1, β2, occupancy):
    
    mu1 = α1 + β1 * occupancy
    mu2 = α2 + β2 * occupancy
    
    w = cumulative_normal(occupancy - switch_point)
    mu = weight_f(w, mu1, mu2)            
    
    return mu

def ThreeRegime_predi(switch_point1, switch_point2, α1, α2, α3, β1, β2, β3, occupancy):
    
    mu1 = α1 + β1 * occupancy
    mu2 = α2 + β2 * occupancy
    mu3 = α3 + β3 * occupancy

    w1 =cumulative_normal(occupancy - switch_point1)
    w2 = cumulative_normal(occupancy - switch_point2)
    mu = weight_three(w1, w2, mu1, mu2, mu3)     
    
    return mu

def ConstantExponential_predi(switch_point, α1,theta, β1, occupancy):
    
    mu1 = β1
    mu2 = α1 * np.exp(-occupancy/theta) 
    w = cumulative_normal(occupancy - switch_point)
    mu = weight_f(w, mu1, mu2)      
    
    return mu

def ModifiedGreenberg_predi(switch_point, α, β1, β2, occupancy):
    
    mu1 = α 
    mu2 = β1 + β2 * np.log(occupancy)
    w = cumulative_normal(occupancy - switch_point)
    mu = weight_f(w, mu1, mu2)      
    
    return mu


def Eddiesmodel_predi(switch_point, α1, theta, β1, β2, occupancy):
    
    mu1 = α1 * np.exp(-occupancy/theta)
    mu2 = β1 + β2 * np.log(occupancy)
    w =sigmoid_f(occupancy - switch_point)
    mu = weight_f(w, mu1, mu2)      
    
    return mu


def Linearlogarithm_predi(switch_point, α1, α2, β1, β2, occupancy):
    
    mu1 = α1 + α2 * occupancy
    mu2 = β1 + β2 * np.log(occupancy)
    w = cumulative_normal(occupancy - switch_point)
    mu = weight_f(w, mu1, mu2)      
    
    return mu

f, ax = plt.subplots(figsize=(14, 16))

plt.subplot(421)
plt.scatter(occupancy,speed, color='r', s=50, label = 'Data scatter')
#plt.xlabel("Occupancy (%)"),
plt.ylabel("Traffic speed (mph)")

x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, TwoRegime_predi(samples_twoRegime['switch_point'][i],
                                   samples_twoRegime['α1'][i],
                                   samples_twoRegime['α2'][i],
                                   samples_twoRegime['β1'][i],
                                   samples_twoRegime['β2'][i],
                                   x),
                 color="k",
                 alpha=0.3)
occupancyBreakdown = samples_twoRegime['switch_point'].mean()



BreakpointSpeed = TwoRegime_predi(samples_twoRegime['switch_point'].mean(),
                                         samples_twoRegime['α1'].mean(),
                                         samples_twoRegime['α2'].mean(),
                                         samples_twoRegime['β1'].mean(),
                                         samples_twoRegime['β2'].mean(),
                                         occupancyBreakdown)

plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='g',label='Occupancy threshold = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='b',label='Breakpoint speed = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')
    
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Two-regime model", fontsize=14)
##########################@@@@ Three-regime model @@@@@@###########################################################
plt.subplot(422)
plt.scatter(occupancy,speed, color='b', s=50, label = 'Data scatter')
#plt.xlabel("Occupancy (%)"), plt.ylabel("Traffic speed")

# plot posterior predictive lines
x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, ThreeRegime_predi(samples_threeRegime['switch_point1'][i],
                                   samples_threeRegime['switch_point2'][i],
                                   samples_threeRegime['α1'][i],
                                   samples_threeRegime['α2'][i],
                                   samples_threeRegime['α3'][i],
                                   samples_threeRegime['β1'][i],
                                   samples_threeRegime['β2'][i],
                                   samples_threeRegime['β3'][i],
                                   x),
                 color="chocolate",
                 alpha=0.3)

occupancyBreakdown = samples_threeRegime['switch_point1'].mean()
BreakpointSpeed = ThreeRegime_predi(samples_threeRegime['switch_point1'][i],
                                   samples_threeRegime['switch_point2'][i],
                                   samples_threeRegime['α1'][i],
                                   samples_threeRegime['α2'][i],
                                   samples_threeRegime['α3'][i],
                                   samples_threeRegime['β1'][i],
                                   samples_threeRegime['β2'][i],
                                   samples_threeRegime['β3'][i],
                                         occupancyBreakdown)

occupancyBreakdown2 = samples_threeRegime['switch_point2'].mean()
BreakpointSpeed2 = ThreeRegime_predi(samples_threeRegime['switch_point1'].mean(),
                                 samples_threeRegime['switch_point2'].mean(),
                                         samples_threeRegime['α1'].mean(),
                                         samples_threeRegime['α2'].mean(),
                                         samples_threeRegime['α3'].mean(),
                                         samples_threeRegime['β1'].mean(),
                                         samples_threeRegime['β2'].mean(),
                                         samples_threeRegime['β3'].mean(),
                                         occupancyBreakdown2)
plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='r',label='Occupancy threshold 2 = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='g',label='Breakpoint speed 2 = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')

plt.vlines(x=occupancyBreakdown2, ymin=0, ymax=BreakpointSpeed2, color='#c71585',label='Occupancy threshold 1 = {:.2f} %'.format(occupancyBreakdown2),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed2, xmin=0, xmax=occupancyBreakdown2, color='orange',label='Breakpoint speed 1 = {:.2f} mph'.format(BreakpointSpeed2),linewidth=2,linestyle='--')
      
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Three-regime model", fontsize=14)

##########################@@@@ Modified Greenberg model@@@@@@###########################################################
plt.subplot(423)
plt.scatter(occupancy,speed, color='m', s=50, label = 'Data scatter')
#plt.xlabel("Occupancy (%)")
plt.ylabel("Traffic speed (mph)")

# plot posterior predictive lines
x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, ModifiedGreenberg_predi(ModifiedGreenbergsamples['switch_point1'][i],
                                   ModifiedGreenbergsamples['α1'][i],
                                   ModifiedGreenbergsamples['β1'][i],
                                   ModifiedGreenbergsamples['β2'][i],
                           
                                   x),
                 color="k",
                 alpha=0.3)
occupancyBreakdown = ModifiedGreenbergsamples['switch_point1'].mean()



BreakpointSpeed = ModifiedGreenberg_predi(ModifiedGreenbergsamples['switch_point1'].mean(),
                                         ModifiedGreenbergsamples['α1'].mean(),
                                         ModifiedGreenbergsamples['β1'].mean(),
                                         ModifiedGreenbergsamples['β2'].mean(),
                                         occupancyBreakdown)

plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='r',label='Occupancy threshold = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='b',label='Breakpoint speed = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')
    
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Modified Greenberg model", fontsize=14)

##########################@@@@ Eddies model@@@@@@###########################################################
plt.subplot(424)
plt.scatter(occupancy,speed, color='tan', s=50, label = 'Data scatter')
#plt.xlabel("Occupancy (%)"), plt.ylabel("Traffic speed")

# plot posterior predictive lines
x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, Eddiesmodel_predi(Eddies_samples['switch_point1'][i],
                                   Eddies_samples['α1'][i],
                                   Eddies_samples['theta'][i],
                                   Eddies_samples['β1'][i],
                                   Eddies_samples['β2'][i],
                           
                                   x),
                 color="k",
                 alpha=0.3)
occupancyBreakdown = Eddies_samples['switch_point1'].mean()



BreakpointSpeed = Eddiesmodel_predi(Eddies_samples['switch_point1'].mean(),
                                         Eddies_samples['α1'].mean(),
                                         Eddies_samples['theta'].mean(),
                                         Eddies_samples['β1'].mean(),
                                         Eddies_samples['β2'].mean(),
                                         occupancyBreakdown)

plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='r',label='Occupancy threshold = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='b',label='Breakpoint speed = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')
    
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Edie model", fontsize=14)


##########################@@@@ Constant-Exponential model@@@@@@###########################################################
plt.subplot(425)
plt.scatter(occupancy,speed, color='k', s=50, label = 'Data scatter')
#plt.xlabel("Occupancy (%)")
plt.ylabel("Traffic speed (mph)")

# plot posterior predictive lines
x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, ConstantExponential_predi(ConstantExponentials['switch_point1'][i],
                                   ConstantExponentials['α1'][i],
                                   ConstantExponentials['theta'][i],
                                   ConstantExponentials['β1'][i],
                                   x),
                 color="chocolate",
                 alpha=0.3)

occupancyBreakdown = ConstantExponentials['switch_point1'].mean()



BreakpointSpeed = ConstantExponential_predi(ConstantExponentials['switch_point1'].mean(),
                                         ConstantExponentials['α1'].mean(),
                                         ConstantExponentials['theta'].mean(),
                                         ConstantExponentials['β1'].mean(),
                                         occupancyBreakdown)


plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='r',label='Occupancy threshold = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='b',label='Breakpoint speed = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')
    
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Constant-Exponential model", fontsize=14)

##########################@@@@ Linear-Exponential model@@@@@@###########################################################
plt.subplot(426)
plt.scatter(occupancy,speed, color='g', s=50, label = 'Data scatter')
plt.xlabel("Occupancy (%)"), #plt.ylabel("Traffic speed")

# plot posterior predictive lines
x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, LinearExponential_predi(LinearExponentialS['switch_point1'][i],
                                   LinearExponentialS['α1'][i],
                                   LinearExponentialS['theta'][i],
                                   LinearExponentialS['β1'][i],
                                   LinearExponentialS['β2'][i],
                                   x),
                 color="r",
                 alpha=0.3)

occupancyBreakdown = LinearExponentialS['switch_point1'].mean()



BreakpointSpeed = LinearExponential_predi(LinearExponentialS['switch_point1'].mean(),
                                         LinearExponentialS['α1'].mean(),
                                         LinearExponentialS['theta'].mean(),
                                         LinearExponentialS['β1'].mean(),
                                         LinearExponentialS['β2'].mean(),
                                         occupancyBreakdown)


plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='r',label='Occupancy threshold = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='b',label='Breakpoint speed = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')
    
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Linear-Exponential model", fontsize=14)


##########################@@@@ Linear-Logarithmic model@@@@@@###########################################################
plt.subplot(427)
plt.scatter(occupancy,speed, color='grey', s=50, label = 'Data scatter')
plt.xlabel("Occupancy (%)"), plt.ylabel("Traffic speed (mph)")

# plot posterior predictive lines
x = np.linspace(occupancy.min(), occupancy.max(), 1000)
for i in range(100):
    sns.lineplot(x, Linearlogarithm_predi(Linearlogarithmsamples['switch_point1'][i],
                                          Linearlogarithmsamples['α1'][i],
                                          Linearlogarithmsamples['α2'][i],
                                          Linearlogarithmsamples['β1'][i],
                                          Linearlogarithmsamples['β2'][i],
                                         x),
                 color="chocolate",
                 alpha=0.3)

occupancyBreakdown = Linearlogarithmsamples['switch_point1'].mean()



BreakpointSpeed = Linearlogarithm_predi(Linearlogarithmsamples['switch_point1'][i],
                                        Linearlogarithmsamples['α1'][i],
                                        Linearlogarithmsamples['α2'][i],
                                        Linearlogarithmsamples['β1'][i],
                                        Linearlogarithmsamples['β2'][i],
                                        occupancyBreakdown)


plt.vlines(x=occupancyBreakdown, ymin=0, ymax=BreakpointSpeed, color='r',label='Occupancy threshold = {:.2f} %'.format(occupancyBreakdown),linewidth=2,linestyle='-.')

plt.hlines(BreakpointSpeed, xmin=0, xmax=occupancyBreakdown, color='b',label='Breakpoint speed = {:.2f} mph'.format(BreakpointSpeed),linewidth=2,linestyle='--')
    
plt.xlim(0, 65)
plt.ylim(0, 85)
plt.legend(loc=1,fontsize=12, frameon=False);
plt.title("Linear-Logarithmic model", fontsize=14)


plt.tight_layout()

Posterior distribution of Breakpoints¶

from numpyro.diagnostics import hpdi
plt.style.use("seaborn-dark")

f, ax = plt.subplots(figsize=(14, 16))

plt.subplot(421)
cred_mass = 0.95
breakpoints = TwoRegime_predi(samples_twoRegime['switch_point'],
                              samples_twoRegime['α1'],
                              samples_twoRegime['α2'],
                              samples_twoRegime['β1'],
                              samples_twoRegime['β2'],
                              samples_twoRegime['switch_point'])
MEAN = breakpoints.mean()

# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints, cred_mass)

sns.distplot(breakpoints, hist = False, kde = True,
            kde_kws={"lw": 2})
#plt.ylabel("Data density")
plt.title("Two-regime speed threshold")

frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]
plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )

    
# plt.xlim(0, 65)
# plt.ylim(-0.01, 1.30)

# ##########################@@@@ Three-regime model @@@@@@###########################################################
plt.subplot(422)
cred_mass = 0.95
breakpoints1 = ThreeRegime_predi(samples_threeRegime['switch_point1'],
                                   samples_threeRegime['switch_point2'],
                                   samples_threeRegime['α1'],
                                   samples_threeRegime['α2'],
                                   samples_threeRegime['α3'],
                                   samples_threeRegime['β1'],
                                   samples_threeRegime['β2'],
                                   samples_threeRegime['β3'],
                                   samples_threeRegime['switch_point1'])



MEAN = breakpoints1.mean()
plot_height = ax.get_ylim()[1]
# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints1, cred_mass)

sns.distplot(breakpoints1, hist = False, kde = True,
            kde_kws={"lw": 2})

frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.title("Three-regime speed threshold 1")
plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)
# plt.ylim(-0.01, 1.75)

# ##########################@@@@ Three-regime model @@@@@@###########################################################
plt.subplot(423)
cred_mass = 0.95
breakpoints2 = ThreeRegime_predi(samples_threeRegime['switch_point1'],
                                   samples_threeRegime['switch_point2'],
                                   samples_threeRegime['α1'],
                                   samples_threeRegime['α2'],
                                   samples_threeRegime['α3'],
                                   samples_threeRegime['β1'],
                                   samples_threeRegime['β2'],
                                   samples_threeRegime['β3'],
                                   samples_threeRegime['switch_point2'])



MEAN = breakpoints2.mean()
# plot_height = ax.get_ylim()[1]
# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints2, cred_mass)

sns.distplot(breakpoints2, hist = False, kde = True,
            kde_kws={"lw": 2})


frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.title("Three-regime speed threshold 2")
plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)
# plt.ylim(-0.01, 1.75)

# ##########################@@@@ Modified Greenberg model@@@@@@###########################################################
plt.subplot(424)
cred_mass = 0.95
breakpoints = ModifiedGreenberg_predi(ModifiedGreenbergsamples['switch_point1'],
                                         ModifiedGreenbergsamples['α1'],
                                         ModifiedGreenbergsamples['β1'],
                                         ModifiedGreenbergsamples['β2'],
                                         ModifiedGreenbergsamples['switch_point1'])



MEAN = breakpoints.mean()
# plot_height = ax.get_ylim()[1]
# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints, cred_mass)

sns.distplot(breakpoints, hist = False, kde = True,
            kde_kws={"lw": 2})

frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.title("Modified Greenberg model")
plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)
# plt.ylim(-0.01, 1.85)


# ##########################@@@@ Eddies model@@@@@@###########################################################
plt.subplot(425)
breakpoints = Eddiesmodel_predi(Eddies_samples['switch_point1'],
                                         Eddies_samples['α1'],
                                         Eddies_samples['theta'],
                                         Eddies_samples['β1'],
                                         Eddies_samples['β2'],
                                         Eddies_samples['switch_point1'])


MEAN = breakpoints.mean()
# plot_height = ax.get_ylim()[1]
# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints, cred_mass)

sns.distplot(breakpoints, hist = False, kde = True,
            kde_kws={"lw": 2})

frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)
#plt.ylim(-0.01, 1.75)
plt.title("Edie model")


# ##########################@@@@ Constant-Exponential model@@@@@@###########################################################
plt.subplot(426)
breakpoints = ConstantExponential_predi(ConstantExponentials['switch_point1'],
                                         ConstantExponentials['α1'],
                                         ConstantExponentials['theta'],
                                         ConstantExponentials['β1'],
                                         ConstantExponentials['switch_point1'])


MEAN = breakpoints.mean()
plot_height = ax.get_ylim()[1]
# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints, cred_mass)

sns.distplot(breakpoints, hist = False, kde = True,
            kde_kws={"lw": 2})

frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPDI",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)
#plt.ylim(-0.01, 1.75)
plt.title("Constant-Exponential model")

# ##########################@@@@ Linear-Exponential model@@@@@@###########################################################
plt.subplot(427)
breakpoints = LinearExponential_predi(LinearExponentialS['switch_point1'],
                                         LinearExponentialS['α1'],
                                         LinearExponentialS['theta'],
                                         LinearExponentialS['β1'],
                                         LinearExponentialS['β2'],
                                         LinearExponentialS['switch_point1'])


MEAN = breakpoints.mean()
# plot_height = ax.get_ylim()[1]
# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints, cred_mass)

sns.distplot(breakpoints, hist = False, kde = True,
            kde_kws={"lw": 2})

frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.xlabel("Traffic speed (mph)")
plt.plot(HDI, [0, 0], linewidth=6, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)
#plt.ylim(-0.01, 1.75)
plt.title("Linear-Exponential model")


# ##########################@@@@ Linear-Logarithmic model@@@@@@###########################################################
plt.subplot(428)
breakpoints = Linearlogarithm_predi(Linearlogarithmsamples['switch_point1'],
                                        Linearlogarithmsamples['α1'],
                                        Linearlogarithmsamples['α2'],
                                        Linearlogarithmsamples['β1'],
                                        Linearlogarithmsamples['β2'],
                                        Linearlogarithmsamples['switch_point1'])


MEAN = breakpoints.mean()

# Computes “highest posterior density interval” (HPDI)
HDI = hpdi(breakpoints, cred_mass)

sns.distplot(breakpoints, hist = False, kde = True,
            kde_kws={"lw": 2})

# plot_height = ax.get_ylim()[1]
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
plot_height = frame.get_ylim()[1]

plt.xlabel("Traffic speed (mph)")
plt.plot(HDI, [0, 0], linewidth=8, color='r') 
plt.text(HDI[1],
            plot_height * 0.07,
            HDI[1].round(2),
           
            horizontalalignment="center",
        )
plt.text(HDI[0],
            plot_height * 0.07,
            HDI[0].round(2),
           
            horizontalalignment="center",
        )
plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.3,
            '95%' + " HPD",
            
            horizontalalignment="center",
        )

plt.text(
            (HDI[0] + HDI[1]) / 2,
            plot_height * 0.8,
            'Mean = %.2f' % MEAN,
            
            horizontalalignment="center",
        )
frame = plt.gca()
frame.axes.get_yaxis().set_ticks([])
    
# plt.xlim(0, 65)

plt.title("Linear-Logarithmic model")

plt.tight_layout()

Prediction Accuracy¶

import scipy

def rsquared(x, y):
    """ Return R^2 where x and y are array-like."""

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)
    return r_value**2

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

sns.set_context("notebook", font_scale= 1.7)

f, ax = plt.subplots(figsize=(16, 16))

plt.subplot(331)

posterior_mu = TwoRegime_predi(samples_twoRegime['switch_point'].mean(),
                                         samples_twoRegime['α1'].mean(),
                                         samples_twoRegime['α2'].mean(),
                                         samples_twoRegime['β1'].mean(),
                                         samples_twoRegime['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})



R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'r'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2,fontsize=13);

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("Observed traffic speed $(mph)$", fontsize=18)
plt.title("Two-regime model", fontsize=18)
plt.xlabel('')

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)
Accuracy = pd.DataFrame({'Metrics':['Mean squared error',
                                            'Mean absolute error',
                                    'Coefficient of determination',
                                    'Pearson correlation']})
Accuracy['TwoRegime']= [mean_squarederror, meanabsolut, R2, r2]

#################################ThreeRegime###########################################
plt.subplot(332)

posterior_mu = ThreeRegime_predi(samples_threeRegime['switch_point1'].mean(),
                                 samples_threeRegime['switch_point2'].mean(),
                                         samples_threeRegime['α1'].mean(),
                                         samples_threeRegime['α2'].mean(),
                                         samples_threeRegime['α3'].mean(),
                                         samples_threeRegime['β1'].mean(),
                                         samples_threeRegime['β2'].mean(),
                                         samples_threeRegime['β3'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

sns.set_context("notebook", font_scale= 1.7)

R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'b'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2,fontsize=13);

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("", fontsize=18)
plt.title("Three-regime model", fontsize=18)

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['ThreeRegime'] = [mean_squarederror, meanabsolut, R2, r2]


#################################ModifiedGreenberg###########################################
plt.subplot(333)

posterior_mu = ModifiedGreenberg_predi(ModifiedGreenbergsamples['switch_point1'].mean(),
                                         ModifiedGreenbergsamples['α1'].mean(),
                                         ModifiedGreenbergsamples['β1'].mean(),
                                         ModifiedGreenbergsamples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

sns.set_context("notebook", font_scale= 1.7)

R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'m'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2,fontsize=13);

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("", fontsize=18)
plt.title("Modified Greenberg model", fontsize=18)

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['ModifiedGreenberg'] = [mean_squarederror, meanabsolut, R2, r2]

#################################Eddiesmodel###########################################
plt.subplot(334)

posterior_mu = Eddiesmodel_predi(Eddies_samples['switch_point1'].mean(),
                                         Eddies_samples['α1'].mean(),
                                         Eddies_samples['theta'].mean(),
                                         Eddies_samples['β1'].mean(),
                                         Eddies_samples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

sns.set_context("notebook", font_scale= 1.7)


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'m'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2,fontsize=13);

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("Observed traffic speed $(mph)$", fontsize=18)
plt.xlabel("", fontsize=18)
plt.title("Eddies model", fontsize=18)

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['Eddiesmodel'] = [mean_squarederror, meanabsolut, R2, r2]

#################################ConstantExponential###########################################
plt.subplot(335)

posterior_mu = ConstantExponential_predi(ConstantExponentials['switch_point1'].mean(),
                                         ConstantExponentials['α1'].mean(),
                                         ConstantExponentials['theta'].mean(),
                                         ConstantExponentials['β1'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

sns.set_context("notebook", font_scale= 1.7)


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'k', 'alpha':1},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'chocolate', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2,fontsize=13);

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$", fontsize=18)
plt.title("Constant-Exponential model", fontsize=18)

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['ConstantExponential'] = [mean_squarederror, meanabsolut, R2, r2]

#################################LinearExponential###########################################
plt.subplot(336)

posterior_mu = LinearExponential_predi(LinearExponentialS['switch_point1'].mean(),
                                         LinearExponentialS['α1'].mean(),
                                         LinearExponentialS['theta'].mean(),
                                         LinearExponentialS['β1'].mean(),
                                         LinearExponentialS['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

sns.set_context("notebook", font_scale= 1.7)

R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'g', 'alpha':1},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2,fontsize=13);

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$", fontsize=18)
plt.title("Linear-Exponential model", fontsize=18)

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['LinearExponential'] = [mean_squarederror, meanabsolut, R2, r2]

#################################Linearlogarithm###########################################
plt.subplot(337)

posterior_mu = Linearlogarithm_predi(Linearlogarithmsamples['switch_point1'].mean(),
                                        Linearlogarithmsamples['α1'].mean(),
                                        Linearlogarithmsamples['α2'].mean(),
                                        Linearlogarithmsamples['β1'].mean(),
                                        Linearlogarithmsamples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

sns.set_context("notebook", font_scale= 1.7)

R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'grey', 'alpha':1},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=4,fontsize=13);
plt.title("Linear-Logarithm model", fontsize=18)
plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$", fontsize=18)
plt.ylabel("Observed traffic speed $(mph)$", fontsize=18)

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['Linearlogarithm'] = [mean_squarederror, meanabsolut, R2, r2]

Accuracy

sns.set_context("notebook", font_scale= 2.2)

f, ax = plt.subplots(figsize=(30, 30))

plt.subplot(441)

posterior_mu = TwoRegime_predi(samples_twoRegime['switch_point'].mean(),
                                         samples_twoRegime['α1'].mean(),
                                         samples_twoRegime['α2'].mean(),
                                         samples_twoRegime['β1'].mean(),
                                         samples_twoRegime['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})



R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'r'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2#,fontsize=13
          );

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("Observed traffic speed $(mph)$"#, fontsize=18
          )
plt.title("Two-regime model"#, fontsize=18
         )
plt.xlabel('')

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)
Accuracy = pd.DataFrame({'Metrics':['Mean squared error',
                                            'Mean absolute error',
                                    'Coefficient of determination',
                                    'Pearson correlation']})
Accuracy['TwoRegime']= [mean_squarederror, meanabsolut, R2, r2]

#################################ThreeRegime###########################################
plt.subplot(442)

posterior_mu = ThreeRegime_predi(samples_threeRegime['switch_point1'].mean(),
                                 samples_threeRegime['switch_point2'].mean(),
                                         samples_threeRegime['α1'].mean(),
                                         samples_threeRegime['α2'].mean(),
                                         samples_threeRegime['α3'].mean(),
                                         samples_threeRegime['β1'].mean(),
                                         samples_threeRegime['β2'].mean(),
                                         samples_threeRegime['β3'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'b'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2#,fontsize=13
          );

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("", fontsize=18)
plt.title("Three-regime model"#, fontsize=18
         )

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['ThreeRegime'] = [mean_squarederror, meanabsolut, R2, r2]


#################################ModifiedGreenberg###########################################
plt.subplot(443)

posterior_mu = ModifiedGreenberg_predi(ModifiedGreenbergsamples['switch_point1'].mean(),
                                         ModifiedGreenbergsamples['α1'].mean(),
                                         ModifiedGreenbergsamples['β1'].mean(),
                                         ModifiedGreenbergsamples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'m'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2#,fontsize=13
          );

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("", fontsize=18)
plt.title("Modified Greenberg model"#, fontsize=18
         )

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['ModifiedGreenberg'] = [mean_squarederror, meanabsolut, R2, r2]

#################################Eddiesmodel###########################################
plt.subplot(444)

posterior_mu = Eddiesmodel_predi(Eddies_samples['switch_point1'].mean(),
                                         Eddies_samples['α1'].mean(),
                                         Eddies_samples['theta'].mean(),
                                         Eddies_samples['β1'].mean(),
                                         Eddies_samples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'tan'},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2#,fontsize=13
          );

plt.xlim(0, 80)
plt.ylim(0, 80)
#plt.ylabel("Observed traffic speed $(mph)$", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$"#, fontsize=18
          )

plt.title("Eddies model"#, fontsize=18
         )

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['Eddiesmodel'] = [mean_squarederror, meanabsolut, R2, r2]

#################################ConstantExponential###########################################
plt.subplot(445)

posterior_mu = ConstantExponential_predi(ConstantExponentials['switch_point1'].mean(),
                                         ConstantExponentials['α1'].mean(),
                                         ConstantExponentials['theta'].mean(),
                                         ConstantExponentials['β1'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})



R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'k', 'alpha':1},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'chocolate', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2#,fontsize=13
          );

plt.xlim(0, 80)
plt.ylim(0, 80)
#plt.ylabel("", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$"#, fontsize=18
          )
plt.ylabel("Observed traffic speed $(mph)$"#, fontsize=18
          )
plt.title("Constant-Exponential model"#, fontsize=18
         )

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['ConstantExponential'] = [mean_squarederror, meanabsolut, R2, r2]

#################################LinearExponential###########################################
plt.subplot(446)

posterior_mu = LinearExponential_predi(LinearExponentialS['switch_point1'].mean(),
                                         LinearExponentialS['α1'].mean(),
                                         LinearExponentialS['theta'].mean(),
                                         LinearExponentialS['β1'].mean(),
                                         LinearExponentialS['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'g', 'alpha':1},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=2#,fontsize=13
          );

plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$"#, fontsize=18
          )
plt.title("Linear-Exponential model"#, fontsize=18
         )

mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['LinearExponential'] = [mean_squarederror, meanabsolut, R2, r2]

#################################Linearlogarithm###########################################
plt.subplot(447)

posterior_mu = Linearlogarithm_predi(Linearlogarithmsamples['switch_point1'].mean(),
                                        Linearlogarithmsamples['α1'].mean(),
                                        Linearlogarithmsamples['α2'].mean(),
                                        Linearlogarithmsamples['β1'].mean(),
                                        Linearlogarithmsamples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})


R2 = r2_score(Pred.pred,
        Pred.Observed).round(2)

r2 = rsquared(Pred.pred,
        Pred.Observed).round(2)


sns.regplot(x="pred", y="Observed", data=Pred, 
           scatter_kws={'color':'grey', 'alpha':1},
           line_kws={"lw":4,
                     'ls':'-',
                     'color':'k', 'label':'Pearson correlation: {} '.format(r2)});
plt.legend(loc=4#,fontsize=13
          );
plt.title("Linear-Logarithm model"#, fontsize=18
         )
plt.xlim(0, 80)
plt.ylim(0, 80)
plt.ylabel("", fontsize=18)
plt.xlabel("Predicted traffic speed $(mph)$"#, fontsize=18
          )
# plt.ylabel("Observed traffic speed $(mph)$"#, fontsize=18
#           )
plt.tight_layout()
mean_squarederror = mean_squared_error(posterior_mu, speed).round(2)
meanabsolut = mean_absolute_error(posterior_mu, speed).round(2)

Accuracy['Linearlogarithm'] = [mean_squarederror, meanabsolut, R2, r2]

dashList = [(5,2),(2,5),(4,10),(3,3,2,2),(5,2,20,2)]

Model Accuracy¶

sns.set_context("notebook", font_scale= 1.7)

f, ax = plt.subplots(figsize=(10, 7))

# plt.subplot(441)

posterior_mu = TwoRegime_predi(samples_twoRegime['switch_point'].mean(),
                                         samples_twoRegime['α1'].mean(),
                                         samples_twoRegime['α2'].mean(),
                                         samples_twoRegime['β1'].mean(),
                                         samples_twoRegime['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
TwoRegime_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)

#################################ThreeRegime###########################################

posterior_mu = ThreeRegime_predi(samples_threeRegime['switch_point1'].mean(),
                                 samples_threeRegime['switch_point2'].mean(),
                                         samples_threeRegime['α1'].mean(),
                                         samples_threeRegime['α2'].mean(),
                                         samples_threeRegime['α3'].mean(),
                                         samples_threeRegime['β1'].mean(),
                                         samples_threeRegime['β2'].mean(),
                                         samples_threeRegime['β3'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
ThreeRegime_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)


#################################ModifiedGreenberg###########################################

posterior_mu = ModifiedGreenberg_predi(ModifiedGreenbergsamples['switch_point1'].mean(),
                                         ModifiedGreenbergsamples['α1'].mean(),
                                         ModifiedGreenbergsamples['β1'].mean(),
                                         ModifiedGreenbergsamples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})

Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
ModifiedGreenberg_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)

#################################Eddiesmodel###########################################

posterior_mu = Eddiesmodel_predi(Eddies_samples['switch_point1'].mean(),
                                         Eddies_samples['α1'].mean(),
                                         Eddies_samples['theta'].mean(),
                                         Eddies_samples['β1'].mean(),
                                         Eddies_samples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})
Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
Eddiesmodel_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)


#################################ConstantExponential###########################################
posterior_mu = ConstantExponential_predi(ConstantExponentials['switch_point1'].mean(),
                                         ConstantExponentials['α1'].mean(),
                                         ConstantExponentials['theta'].mean(),
                                         ConstantExponentials['β1'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})
Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
ConstantExponential_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)


#################################LinearExponential###########################################

posterior_mu = LinearExponential_predi(LinearExponentialS['switch_point1'].mean(),
                                         LinearExponentialS['α1'].mean(),
                                         LinearExponentialS['theta'].mean(),
                                         LinearExponentialS['β1'].mean(),
                                         LinearExponentialS['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})
Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
LinearExponential_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)



#################################Linearlogarithm###########################################
posterior_mu = Linearlogarithm_predi(Linearlogarithmsamples['switch_point1'].mean(),
                                        Linearlogarithmsamples['α1'].mean(),
                                        Linearlogarithmsamples['α2'].mean(),
                                        Linearlogarithmsamples['β1'].mean(),
                                        Linearlogarithmsamples['β2'].mean(),
                                         occupancy.values)

Pred = pd.DataFrame({'pred':posterior_mu, 'Observed':speed})
Pred['oupany_ran'] = [0 if 0 <= i <= 20 else
                      1 if 20 < i <= 30 else
                      2 if 30 < i <= 40 else
                      3 if 40 < i <= 50 else
                      4 if 50 < i <= 60 else
                      5 if 60 < i <= 70 else
                      6 for i in Pred.Observed
                     ]
Pred['Label'] = ['0-20' if 0 <= i <= 20 else
                 '20-30' if 20 < i <= 30 else
                 '30-40' if 30 < i <= 40 else
                 '40-50'  if 40 < i <= 50 else
                 '50-60' if 50 < i <= 60 else
                 '60-70' if 60 < i <= 70 else
                 '> 70'  for i in Pred.Observed
                     ]


cols = ['Mean_squared_error',
 'Mean_absolute_error',
 'Coefficient_of_determination',
 'Pearson_correlation',
 'Number_of_observations (n) ']
Mod_a = []
for i in arange(7):
    dxxf = Pred[Pred.oupany_ran==i]
    observ = len(dxxf)
    #print(i)
    R2 = r2_score(dxxf.pred,
        dxxf.Observed).round(2)
    r2 = rsquared(dxxf.pred,
        dxxf.Observed).round(2)
    mean_squarederror = mean_squared_error(dxxf.pred, dxxf.Observed).round(2)
    meanabsolut = mean_absolute_error(dxxf.pred, dxxf.Observed).round(2)
    #print(mean_squarederror, meanabsolut, R2, r2, observ)
    Mod_a.append([mean_squarederror, meanabsolut, R2, r2, observ])

Accuracy = pd.DataFrame({'Metrics':['0-20',
                                     '20-30',
                                     '30-40',
                                     '40-50',
                                     '50-60',
                                     '60-70',
                                     '> 70']})    
    
Linearlogarithm_Accuracy = pd.concat([Accuracy,pd.DataFrame(Mod_a, columns=cols)], axis=1)

########################################################################################################
plt.plot(TwoRegime_Accuracy.Metrics, TwoRegime_Accuracy.Mean_absolute_error, 
         label='Two-regime', linewidth=2, linestyle =':',marker='*')
plt.plot(ThreeRegime_Accuracy.Metrics, ThreeRegime_Accuracy.Mean_absolute_error,
         color='r',label='Three Regime', linewidth=2, dashes=dashList[1],marker='s')
plt.plot(ModifiedGreenberg_Accuracy.Metrics, ModifiedGreenberg_Accuracy.Mean_absolute_error,
         color='k',label='Modified Greenberg', linewidth=2, linestyle='-.',marker='+')
plt.plot(Eddiesmodel_Accuracy.Metrics, Eddiesmodel_Accuracy.Mean_absolute_error,
         label='Eddies model', linewidth=2, dashes=dashList[3],marker='o',)
# plt.plot(ConstantExponential_Accuracy.Metrics, ConstantExponential_Accuracy.Mean_squared_error, 
#          label='Constant-Exponential')
plt.plot(LinearExponential_Accuracy.Metrics, LinearExponential_Accuracy.Mean_absolute_error,
         label='Linear-Exponential',marker='v',)
# plt.plot(Linearlogarithm_Accuracy.Metrics, Linearlogarithm_Accuracy.Mean_squared_error, 
#          color='k', label='Linear-logarithm', linewidth=2, dashes=dashList[4],)
plt.legend(loc=2, fontsize=14)

plt.xlabel("Traffic speed $(mph)$"#, fontsize=18
          )
plt.ylabel("Mean squared error"#, fontsize=18
          )
plt.legend(loc=1, fontsize=12)

<matplotlib.legend.Legend at 0x7ff3babcfc18>

#plt.style.use("seaborn-dark") 
plt.style.use("seaborn-darkgrid")

f, ax = plt.subplots(figsize=(10, 7))
plt.plot(TwoRegime_Accuracy.Metrics, TwoRegime_Accuracy.Mean_squared_error, 
         label='Two-regime', linewidth=2, linestyle =':',marker='*')
plt.plot(ThreeRegime_Accuracy.Metrics, ThreeRegime_Accuracy.Mean_squared_error,
         color='r',label='Three Regime', linewidth=2, dashes=dashList[1],marker='s')
plt.plot(ModifiedGreenberg_Accuracy.Metrics, ModifiedGreenberg_Accuracy.Mean_squared_error,
         color='k',label='Modified Greenberg', linewidth=2, linestyle='-.',marker='+')
plt.plot(Eddiesmodel_Accuracy.Metrics, Eddiesmodel_Accuracy.Mean_squared_error,
         label='Eddies model', linewidth=2, dashes=dashList[3],marker='o',)
# plt.plot(ConstantExponential_Accuracy.Metrics, ConstantExponential_Accuracy.Mean_squared_error, 
#          label='Constant-Exponential')
plt.plot(LinearExponential_Accuracy.Metrics, LinearExponential_Accuracy.Mean_squared_error,
         label='Linear-Exponential',marker='v',)
# plt.plot(Linearlogarithm_Accuracy.Metrics, Linearlogarithm_Accuracy.Mean_squared_error, 
#          color='k', label='Linear-logarithm', linewidth=2, dashes=dashList[4],)
plt.legend(loc=2, fontsize=14)

plt.xlabel("Traffic speed $(mph)$"#, fontsize=18
          )
plt.ylabel("Mean squared error"#, fontsize=18
          )
plt.legend(loc=1, fontsize=12)

<matplotlib.legend.Legend at 0x7ff3b9fdf128>

Information Criterion¶

def log_lk(rng, params, model, *args, **kwargs):
    model = substitute(seed(model, rng), params)
    model_trace = trace(model).get_trace(*args, **kwargs)
    obs_node = model_trace['obs']
    #print(obs_node)
    return obs_node['fn'].log_prob(obs_node['value']).sum()

def expected_log_likelihood(rng, params, model, *args, **kwargs):
    p_waic = list(params.values())[0].shape[0]
    log_lk_fn = vmap(lambda rng, params: log_lk(rng, params, model, *args, **kwargs))
    log_lk_vals = log_lk_fn(random.split(rng, p_waic), params)
    
    return log_lk_vals
      
def waic(log_like, return_lppd_p_waic=False):
    """Compute WAIC from a set of log-likelihoods."""
    lppd = np.sum(scipy.special.logsumexp(log_like, axis=0, b=1/log_like.shape[0]))
    p_waic = np.sum(np.var(log_like, axis=0))
    
    if return_lppd_p_waic:
        
        return onp.array(-2*(lppd - p_waic)).tolist(), onp.array(lppd).tolist(), onp.array(p_waic).tolist()
    
    else:
        return onp.array(-2*(lppd - p_waic)).tolist()  
    
def log_likelihood(rng, params, model, *args, **kwargs):
    n = list(params.values())[0].shape[0]
    log_lk_fn = vmap(lambda rng, params: log_lk(rng, params, model, *args, **kwargs))
    log_lk_vals = log_lk_fn(random.split(rng, n), params)
    return logsumexp(log_lk_vals) - np.log(n)

from jax import random, vmap
from numpyro.handlers import sample, seed, substitute, trace
from jax.scipy.special import logsumexp
import scipy.special

rng, rng_ = random.split(random.PRNGKey(1))
TwoRegime = expected_log_likelihood(rng_,
                           samples_twoRegime,
                           model_twoRegime,
                           occupancy.values, speed.values)

rng, rng_ = random.split(random.PRNGKey(5))             
ThreeRegime = expected_log_likelihood(rng_,
                           samples_threeRegime,
                           model_threeRegime,
                           occupancy.values, speed.values)

rng, rng_ = random.split(random.PRNGKey(10))
ConstantExponentiall = expected_log_likelihood(rng_,
                           ConstantExponentials,
                           ConstantExponential,
                           occupancy.values, speed.values)

rng, rng_ = random.split(random.PRNGKey(10))             
LinearExponentiall = expected_log_likelihood(rng_,
                           LinearExponentialS,
                           LinearExponential,
                           occupancy.values, speed.values)

rng, rng_ = random.split(random.PRNGKey(5))
ModifiedGreenberg = expected_log_likelihood(rng_,
                           ModifiedGreenbergsamples,
                           ModifiedGreenbergmodel,
                           occupancy.values, speed.values)

rng, rng_ = random.split(random.PRNGKey(2))
Eddiesmodell = expected_log_likelihood(rng_,
                           Eddies_samples,
                           Eddiesmodel,
                           occupancy.values, speed.values)

rng, rng_ = random.split(random.PRNGKey(4))
Linearlogarithmm = expected_log_likelihood(rng_,
                           Linearlogarithmsamples,
                           Linearlogarithm,
                           occupancy.values, speed.values)

log_likes = {'Two - Regime': TwoRegime,
             
             'Three - Regime':ThreeRegime,

             'Constant - Exponential': ConstantExponentiall,
             
             'Linear - Exponential': LinearExponentiall,
             
             'Modified Greenberg': ModifiedGreenberg,
             
             'Eddies model': Eddiesmodell,
             
             'Linear logarithm': Linearlogarithmm,
            }

waics = {key: onp.round(waic(val, return_lppd_p_waic=True), 1) for key, val in log_likes.items()}

model_compare = pd.DataFrame(waics.items(), columns=['Model','WAIC'])
outcompare = model_compare.merge(model_compare.WAIC.apply(pd.Series),
                    left_index = True, right_index = True).drop(["WAIC"], axis = 1)
outcompare.columns = ['Model', 'waic', 'lppd', 'p_waic']

outcompare = outcompare.sort_values(by="waic")
outcompare["dWAIC"] = outcompare["waic"] - outcompare.iloc[0, 1]
outcompare["weight"] = onp.round(torch.nn.functional.softmax(-1/2 * torch.tensor(outcompare["dWAIC"]),
                                                                dim=0), 1)
outcompare

# https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html

	occupancy	speed
0	14.733333	67.666667
1	16.466667	67.133333
2	16.800000	64.666667
3	17.866667	62.000000
4	15.933333	63.266667

	Metrics	TwoRegime	ThreeRegime	ModifiedGreenberg	Eddiesmodel	ConstantExponential	LinearExponential	Linearlogarithm
0	Mean squared error	21.97	21.20	22.73	21.05	22.60	20.75	21.26
1	Mean absolute error	3.64	3.47	3.68	3.46	3.65	3.42	3.48
2	Coefficient of determination	0.88	0.89	0.88	0.89	0.88	0.89	0.88
3	Pearson correlation	0.89	0.90	0.89	0.90	0.89	0.90	0.90

	Model	waic	lppd	p_waic	dWAIC	weight
3	Linear - Exponential	43516.1	-21746.8	11.3	0.0	1.0
6	Linear logarithm	43657.1	-21817.1	11.5	141.0	0.0
5	Eddies model	43692.2	-21824.7	21.4	176.1	0.0
1	Three - Regime	43853.1	-21920.4	6.2	337.0	0.0
0	Two - Regime	44174.1	-22083.3	3.7	658.0	0.0
2	Constant - Exponential	44333.5	-22159.8	6.9	817.4	0.0
4	Modified Greenberg	44356.3	-22171.2	7.0	840.2	0.0