Accuracy of BMLiNGAM

This notebook evaluates the accuracy of BMLiNGAM for causal inference.

In [1]:
%matplotlib inline
%autosave 0
import sys, os
sys.path.insert(0, os.path.expanduser('~/work/git/github/taku-y/bmlingam'))
sys.path.insert(0, os.path.expanduser('~/work/git/github/pymc-devs/pymc3'))

import theano
theano.config.floatX = 'float64'

from copy import deepcopy
import hashlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time

from expr1 import run_trial
from bmlingam import load_pklz, save_pklz
Autosave disabled

Experimental conditions

The experimental conditions are as follows.

  • Parameters of artificial data
    • The number of observed samples (n_samples): [100]
    • The number of confounding factors (n_confs or \(Q\)): [1, 3, 5, 10]
    • Scale of total noise: \(c = 0.25, 0.5, 1.0, 3.0\)
    • Scale of each confounding factor: \(c/\sqrt{Q}\) (see the sketch after this list)
    • Distribution of observation noise (data_noise_type): ['laplace', 'uniform']
    • Scale of observation noise: 3
  • Hyperparameter range
    • Correlation coefficients of confounding factors (L_cov_21s): [[-.9, -.7, -.5, -.3, 0, .3, .5, .7, .9]]
    • Distribution of observation noise (model_noise_type): ['gg']
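
Dividing by \(\sqrt{Q}\) presumably keeps the total variance of the confounding effects at \(c^2\) regardless of \(Q\): the variances of \(Q\) independent confounders of scale \(c/\sqrt{Q}\) sum to \(c^2\). A minimal sketch of this relation (conf_scale is a hypothetical helper, not part of bmlingam):

import numpy as np

def conf_scale(totalnoise, n_confs):
    # Per-confounder scale c / sqrt(Q); Q independent confounders of this
    # scale have variances summing to (c / sqrt(Q))**2 * Q = c**2.
    return totalnoise / np.sqrt(n_confs)

print(conf_scale(3.0, 9))  # 1.0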
In [2]:
conds = [
        {
            'totalnoise': totalnoise,
            'L_cov_21s': L_cov_21s,
            'n_samples': n_samples,
            'n_confs': n_confs,
            'data_noise_type': data_noise_type,
            'model_noise_type': model_noise_type
        }
        for totalnoise in [0.25, 0.5, 1.0, 3.0]
        for L_cov_21s in [[-.9, -.7, -.5, -.3, 0, .3, .5, .7, .9]]
        for n_samples in [100]
        for n_confs in [1, 3, 5, 10]
        for data_noise_type in ['laplace', 'uniform']
        for model_noise_type in ['gg']
    ]

Program

Identifier of a trial

The identifier of a trial is determined from:

  • Trial index (ix_trial)
  • The number of samples (n_samples)
  • The number of confounders (n_confs)
  • Distribution of observation noise of artificial data (data_noise_type)
  • Distribution of observation noise of BMLiNGAM model (model_noise_type)
  • Correlation coefficient of confounding effect (L_cov_21s)
  • Total noise scale (totalnoise)

We use these identifiers to store the results of trials, so that trials already computed can be skipped when the experiment is rerun.

In [3]:
def make_id(ix_trial, n_samples, n_confs, data_noise_type, model_noise_type, L_cov_21s, totalnoise):
    """Return a deterministic identifier (MD5 hex digest) of a trial."""
    # Flatten the list of correlation coefficients so the tuple below has a
    # stable string representation.
    L_cov_21s_ = ' '.join([str(v) for v in L_cov_21s])

    return hashlib.md5(
        str((L_cov_21s_, ix_trial, n_samples, n_confs, data_noise_type, model_noise_type, totalnoise)).encode('utf-8')
    ).hexdigest()

# Test
print(make_id(55, 100, 12, 'all', 'gg', [1, 2, 3], 0.3))
8eb07d5dcb68e8163a7a2a0582b59d1f

Append results to dataframe

In [4]:
def add_result_to_df(df, result):
    """Append a result dict as a new row, creating the dataframe if needed."""
    if df is None:
        return pd.DataFrame({k: [v] for k, v in result.items()})
    else:
        return df.append(result, ignore_index=True)

# Test
result1 = {'col1': 10, 'col2': 20}
result2 = {'col1': 30, 'col2': -10}
df1 = add_result_to_df(None, result1)
print('--- df1 ---')
print(df1)
df2 = add_result_to_df(df1, result2)
print('--- df2 ---')
print(df2)
--- df1 ---
   col1  col2
0    10    20
--- df2 ---
   col1  col2
0    10    20
1    30   -10
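
Note that DataFrame.append was removed in pandas 2.0. On a newer pandas, the same helper could be written with pd.concat; a sketch, not the code used in this notebook:

import pandas as pd

def add_result_to_df(df, result):
    # Wrap the result dict in a one-row dataframe, then concatenate.
    row = pd.DataFrame({k: [v] for k, v in result.items()})
    return row if df is None else pd.concat([df, row], ignore_index=True)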

Save and load data frame

In [5]:
def load_df(df_file):
    if os.path.exists(df_file):
        return load_pklz(df_file)
    else:
        return None

def save_df(df_file, df):
    save_pklz(df_file, df)

Perform experiment over conditions and trials

In [6]:
def df_exist_result_id(df, result_id):
    """Return True if a result with the given identifier is already stored."""
    if df is not None:
        return result_id in np.array(df['result_id'])
    else:
        return False

def run_expr(conds, n_trials_per_cond=50):
    """Perform evaluation of BMLiNGAM given a set of experimental conditions.

    For each condition, several trials are executed.
    In a trial, BMLiNGAM is applied to causal inference for artificial data.
    The average accuracy is computed for each condition.
    """
    # Filename of dataframe
    data_dir = '.'
    df_file = data_dir + '/20160822-eval-bml-results.pklz'

    # Load results computed in previous runs
    df = load_df(df_file)

    # Loop over experimental conditions
    n_skip = 0
    for cond in conds:
        print(cond)

        # Loop over trials
        for ix_trial in range(n_trials_per_cond):
            # Identifier of a trial for (cond, ix_trial)
            result_id = make_id(ix_trial, **cond)

            # Check if the result has been already stored in the data frame
            if df_exist_result_id(df, result_id):
                n_skip += 1
            else:
                # `result` is a dict including results of trials.
                # `ix_trial` is used as the random seed of the corresponding trial.
                result = run_trial(ix_trial, cond)
                result.update({'result_id': result_id})

                df = add_result_to_df(df, result)
                save_df(df_file, df)

    print('Number of skipped trials = {}'.format(n_skip))
    return df

Run program

In [7]:
df = run_expr(conds)
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
Number of skipped trials = 1600
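
All 32 conditions (4 total noise scales × 4 values of n_confs × 2 data noise types) times 50 trials per condition give 1600 trials, and every one was already stored in the dataframe, so the results below are loaded from a previous run.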

Result table

In [8]:
import pandas as pd

df_file = './20160822-eval-bml-results.pklz'
df = load_pklz(df_file)
df = pd.concat(
    {
        '2log(bf)': df['log_bf'],
        'correct rate': df['correct_rate'],
        'totalnoise': df['totalnoise'],
        'data noise type': df['data_noise_type'],
        'n_confs': df['n_confs']
    }, axis=1
)
# Mean accuracy and mean 2log(bf) for each experimental condition
sg = df.groupby(['data noise type', 'n_confs', 'totalnoise'])
sg1 = sg['correct rate'].mean()
sg2 = sg['2log(bf)'].mean()

pd.concat(
    {
        'correct_rate': sg1,
        '2log(bf)': sg2,
    }, axis=1
)
Out[8]:
                                    2log(bf)  correct_rate
data noise type n_confs totalnoise
laplace         1       0.25       10.658543          0.84
                        0.50        9.956851          0.76
                        1.00        8.132585          0.78
                        3.00        6.067567          0.44
                3       0.25        9.701838          0.82
                        0.50        8.965892          0.80
                        1.00        6.592364          0.64
                        3.00        5.585870          0.38
                5       0.25        9.973692          0.88
                        0.50        9.525252          0.80
                        1.00        7.731706          0.70
                        3.00        6.341696          0.60
                10      0.25       10.690053          0.86
                        0.50       10.349471          0.86
                        1.00        8.007714          0.74
                        3.00        5.491167          0.52
uniform         1       0.25       11.557302          0.88
                        0.50       10.521002          0.76
                        1.00        7.951135          0.68
                        3.00        5.253098          0.48
                3       0.25       12.815920          0.90
                        0.50       11.660920          0.86
                        1.00        7.951725          0.74
                        3.00        6.008483          0.56
                5       0.25       11.016579          0.86
                        0.50       10.214459          0.78
                        1.00        6.845886          0.66
                        3.00        5.512882          0.52
                10      0.25       11.683218          0.88
                        0.50       10.147768          0.82
                        1.00        7.335032          0.62
                        3.00        6.205580          0.50
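
The two groupby means and the final concat above could also be written as a single call; a sketch using the column names defined in that cell:

sg = df.groupby(['data noise type', 'n_confs', 'totalnoise'])
summary = sg[['2log(bf)', 'correct rate']].mean()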

Bayes factor and accuracy
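
The 2log(bf) bins used below, (0, 2], (2, 6], (6, 10], and (10, 100], follow the conventional evidence categories for twice the log Bayes factor (Kass and Raftery, 1995): barely worth mentioning, positive, strong, and very strong, respectively.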

In [9]:
import pandas as pd

data_dir = '.'
df_file = data_dir + '/20160822-eval-bml-results.pklz'
df = load_pklz(df_file)

df = pd.concat(
    {
        '2log(bf)': df['log_bf'],
        'correct rate': df['correct_rate'],
        'count': df['correct_rate'],  # per-trial correctness (0/1); summed below
        'totalnoise': df['totalnoise'],
        'data noise type': df['data_noise_type']
    }, axis=1
)

# Bin trials by 2log(bf) and report, per bin and data noise type, the
# number of correctly inferred trials ('count') and the mean accuracy.
df = df.pivot_table(values=['correct rate', 'count'],
                    index=['totalnoise', pd.cut(df['2log(bf)'], [0., 2., 6., 10., 100.])],
                    columns='data noise type',
                    aggfunc={'correct rate': np.mean, 'count': np.sum})
df
Out[9]:
                        count         correct rate
data noise type       laplace uniform      laplace   uniform
totalnoise 2log(bf)
0.25       (0, 2]        21.0    18.0     0.636364  0.642857
           (2, 6]        26.0    31.0     0.722222  0.794872
           (6, 10]       37.0    20.0     0.822222  0.869565
           (10, 100]     86.0   107.0     1.000000  0.972727
0.50       (0, 2]        13.0    16.0     0.481481  0.533333
           (2, 6]        37.0    27.0     0.711538  0.613636
           (6, 10]       37.0    24.0     0.840909  0.800000
           (10, 100]     74.0    94.0     0.961039  0.979167
1.00       (0, 2]        21.0    18.0     0.567568  0.600000
           (2, 6]        36.0    39.0     0.631579  0.520000
           (6, 10]       27.0    34.0     0.600000  0.723404
           (10, 100]     59.0    44.0     0.967213  0.916667
3.00       (0, 2]        28.0    30.0     0.509091  0.600000
           (2, 6]        34.0    36.0     0.465753  0.486486
           (6, 10]       17.0    20.0     0.515152  0.500000
           (10, 100]     18.0    17.0     0.461538  0.472222