Discrete Choice Models Overview

In [1]:
from __future__ import print_function
import numpy as np
import statsmodels.api as sm
/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

Data

Load data from Spector and Mazzeo (1980). Examples follow Greene's Econometric Analysis Ch. 21 (5th Edition).

In [2]:
spector_data = sm.datasets.spector.load()
spector_data.exog = sm.add_constant(spector_data.exog, prepend=False)

Inspect the data:

In [3]:
print(spector_data.exog[:5,:])
print(spector_data.endog[:5])
[[ 2.66 20.    0.    1.  ]
 [ 2.89 22.    0.    1.  ]
 [ 3.28 24.    0.    1.  ]
 [ 2.92 12.    0.    1.  ]
 [ 4.   21.    0.    1.  ]]
[0. 0. 0. 0. 1.]

Linear Probability Model (OLS)

In [4]:
lpm_mod = sm.OLS(spector_data.endog, spector_data.exog)
lpm_res = lpm_mod.fit()
print('Parameters: ', lpm_res.params[:-1])
Parameters:  [0.46385168 0.01049512 0.37855479]

Logit Model

In [5]:
logit_mod = sm.Logit(spector_data.endog, spector_data.exog)
logit_res = logit_mod.fit(disp=0)
print('Parameters: ', logit_res.params)
Parameters:  [  2.82611259   0.09515766   2.37868766 -13.02134686]

Marginal Effects

In [6]:
margeff = logit_res.get_margeff()
print(margeff.summary())
        Logit Marginal Effects       
=====================================
Dep. Variable:                      y
Method:                          dydx
At:                           overall
==============================================================================
                dy/dx    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.3626      0.109      3.313      0.001       0.148       0.577
x2             0.0122      0.018      0.686      0.493      -0.023       0.047
x3             0.3052      0.092      3.304      0.001       0.124       0.486
==============================================================================

As in all the discrete data models presented below, we can print a nice summary of results:

In [7]:
print(logit_res.summary())
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-2a0dcbd2c000> in <module>()
----> 1 print(logit_res.summary())

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in summary(self, yname, xname, title, alpha, yname_list)
   2756                 yname_list=None):
   2757         smry = super(BinaryResults, self).summary(yname, xname, title, alpha,
-> 2758                      yname_list)
   2759         fittedvalues = self.model.cdf(self.fittedvalues)
   2760         absprederror = np.abs(self.model.endog - fittedvalues)

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in summary(self, yname, xname, title, alpha, yname_list)
   2548                      ('Log-Likelihood:', None),
   2549                      ('LL-Null:', ["%#8.5g" % self.llnull]),
-> 2550                      ('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
   2551                      ]
   2552 

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/tools/decorators.py in __get__(self, obj, type)
     95         if _cachedval is None:
     96             # Call the "fget" function
---> 97             _cachedval = self.fget(obj)
     98             # Set the attribute in obj
     99             # print("Setting %s in cache to %s" % (name, _cachedval))

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in llr_pvalue(self)
   2403     @cache_readonly
   2404     def llr_pvalue(self):
-> 2405         return stats.chisqprob(self.llr, self.df_model)
   2406 
   2407     @cache_readonly

AttributeError: module 'scipy.stats' has no attribute 'chisqprob'

Probit Model

In [8]:
probit_mod = sm.Probit(spector_data.endog, spector_data.exog)
probit_res = probit_mod.fit()
probit_margeff = probit_res.get_margeff()
print('Parameters: ', probit_res.params)
print('Marginal effects: ')
print(probit_margeff.summary())
Optimization terminated successfully.
         Current function value: 0.400588
         Iterations 6
Parameters:  [ 1.62581004  0.05172895  1.42633234 -7.45231965]
Marginal effects: 
       Probit Marginal Effects       
=====================================
Dep. Variable:                      y
Method:                          dydx
At:                           overall
==============================================================================
                dy/dx    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.3608      0.113      3.182      0.001       0.139       0.583
x2             0.0115      0.018      0.624      0.533      -0.025       0.048
x3             0.3165      0.090      3.508      0.000       0.140       0.493
==============================================================================

Multinomial Logit

Load data from the American National Election Studies:

In [9]:
anes_data = sm.datasets.anes96.load()
anes_exog = anes_data.exog
anes_exog = sm.add_constant(anes_exog, prepend=False)

Inspect the data:

In [10]:
print(anes_data.exog[:5,:])
print(anes_data.endog[:5])
[[-2.30258509  7.         36.          3.          1.        ]
 [ 5.24755025  3.         20.          4.          1.        ]
 [ 3.43720782  2.         24.          6.          1.        ]
 [ 4.4200447   3.         28.          6.          1.        ]
 [ 6.46162441  5.         68.          6.          1.        ]]
[6. 1. 1. 1. 0.]

Fit MNL model:

In [11]:
mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog)
mlogit_res = mlogit_mod.fit()
print(mlogit_res.params)
Optimization terminated successfully.
         Current function value: 1.548647
         Iterations 7
[[-1.15359746e-02 -8.87506530e-02 -1.05966699e-01 -9.15567017e-02
  -9.32846040e-02 -1.40880692e-01]
 [ 2.97714352e-01  3.91668642e-01  5.73450508e-01  1.27877179e+00
   1.34696165e+00  2.07008014e+00]
 [-2.49449954e-02 -2.28978371e-02 -1.48512069e-02 -8.68134503e-03
  -1.79040689e-02 -9.43264870e-03]
 [ 8.24914421e-02  1.81042758e-01 -7.15241904e-03  1.99827955e-01
   2.16938850e-01  3.21925702e-01]
 [ 5.19655317e-03  4.78739761e-02  5.75751595e-02  8.44983753e-02
   8.09584122e-02  1.08894083e-01]
 [-3.73401677e-01 -2.25091318e+00 -3.66558353e+00 -7.61384309e+00
  -7.06047825e+00 -1.21057509e+01]]

Poisson

Load the Rand data. Note that this example is similar to Cameron and Trivedi's Microeconometrics Table 20.5, but it is slightly different because of minor changes in the data.

In [12]:
rand_data = sm.datasets.randhie.load()
rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)
rand_exog = sm.add_constant(rand_exog, prepend=False)

Fit Poisson model:

In [13]:
poisson_mod = sm.Poisson(rand_data.endog, rand_exog)
poisson_res = poisson_mod.fit(method="newton")
print(poisson_res.summary())
Optimization terminated successfully.
         Current function value: 3.091609
         Iterations 12
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-13-7bb737b10231> in <module>()
      1 poisson_mod = sm.Poisson(rand_data.endog, rand_exog)
      2 poisson_res = poisson_mod.fit(method="newton")
----> 3 print(poisson_res.summary())

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in summary(self, yname, xname, title, alpha, yname_list)
   2548                      ('Log-Likelihood:', None),
   2549                      ('LL-Null:', ["%#8.5g" % self.llnull]),
-> 2550                      ('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
   2551                      ]
   2552 

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/tools/decorators.py in __get__(self, obj, type)
     95         if _cachedval is None:
     96             # Call the "fget" function
---> 97             _cachedval = self.fget(obj)
     98             # Set the attribute in obj
     99             # print("Setting %s in cache to %s" % (name, _cachedval))

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in llr_pvalue(self)
   2403     @cache_readonly
   2404     def llr_pvalue(self):
-> 2405         return stats.chisqprob(self.llr, self.df_model)
   2406 
   2407     @cache_readonly

AttributeError: module 'scipy.stats' has no attribute 'chisqprob'

Negative Binomial

The negative binomial model gives slightly different results.

In [14]:
mod_nbin = sm.NegativeBinomial(rand_data.endog, rand_exog)
res_nbin = mod_nbin.fit(disp=False)
print(res_nbin.summary())
/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/base/model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-14-dfe9a556c585> in <module>()
      1 mod_nbin = sm.NegativeBinomial(rand_data.endog, rand_exog)
      2 res_nbin = mod_nbin.fit(disp=False)
----> 3 print(res_nbin.summary())

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in summary(self, yname, xname, title, alpha, yname_list)
   2548                      ('Log-Likelihood:', None),
   2549                      ('LL-Null:', ["%#8.5g" % self.llnull]),
-> 2550                      ('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
   2551                      ]
   2552 

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/tools/decorators.py in __get__(self, obj, type)
     95         if _cachedval is None:
     96             # Call the "fget" function
---> 97             _cachedval = self.fget(obj)
     98             # Set the attribute in obj
     99             # print("Setting %s in cache to %s" % (name, _cachedval))

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in llr_pvalue(self)
   2403     @cache_readonly
   2404     def llr_pvalue(self):
-> 2405         return stats.chisqprob(self.llr, self.df_model)
   2406 
   2407     @cache_readonly

AttributeError: module 'scipy.stats' has no attribute 'chisqprob'

Alternative solvers

The default method for fitting discrete data MLE models is Newton-Raphson. You can use other solvers by using the method argument:

In [15]:
mlogit_res = mlogit_mod.fit(method='bfgs', maxiter=100)
print(mlogit_res.summary())
Warning: Maximum number of iterations has been exceeded.
         Current function value: 1.548650
         Iterations: 100
         Function evaluations: 106
         Gradient evaluations: 106
/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/base/model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-15-7c8a76def398> in <module>()
      1 mlogit_res = mlogit_mod.fit(method='bfgs', maxiter=100)
----> 2 print(mlogit_res.summary())

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in summary(self, yname, xname, title, alpha, yname_list)
   2548                      ('Log-Likelihood:', None),
   2549                      ('LL-Null:', ["%#8.5g" % self.llnull]),
-> 2550                      ('LLR p-value:', ["%#6.4g" % self.llr_pvalue])
   2551                      ]
   2552 

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/tools/decorators.py in __get__(self, obj, type)
     95         if _cachedval is None:
     96             # Call the "fget" function
---> 97             _cachedval = self.fget(obj)
     98             # Set the attribute in obj
     99             # print("Setting %s in cache to %s" % (name, _cachedval))

/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/discrete/discrete_model.py in llr_pvalue(self)
   2403     @cache_readonly
   2404     def llr_pvalue(self):
-> 2405         return stats.chisqprob(self.llr, self.df_model)
   2406 
   2407     @cache_readonly

AttributeError: module 'scipy.stats' has no attribute 'chisqprob'