Welcome to Statsmodels’s Documentation¶
statsmodels
is a Python module that provides classes and functions for the estimation
of many different statistical models, as well as for conducting statistical tests, and statistical
data exploration. An extensive list of result statistics are available for each estimator.
The results are tested against existing statistical packages to ensure that they are correct. The
package is released under the open source Modified BSD (3-clause) license.
The online documentation is hosted at statsmodels.org.
Minimal Examples¶
Since version 0.5.0
of statsmodels
, you can use R-style formulas
together with pandas
data frames to fit your models. Here is a simple
example using ordinary least squares:
In [1]: import numpy as np
In [2]: import statsmodels.api as sm
In [3]: import statsmodels.formula.api as smf
# Load data
In [4]: dat = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1317 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318 encode_chunked=req.has_header('Transfer-encoding'))
1319 except OSError as err: # timeout error
/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
/usr/lib/python3.6/http/client.py in send(self, data)
963 if self.auto_open:
--> 964 self.connect()
965 else:
/usr/lib/python3.6/http/client.py in connect(self)
1391
-> 1392 super().connect()
1393
/usr/lib/python3.6/http/client.py in connect(self)
935 self.sock = self._create_connection(
--> 936 (self.host,self.port), self.timeout, self.source_address)
937 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
703 err = None
--> 704 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
705 af, socktype, proto, canonname, sa = res
/usr/lib/python3.6/socket.py in getaddrinfo(host, port, family, type, proto, flags)
744 addrlist = []
--> 745 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
746 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-4-ba67cff39b0d> in <module>()
----> 1 dat = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in get_rdataset(dataname, package, cache)
288 "master/doc/"+package+"/rst/")
289 cache = _get_cache(cache)
--> 290 data, from_cache = _get_data(data_base_url, dataname, cache)
291 data = read_csv(data, index_col=0)
292 data = _maybe_reset_index(data)
/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in _get_data(base_url, dataname, cache, extension)
219 url = base_url + (dataname + ".%s") % extension
220 try:
--> 221 data, from_cache = _urlopen_cached(url, cache)
222 except HTTPError as err:
223 if '404' in str(err):
/build/statsmodels-JytjB9/statsmodels-0.8.0/.pybuild/cpython3_3.6_statsmodels/build/statsmodels/datasets/utils.py in _urlopen_cached(url, cache)
210 # not using the cache or didn't find it in cache
211 if not from_cache:
--> 212 data = urlopen(url).read()
213 if cache is not None: # then put it in the cache
214 _cache_it(data, cache_path)
/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
524 req = meth(req)
525
--> 526 response = self._open(req, data)
527
528 # post-process response
/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
542 protocol = req.type
543 result = self._call_chain(self.handle_open, protocol, protocol +
--> 544 '_open', req)
545 if result:
546 return result
/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/usr/lib/python3.6/urllib/request.py in https_open(self, req)
1359 def https_open(self, req):
1360 return self.do_open(http.client.HTTPSConnection, req,
-> 1361 context=self._context, check_hostname=self._check_hostname)
1362
1363 https_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1318 encode_chunked=req.has_header('Transfer-encoding'))
1319 except OSError as err: # timeout error
-> 1320 raise URLError(err)
1321 r = h.getresponse()
1322 except:
URLError: <urlopen error [Errno -2] Name or service not known>
# Fit regression model (using the natural log of one of the regressors)
In [5]: results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()