%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format=‘retina‘
 
from __future__ import absolute_import, division, print_function
 
import sys
import os
 
import pandas as pd
import numpy as np
 
# TSA from Statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
 
# Display and Plotting
import matplotlib.pylab as plt
import seaborn as sns
 
# Print floats with five decimal places in both pandas and numpy output.
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # pandas
np.set_printoptions(precision=5, suppress=True)  # numpy

# Raise the pandas display limits to 100 columns / 100 rows.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# seaborn plotting style
sns.set(style='ticks', context='poster')
# Load the series from a local CSV file (no remote data API is used).
# The first CSV column becomes the index and is parsed as dates.
filename_ts = 'aa.csv'
ts_df = pd.read_csv(filename_ts, index_col=0, parse_dates=[0])

n_sample = ts_df.shape[0]
print(ts_df.shape)
print(ts_df.head())

# Create a training sample and testing sample before analyzing the series.
# The last `n_holdout` rows are kept out of the training sample. This is the
# same value the previous expression `int(1*n_sample)+1-6` produced.
# Alternative split that was tried before: n_train = int(0.95*n_sample)+1
n_holdout = 5
n_train = n_sample - n_holdout
n_forecast = n_sample - n_train

ts_train = ts_df.iloc[:n_train]['Close']
# Held-out rows (currently disabled):
# ts_test = ts_df.iloc[n_train:]['Close']
# print(ts_train.shape)
# print(ts_test.shape)

# Disabled alternative: downsample to weekly means because the daily data
# has too many points, then train on a fixed date range.
# stock = pd.read_csv('aa.csv', index_col=0, parse_dates=[0])
# stock_week = stock['Close'].resample('W-MON').mean()
# stock_train = stock_week['2006':'2020']

print("Training Series:", "\n", ts_train.tail(), "\n")
# print("Testing Series:", "\n", ts_test.head())
def tsplot(y, lags=None, title='', figsize=(14, 8)):
    """Draw a 2x2 overview of a series: line plot, histogram, ACF and PACF.

    Parameters
    ----------
    y : object with a pandas-style ``.plot(ax=..., kind=...)`` method
        The series to inspect (presumably a ``pandas.Series``).
    lags : optional
        Passed through unchanged as the ``lags`` argument of
        ``plot_acf`` and ``plot_pacf``.
    title : str
        Title of the line plot (top-left panel).
    figsize : tuple
        Figure size handed to ``plt.figure``.

    Returns
    -------
    tuple
        ``(ts_ax, acf_ax, pacf_ax)`` -- the axes of the line plot, the ACF
        plot and the PACF plot. The histogram axes are not returned.
    """
    fig = plt.figure(figsize=figsize)
    layout = (2, 2)
    ts_ax = plt.subplot2grid(layout, (0, 0))
    hist_ax = plt.subplot2grid(layout, (0, 1))
    acf_ax = plt.subplot2grid(layout, (1, 0))
    pacf_ax = plt.subplot2grid(layout, (1, 1))

    y.plot(ax=ts_ax)  # line plot
    ts_ax.set_title(title)
    y.plot(ax=hist_ax, kind='hist', bins=25)  # histogram
    hist_ax.set_title('Histogram')
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)  # autocorrelation
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)  # partial autocorrelation

    # Start the x-axis of both correlation plots at 0.
    for ax in (acf_ax, pacf_ax):
        ax.set_xlim(0)

    sns.despine()
    fig.tight_layout()
    return ts_ax, acf_ax, pacf_ax
 
tsplot(ts_train, title='A Given Training Series', lags=20);

# Fit the model
import warnings

# Ignore all warnings from here on.
warnings.filterwarnings('ignore')

# Baseline SARIMAX fit; p, d and q should be chosen with the order-selection
# steps further down. NOTE(review): despite the name `arima200`, the order
# actually fitted here is (1, 0, 0).
arima200 = sm.tsa.SARIMAX(ts_train, order=(1, 0, 0))
model_results = arima200.fit()
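# Use BIC (Bayesian information criterion) to select the model order here.
# AIC (Akaike information criterion) can be used instead, depending on the case.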
import itertools

# Inclusive search bounds for the (p, d, q) orders.
p_min = 0
d_min = 0
q_min = 0
p_max = 4
d_max = 0
q_max = 4

# Initialize a DataFrame to store the results: one row per AR order and one
# column per MA order. The table has no axis for d, so with d_max > d_min
# later d values would overwrite earlier ones in the same cell.
results_bic = pd.DataFrame(
    index=['AR{}'.format(i) for i in range(p_min, p_max + 1)],
    columns=['MA{}'.format(i) for i in range(q_min, q_max + 1)],
)

for p, d, q in itertools.product(range(p_min, p_max + 1),
                                 range(d_min, d_max + 1),
                                 range(q_min, q_max + 1)):
    if p == 0 and d == 0 and q == 0:
        # The all-zero order is skipped; its cell is explicitly left empty.
        results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
        continue

    try:
        # Optional SARIMAX switches that were left disabled:
        #   enforce_stationarity=False, enforce_invertibility=False
        model = sm.tsa.SARIMAX(ts_train, order=(p, d, q))
        # The results object carries much more than the BIC; see
        # http://www.statsmodels.org/stable/tsa.html
        results = model.fit()
    except Exception as err:
        # Best effort: report the failing order and leave its cell as NaN
        # instead of silently swallowing every error (including Ctrl-C).
        print('SARIMAX order ({}, {}, {}) failed: {}'.format(p, d, q, err))
        continue

    results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = results.bic
# Make sure every cell is numeric before plotting.
results_bic = results_bic.astype(float)

# Annotated heatmap of the BIC table; cells without a value are masked out.
fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(results_bic,
                 mask=results_bic.isnull(),
                 ax=ax,
                 annot=True,
                 fmt='.2f',
                 );
ax.set_title('BIC');
# Alternative model selection method, limited to only searching AR and MA parameters
# NOTE(review): trend='nc' is the spelling used by older statsmodels releases;
# newer releases appear to expect trend='n' for "no constant" -- confirm
# against the installed statsmodels version before changing it.
train_results = sm.tsa.arma_order_select_ic(ts_train, ic=['aic', 'bic'], trend='nc', max_ar=4, max_ma=4)

print('AIC', train_results.aic_min_order)
print('BIC', train_results.bic_min_order)
# Residual analysis: check for normality (the Q-Q plot should look linear).

model_results.plot_diagnostics(figsize=(16, 12));

# NOTE(review): `statsmodels.tsa.arima_model` is the legacy ARIMA location;
# newer statsmodels releases appear to provide it as
# `statsmodels.tsa.arima.model.ARIMA` with a different `fit` signature --
# confirm against the installed version.
from statsmodels.tsa.arima_model import ARIMA

# Fit ARIMA(7, 1, 0) on the training series.
# `model` used to be re-bound twice more with `ts_restored` and `data`,
# which are never defined in this file and therefore raised NameError.
# They are kept here only as the alternatives they were labelled as:
#   model = ARIMA(ts_restored, order=(7, 1, 0))  # case 1
#   model = ARIMA(data, order=(7, 1, 0))         # case 2
model = ARIMA(ts_train, order=(7, 1, 0))
result = model.fit(disp=-1)
print(result.summary())

# Model check: per the original author, none of the coefficient confidence
# intervals contains 0, i.e. all coefficients are significant at the 5%
# level -- re-check this against the printed intervals for your own data.
print(result.conf_int())
# Finally, plot the time series
# fig, ax = plt.subplots(figsize=(12, 10))
# # pred = result.predict('20140609', '20160701', dynamic=True, typ='levels')
# ax = ts_train.loc['2020':].plot(ax=ax)   # note: the starting point is 1901
# fig = result.plot_predict(20, 500)  # 90 earlier values plus 10 predicted ones make 100
# plt.show()   # forecast the data and plot it
# print(stock_train)
# model = ARIMA(stock_train, order=(7, 1, 0))
# model_pred = model.fit()
 
# Original article: https://www.cnblogs.com/hzthyj/p/14724218.html