回帰分析について
以下のデータの公転周期=orbital-periodを応答変数、説明変数をsemi-major-axisにして分析する。
planet,inclination,eccentricity,semi-major-axis,orbital-period Mercury,7.004,0.2056,0.3871,0.241 Venus,3.39471,0.0068,0.72333,0.615 Earth,0.00005,0.0167,1,1 Mars,1.85061,0.0934,1.52366,1.881 Jupiter,1.3053,0.0484,5.20336,11.86 Saturn,2.48446,0.0542,9.53707,29.46 Uranus,0.774,0.0461,19.19138,84.01 Neptune,1.76917,0.0086,30.06896,164.79 Pluto,17.089,0.25025,39.445,247.74
import math import pandas as pd import numpy as np import matplotlib.pyplot as plt data = pd.read_csv("/planet.data") axismean = data['semi-major-axis'].mean() periodmean = data['orbital-period'].mean() axismean_var = data.apply(lambda x: (x["semi-major-axis"] - axismean) * (x["semi-major-axis"] - axismean) , axis=1).sum() / data['semi-major-axis'].count() - 1 periodmean_var = data.apply(lambda x: (x["orbital-period"] - periodmean) * (x["orbital-period"] - periodmean) , axis=1).sum() / data['orbital-period'].count() - 1 covariance = data.apply(lambda x: (x["semi-major-axis"] - axismean) * (x["orbital-period"] - periodmean), axis=1).sum() / data['orbital-period'].count() - 1 correlation_coefficient = covariance / (math.sqrt(axismean_var) * math.sqrt(periodmean_var)) regression_coefficient = covariance / axismean_var alpha = periodmean - regression_coefficient * axismean plt.scatter(data['semi-major-axis'], data['orbital-period']) r = np.corrcoef(data['semi-major-axis'], data['orbital-period']) x = np.linspace(0, 50) y = alpha + regression_coefficient * x plt.plot(x, y, "r-") plt.title(" alpha = " + str(round(alpha,2)) + " regression_coefficient = " + str(round(regression_coefficient,2)) + " correlation_coefficient = " + str(round(correlation_coefficient,2))) plt.show()
結果