# day03_machine_learning.py (3.5 KB)
  1. from sklearn.datasets import load_boston
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.preprocessing import StandardScaler
  4. from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
  5. from sklearn.metrics import mean_squared_error
  6. # from sklearn.externals import joblib # 新版本直接 import joblib
  7. import joblib
  8. def linear1():
  9. """
  10. 正规方程的优化方法对波士顿房价进行预测
  11. :return:
  12. """
  13. # 1)获取数据
  14. boston = load_boston()
  15. # 2)划分数据集
  16. x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)
  17. # 3)标准化
  18. transfer = StandardScaler()
  19. x_train = transfer.fit_transform(x_train)
  20. x_test = transfer.transform(x_test)
  21. # 4)预估器
  22. estimator = LinearRegression()
  23. estimator.fit(x_train, y_train)
  24. # 5)得出模型
  25. print("正规方程-权重系数为:\n", estimator.coef_)
  26. print("正规方程-偏置为:\n", estimator.intercept_)
  27. # 6)模型评估
  28. y_predict = estimator.predict(x_test)
  29. print("预测房价:\n", y_predict)
  30. error = mean_squared_error(y_test, y_predict)
  31. print("正规方程-均方误差为:\n", error)
  32. return None
  33. def linear2():
  34. """
  35. 梯度下降的优化方法对波士顿房价进行预测
  36. :return:
  37. """
  38. # 1)获取数据
  39. boston = load_boston()
  40. print("特征数量:\n", boston.data.shape)
  41. # 2)划分数据集
  42. x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)
  43. # 3)标准化
  44. transfer = StandardScaler()
  45. x_train = transfer.fit_transform(x_train)
  46. x_test = transfer.transform(x_test)
  47. # 4)预估器
  48. estimator = SGDRegressor(learning_rate="constant", eta0=0.01, max_iter=10000, penalty="l1")
  49. estimator.fit(x_train, y_train)
  50. # 5)得出模型
  51. print("梯度下降-权重系数为:\n", estimator.coef_)
  52. print("梯度下降-偏置为:\n", estimator.intercept_)
  53. # 6)模型评估
  54. y_predict = estimator.predict(x_test)
  55. print("预测房价:\n", y_predict)
  56. error = mean_squared_error(y_test, y_predict)
  57. print("梯度下降-均方误差为:\n", error)
  58. return None
  59. def linear3():
  60. """
  61. 岭回归对波士顿房价进行预测
  62. :return:
  63. """
  64. # 1)获取数据
  65. boston = load_boston()
  66. print("特征数量:\n", boston.data.shape)
  67. # 2)划分数据集
  68. x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)
  69. # 3)标准化
  70. transfer = StandardScaler()
  71. x_train = transfer.fit_transform(x_train)
  72. x_test = transfer.transform(x_test)
  73. # 4)预估器
  74. # estimator = Ridge(alpha=0.5, max_iter=10000)
  75. # estimator.fit(x_train, y_train)
  76. # 保存模型
  77. # joblib.dump(estimator, "my_ridge.pkl")
  78. # 加载模型
  79. estimator = joblib.load("my_ridge.pkl")
  80. # 5)得出模型
  81. print("岭回归-权重系数为:\n", estimator.coef_)
  82. print("岭回归-偏置为:\n", estimator.intercept_)
  83. # 6)模型评估
  84. y_predict = estimator.predict(x_test)
  85. print("预测房价:\n", y_predict)
  86. error = mean_squared_error(y_test, y_predict)
  87. print("岭回归-均方误差为:\n", error)
  88. return None
  89. if __name__ == "__main__":
  90. # 代码1:正规方程的优化方法对波士顿房价进行预测
  91. linear1()
  92. # 代码2:梯度下降的优化方法对波士顿房价进行预测
  93. linear2()
  94. # 代码3:岭回归对波士顿房价进行预测
  95. linear3()