# jam-sagamaker.py
  1. #import the required packages
  2. import pandas as pd
  3. import warnings
  4. warnings.filterwarnings("ignore")
  5. import seaborn.apionly as sns
  6. import sklearn
  7. from sklearn.model_selection import train_test_split
  8. import numpy as np
  9. from sklearn.ensemble import RandomForestClassifier
  10. from scipy.stats import randint as sp_randint
  11. %matplotlib inline
  12. import pickle
  13. #load the 'Iris' dataset, display simple stats about data size, and then print sample data
  14. df = pd.DataFrame(sns.load_dataset('iris'))
  15. print 'shape of the data frame'+str(df.shape)
  16. print 'We have an even spread of iris flower types'
  17. print df.groupby(['species']).size()
  18. print'Display ten random rows from the iris dataset'
  19. df.(samplen=10)
  20. """
  21. 一一一一一一一一一一一一一一一一一一一一一
  22. shape of the data frame(150, 5)
  23. We have an even spread of iris flower types
  24. species
  25. setosa 50
  26. versicolor 50
  27. virginica 50
  28. dtype: int64
  29. Display ten random rows from the iris dataset
  30. sepal_length sepal_width petal_length petal_width species
  31. 143 6.8 3.2 5.9 2.3 virginica
  32. 58 6.6 2.9 4.6 1.3 versicolor
  33. 148 6.2 3.4 5.4 2.3 virginica
  34. 91 6.1 3.0 4.6 1.4 versicolor
  35. 97 6.2 2.9 4.3 1.3 versicolor
  36. 74 6.4 2.9 4.3 1.3 versicolor
  37. 112 6.8 3.0 5.5 2.1 virginica
  38. 72 6.3 2.5 4.9 1.5 versicolor
  39. 104 6.5 3.0 5.8 2.2 virginica
  40. 114 5.8 2.8 5.1 2.4 virginica
  41. """
  42. #let's group setosa and virginica together for the sake of this machine learning exercise
  43. df['y']= np.where(df['species']=='versicolor',1,0)
  44. print df.groupby(['y']).size()
  45. print 'we now have 50 versicolors and 100 non-versicolors'
  46. X=df.drop('species',1).drop('y',1)
  47. y=df['y']
  48. df.sample(n=10)
  49. """"
  50. 二二二二二二二二二二二二二二二二二二二二二二
  51. y
  52. 0 100
  53. 1 50
  54. dtype: int64
  55. we now have 50 versicolors and 100 non-versicolors
  56. sepal_length sepal_width petal_length petal_width species y
  57. 7 5.0 3.4 1.5 0.2 setosa 0
  58. 137 6.4 3.1 5.5 1.8 virginica 0
  59. 116 6.5 3.0 5.5 1.8 virginica 0
  60. 102 7.1 3.0 5.9 2.1 virginica 0
  61. 122 7.7 2.8 6.7 2.0 virginica 0
  62. 98 5.1 2.5 3.0 1.1 versicolor 1
  63. 13 4.3 3.0 1.1 0.1 setosa 0
  64. 14 5.8 4.0 1.2 0.2 setosa 0
  65. 53 5.5 2.3 4.0 1.3 versicolor 1
  66. 41 4.5 2.3 1.3 0.3 setosa 0
  67. """"
  68. #Initialize the random forest machine learning algorithm object
  69. RANDOM_STATE=0
  70. forest = RandomForestClassifier(n_estimators = 500, random_state=RANDOM_STATE, oob_score="True")
  71. #Train the random forest model on the data
  72. forest_model = forest.fit(X,y)
  73. #use the forest model to make a prediction on a new row of data
  74. #define a new array with the order of 'sepal_length', 'sepal_width', 'petal_length', and 'petal_width'
  75. n=[[[6.1,2.9,4.7,1.4]],
  76. [[7.2,3.2,6.0,1.8]],
  77. [[6.5,3.0,5.8,2.2]],
  78. [[7.2,3.6,6.1,2.5]],
  79. [[6.3,3.4,5.6,2.4]],
  80. [[6.3,3.3,6.0,2.5]],
  81. [[5.8,2.6,4.0,1.2]],
  82. [[6.4,3.1,5.5,1.8]],
  83. [[5.6,3.0,4.5,1.5]],
  84. [[5.0,2.3,3.3,1.0]],
  85. [[5.9,3.2,4.8,1.8]],
  86. [[6.6,2.9,4.6,1.3]],
  87. [[6.2,2.9,4.3,1.3]],
  88. [[5.0,3.6,1.4,0.2]],
  89. [[5.1,3.8,1.9,0.4]],
  90. [[7.2,3.0,5.8,1.6]],
  91. [[5.5,3.5,1.3,0.2]],
  92. [[6.7,3.1,4.7,1.5]],
  93. [[5.5,2.4,3.7,1.0]],
  94. [[4.5,2.3,1.3,0.3]],
  95. [[5.0,3.2,1.2,0.2]],
  96. [[4.8,3.0,1.4,0.1]],
  97. [[5.4,3.9,1.3,0.4]],
  98. [[6.0,2.2,5.0,1.5]],
  99. [[6.9,3.2,5.7,2.3]],
  100. [[4.7,3.2,1.6,0.2]],
  101. [[5.7,3.0,4.2,1.2]],
  102. [[6.9,3.1,5.4,2.1]],
  103. [[5.4,3.0,4.5,1.5]],
  104. [[6.1,3.0,4.9,1.8]],
  105. [[5.7,2.6,3.5,1.0]],
  106. [[5.7,2.8,4.1,1.3]],
  107. [[5.8,2.7,3.9,1.2]],
  108. [[6.3,2.7,4.9,1.8]],
  109. [[4.8,3.1,1.6,0.2]],
  110. [[4.3,3.0,1.1,0.1]],
  111. [[5.0,3.4,1.6,0.4]],
  112. [[4.4,3.0,1.3,0.2]],
  113. [[6.3,2.5,5.0,1.9]],
  114. [[5.8,2.8,5.1,2.4]],
  115. [[5.0,3.3,1.4,0.2]],
  116. [[5.9,3.0,5.1,1.8]],
  117. [[5.8,2.7,5.1,1.9]],
  118. [[4.7,3.2,1.3,0.2]],
  119. [[4.4,2.9,1.4,0.2]],
  120. [[6.0,2.2,4.0,1.0]],
  121. [[6.0,2.7,5.1,1.6]],
  122. [[7.1,3.0,5.9,2.1]],
  123. [[6.2,2.8,4.8,1.8]],
  124. [[6.7,2.5,5.8,1.8]]]
  125. #print new_flower
  126. v = 0
  127. nv = 0
  128. for new_flower in n :
  129. prediction=forest_model.predict(new_flower)
  130. print "a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor"
  131. print prediction
  132. #This is the prediction confidence for the forest_model on that row of data being a versicolor iris.
  133. prediction_proba=forest_model.predict_proba(new_flower)
  134. print 'the confidence of the prediction'
  135. print prediction_proba[0,1]
  136. if prediction == 0 && prediction_proba[0,1] > 0.7:
  137. nv = nv + 1
  138. else :
  139. v = v + 1
  140. print("v, nv ", v, nv)
  141. #save (pickle) your model to disk and then to s3
  142. local_path = "/home/ubuntu" # temp local path to export your model
  143. existing_bucket = "suika-un-labeled" # Bucket has prebuilt Model and Un-Labeled Data
  144. bucket_name = "suika-string-model" # s3 bucket name string to save your model
  145. filename = 'finalized_model.sav'
  146. pickle.dump(forest, open(filename, 'wb'))
  147. #you should now see your finalized_model.sav object in the file path
  148. #the ls command prints the contents of this notebook's folder
  149. print "list of the objects in this jupyter notebook's folder"
  150. !ls
  151. # Upload the saved model to S3
  152. import boto3
  153. s3 = boto3.resource('s3')
  154. s3.Bucket(bucket_name).put_object(Key='finalized_model.sav', Body=open('finalized_model.sav'))
  155. """
  156. 三三三三三三三三三三三三三三三三三三三三三三
  157. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  158. [1]
  159. the confidence of the prediction
  160. 0.998
  161. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  162. [0]
  163. the confidence of the prediction
  164. 0.022
  165. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  166. [0]
  167. the confidence of the prediction
  168. 0.0
  169. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  170. [0]
  171. the confidence of the prediction
  172. 0.002
  173. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  174. [0]
  175. the confidence of the prediction
  176. 0.004
  177. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  178. [0]
  179. the confidence of the prediction
  180. 0.006
  181. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  182. [1]
  183. the confidence of the prediction
  184. 0.998
  185. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  186. [0]
  187. the confidence of the prediction
  188. 0.002
  189. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  190. [1]
  191. the confidence of the prediction
  192. 0.998
  193. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  194. [1]
  195. the confidence of the prediction
  196. 0.994
  197. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  198. [1]
  199. the confidence of the prediction
  200. 0.69
  201. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  202. [1]
  203. the confidence of the prediction
  204. 1.0
  205. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  206. [1]
  207. the confidence of the prediction
  208. 0.998
  209. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  210. [0]
  211. the confidence of the prediction
  212. 0.0
  213. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  214. [0]
  215. the confidence of the prediction
  216. 0.0
  217. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  218. [0]
  219. the confidence of the prediction
  220. 0.156
  221. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  222. [0]
  223. the confidence of the prediction
  224. 0.006
  225. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  226. [1]
  227. the confidence of the prediction
  228. 0.998
  229. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  230. [1]
  231. the confidence of the prediction
  232. 1.0
  233. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  234. [0]
  235. the confidence of the prediction
  236. 0.038
  237. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  238. [0]
  239. the confidence of the prediction
  240. 0.0
  241. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  242. [0]
  243. the confidence of the prediction
  244. 0.0
  245. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  246. [0]
  247. the confidence of the prediction
  248. 0.0
  249. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  250. [0]
  251. the confidence of the prediction
  252. 0.298
  253. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  254. [0]
  255. the confidence of the prediction
  256. 0.002
  257. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  258. [0]
  259. the confidence of the prediction
  260. 0.0
  261. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  262. [1]
  263. the confidence of the prediction
  264. 0.998
  265. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  266. [0]
  267. the confidence of the prediction
  268. 0.002
  269. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  270. [1]
  271. the confidence of the prediction
  272. 0.968
  273. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  274. [0]
  275. the confidence of the prediction
  276. 0.026
  277. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  278. [1]
  279. the confidence of the prediction
  280. 1.0
  281. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  282. [1]
  283. the confidence of the prediction
  284. 1.0
  285. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  286. [1]
  287. the confidence of the prediction
  288. 1.0
  289. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  290. [0]
  291. the confidence of the prediction
  292. 0.02
  293. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  294. [0]
  295. the confidence of the prediction
  296. 0.0
  297. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  298. [0]
  299. the confidence of the prediction
  300. 0.0
  301. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  302. [0]
  303. the confidence of the prediction
  304. 0.0
  305. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  306. [0]
  307. the confidence of the prediction
  308. 0.0
  309. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  310. [0]
  311. the confidence of the prediction
  312. 0.012
  313. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  314. [0]
  315. the confidence of the prediction
  316. 0.0
  317. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  318. [0]
  319. the confidence of the prediction
  320. 0.0
  321. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  322. [0]
  323. the confidence of the prediction
  324. 0.058
  325. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  326. [0]
  327. the confidence of the prediction
  328. 0.008
  329. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  330. [0]
  331. the confidence of the prediction
  332. 0.0
  333. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  334. [0]
  335. the confidence of the prediction
  336. 0.026
  337. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  338. [1]
  339. the confidence of the prediction
  340. 0.988
  341. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  342. [1]
  343. the confidence of the prediction
  344. 0.726
  345. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  346. [0]
  347. the confidence of the prediction
  348. 0.0
  349. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  350. [0]
  351. the confidence of the prediction
  352. 0.074
  353. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  354. [0]
  355. the confidence of the prediction
  356. 0.01
  357. ('v, nv ', 16, 34)
  358. list of the objects in this jupyter notebook's folder
  359. finalized_model.sav iris.ipynb lost+found
  360. s3.Object(bucket_name='suika-string-model', key='finalized_model.sav')
  361. """