|
|
@@ -0,0 +1,381 @@
|
|
|
+
|
|
|
+#import the required packages
|
|
|
+import pandas as pd
|
|
|
+import warnings
|
|
|
+warnings.filterwarnings("ignore")
|
|
|
+import seaborn.apionly as sns
|
|
|
+import sklearn
|
|
|
+from sklearn.model_selection import train_test_split
|
|
|
+import numpy as np
|
|
|
+from sklearn.ensemble import RandomForestClassifier
|
|
|
+from scipy.stats import randint as sp_randint
|
|
|
+%matplotlib inline
|
|
|
+import pickle
|
|
|
+
|
|
|
# Load the 'Iris' dataset, display simple stats about the data size, and then
# show a random sample of rows.
df = pd.DataFrame(sns.load_dataset('iris'))

# Single-argument print(...) calls produce the same text on Python 2 and 3.
print('shape of the data frame' + str(df.shape))
print('We have an even spread of iris flower types')
print(df.groupby(['species']).size())
print('Display ten random rows from the iris dataset')

# Bare expression: as the last statement of a notebook cell it renders the sample.
df.sample(n=10)
|
|
|
+
|
|
|
+
|
|
|
+"""
|
|
|
+一一一一一一一一一一一一一一一一一一一一一
|
|
|
+shape of the data frame(150, 5)
|
|
|
+We have an even spread of iris flower types
|
|
|
+species
|
|
|
+setosa 50
|
|
|
+versicolor 50
|
|
|
+virginica 50
|
|
|
+dtype: int64
|
|
|
+Display ten random rows from the iris dataset
|
|
|
+sepal_length sepal_width petal_length petal_width species
|
|
|
+143 6.8 3.2 5.9 2.3 virginica
|
|
|
+58 6.6 2.9 4.6 1.3 versicolor
|
|
|
+148 6.2 3.4 5.4 2.3 virginica
|
|
|
+91 6.1 3.0 4.6 1.4 versicolor
|
|
|
+97 6.2 2.9 4.3 1.3 versicolor
|
|
|
+74 6.4 2.9 4.3 1.3 versicolor
|
|
|
+112 6.8 3.0 5.5 2.1 virginica
|
|
|
+72 6.3 2.5 4.9 1.5 versicolor
|
|
|
+104 6.5 3.0 5.8 2.2 virginica
|
|
|
+114 5.8 2.8 5.1 2.4 virginica
|
|
|
+"""
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Let's group setosa and virginica together for the sake of this machine
# learning exercise: y is 1 for versicolor rows and 0 for everything else.
df['y'] = np.where(df['species'] == 'versicolor', 1, 0)
print(df.groupby(['y']).size())
print('we now have 50 versicolors and 100 non-versicolors')

# Feature matrix: every column except the text label and the binary target.
X = df.drop(columns=['species', 'y'])
y = df['y']

# Bare expression: as the last statement of a notebook cell it renders the sample.
df.sample(n=10)

# Recorded output of the statements above, kept as a no-op string literal.
"""
二二二二二二二二二二二二二二二二二二二二二二
y
0 100
1 50
dtype: int64
we now have 50 versicolors and 100 non-versicolors
sepal_length sepal_width petal_length petal_width species y
7 5.0 3.4 1.5 0.2 setosa 0
137 6.4 3.1 5.5 1.8 virginica 0
116 6.5 3.0 5.5 1.8 virginica 0
102 7.1 3.0 5.9 2.1 virginica 0
122 7.7 2.8 6.7 2.0 virginica 0
98 5.1 2.5 3.0 1.1 versicolor 1
13 4.3 3.0 1.1 0.1 setosa 0
14 5.8 4.0 1.2 0.2 setosa 0
53 5.5 2.3 4.0 1.3 versicolor 1
41 4.5 2.3 1.3 0.3 setosa 0
"""
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Initialize the random forest machine learning algorithm object.
# A fixed seed keeps the 500-tree ensemble reproducible between runs.
RANDOM_STATE = 0
forest = RandomForestClassifier(
    n_estimators=500,
    random_state=RANDOM_STATE,
    oob_score=True,  # must be a real boolean, not the string "True"
)

# Train the random forest model on the data. fit() returns the fitted
# estimator, so forest_model and forest refer to the same object.
forest_model = forest.fit(X, y)
|
|
|
+
|
|
|
# New rows of data for the forest model to predict on.
# Each entry is a one-row 2-D sample ([[...]]) so it can be handed to
# predict()/predict_proba() as-is; the values are ordered
# 'sepal_length', 'sepal_width', 'petal_length', 'petal_width'.
n = [
    [[6.1, 2.9, 4.7, 1.4]],
    [[7.2, 3.2, 6.0, 1.8]],
    [[6.5, 3.0, 5.8, 2.2]],
    [[7.2, 3.6, 6.1, 2.5]],
    [[6.3, 3.4, 5.6, 2.4]],
    [[6.3, 3.3, 6.0, 2.5]],
    [[5.8, 2.6, 4.0, 1.2]],
    [[6.4, 3.1, 5.5, 1.8]],
    [[5.6, 3.0, 4.5, 1.5]],
    [[5.0, 2.3, 3.3, 1.0]],
    [[5.9, 3.2, 4.8, 1.8]],
    [[6.6, 2.9, 4.6, 1.3]],
    [[6.2, 2.9, 4.3, 1.3]],
    [[5.0, 3.6, 1.4, 0.2]],
    [[5.1, 3.8, 1.9, 0.4]],
    [[7.2, 3.0, 5.8, 1.6]],
    [[5.5, 3.5, 1.3, 0.2]],
    [[6.7, 3.1, 4.7, 1.5]],
    [[5.5, 2.4, 3.7, 1.0]],
    [[4.5, 2.3, 1.3, 0.3]],
    [[5.0, 3.2, 1.2, 0.2]],
    [[4.8, 3.0, 1.4, 0.1]],
    [[5.4, 3.9, 1.3, 0.4]],
    [[6.0, 2.2, 5.0, 1.5]],
    [[6.9, 3.2, 5.7, 2.3]],
    [[4.7, 3.2, 1.6, 0.2]],
    [[5.7, 3.0, 4.2, 1.2]],
    [[6.9, 3.1, 5.4, 2.1]],
    [[5.4, 3.0, 4.5, 1.5]],
    [[6.1, 3.0, 4.9, 1.8]],
    [[5.7, 2.6, 3.5, 1.0]],
    [[5.7, 2.8, 4.1, 1.3]],
    [[5.8, 2.7, 3.9, 1.2]],
    [[6.3, 2.7, 4.9, 1.8]],
    [[4.8, 3.1, 1.6, 0.2]],
    [[4.3, 3.0, 1.1, 0.1]],
    [[5.0, 3.4, 1.6, 0.4]],
    [[4.4, 3.0, 1.3, 0.2]],
    [[6.3, 2.5, 5.0, 1.9]],
    [[5.8, 2.8, 5.1, 2.4]],
    [[5.0, 3.3, 1.4, 0.2]],
    [[5.9, 3.0, 5.1, 1.8]],
    [[5.8, 2.7, 5.1, 1.9]],
    [[4.7, 3.2, 1.3, 0.2]],
    [[4.4, 2.9, 1.4, 0.2]],
    [[6.0, 2.2, 4.0, 1.0]],
    [[6.0, 2.7, 5.1, 1.6]],
    [[7.1, 3.0, 5.9, 2.1]],
    [[6.2, 2.8, 4.8, 1.8]],
    [[6.7, 2.5, 5.8, 1.8]],
]
|
|
|
# Run the trained model over every one-row sample in `n`, print each
# prediction with its versicolor probability, and tally the results.
v = 0   # everything not counted in nv (versicolor or low-confidence calls)
nv = 0  # samples predicted non-versicolor with more than 70% confidence
for new_flower in n:
    prediction = forest_model.predict(new_flower)
    print("a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor")
    print(prediction)

    # predict_proba returns one row per sample with one column per class,
    # ordered like forest_model.classes_ (0 then 1 for this binary target),
    # so [0, 1] is the probability that this row is a versicolor iris.
    prediction_proba = forest_model.predict_proba(new_flower)
    print('the confidence of the prediction')
    print(prediction_proba[0, 1])

    # The confidence of a "0" prediction is the class-0 column. Testing the
    # class-1 column here could never succeed together with prediction == 0.
    if prediction[0] == 0 and prediction_proba[0, 0] > 0.7:
        nv = nv + 1
    else:
        v = v + 1

# Printing an explicit tuple keeps the output identical on Python 2 and 3.
print(("v, nv ", v, nv))
|
|
|
+#save (pickle) your model to disk and then to s3
|
|
|
+local_path = "/home/ubuntu" # temp local path to export your model
|
|
|
+existing_bucket = "suika-un-labeled" # Bucket has prebuilt Model and Un-Labeled Data
|
|
|
+bucket_name = "suika-string-model" # s3 bucket name string to save your model
|
|
|
+filename = 'finalized_model.sav'
|
|
|
+pickle.dump(forest, open(filename, 'wb'))
|
|
|
+
|
|
|
+#you should now see your finalized_model.sav object in the file path
|
|
|
+#the ls command prints the contents of this notebook's folder
|
|
|
+print "list of the objects in this jupyter notebook's folder"
|
|
|
+!ls
|
|
|
+
|
|
|
+# Upload the saved model to S3
|
|
|
+import boto3
|
|
|
+s3 = boto3.resource('s3')
|
|
|
+s3.Bucket(bucket_name).put_object(Key='finalized_model.sav', Body=open('finalized_model.sav'))
|
|
|
+
|
|
|
+"""
|
|
|
+三三三三三三三三三三三三三三三三三三三三三三
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.998
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.022
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.002
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.004
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.006
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.998
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.002
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.998
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.994
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.69
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+1.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.998
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.156
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.006
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.998
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+1.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.038
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.298
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.002
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.998
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.002
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.968
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.026
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+1.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+1.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+1.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.02
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.012
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.058
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.008
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.026
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.988
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[1]
|
|
|
+the confidence of the prediction
|
|
|
+0.726
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.0
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.074
|
|
|
+a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
|
|
|
+[0]
|
|
|
+the confidence of the prediction
|
|
|
+0.01
|
|
|
+('v, nv ', 16, 34)
|
|
|
+list of the objects in this jupyter notebook's folder
|
|
|
+finalized_model.sav iris.ipynb lost+found
|
|
|
+s3.Object(bucket_name='suika-string-model', key='finalized_model.sav')
|
|
|
+
|
|
|
+"""
|