# -*- coding: utf-8 -*-
"""Train a random forest that separates versicolor irises from the other species.

Notebook export.  The steps are:

1. load the seaborn 'iris' data set and print simple statistics,
2. derive a binary target ``y`` (1 = versicolor, 0 = anything else),
3. fit a 500-tree random forest on the four measurement columns,
4. predict 50 hand-written samples and tally the results,
5. pickle the fitted model to disk and upload the file to S3.

The triple-quoted blocks after each step are the output recorded from a
notebook run; they are kept for reference and have no effect when executed.
"""

# Import the required packages.
import pickle
import warnings

# Installed before the remaining imports, as in the original, so that
# warnings raised while importing them are silenced too.
warnings.filterwarnings("ignore")

import boto3
import numpy as np
import pandas as pd
import seaborn.apionly as sns
import sklearn
from scipy.stats import randint as sp_randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Equivalent of the `%matplotlib inline` magic, spelled as a plain call so the
# file parses as Python; `get_ipython` only exists inside IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

# Load the 'Iris' dataset, display simple stats about data size, and then
# print sample data.
df = pd.DataFrame(sns.load_dataset('iris'))
print('shape of the data frame' + str(df.shape))
print('We have an even spread of iris flower types')
print(df.groupby(['species']).size())
print('Display ten random rows from the iris dataset')
df.sample(n=10)

"""
一一一一一一一一一一一一一一一一一一一一一
shape of the data frame(150, 5)
We have an even spread of iris flower types
species
setosa        50
versicolor    50
virginica     50
dtype: int64
Display ten random rows from the iris dataset
     sepal_length  sepal_width  petal_length  petal_width     species
143           6.8          3.2           5.9          2.3   virginica
58            6.6          2.9           4.6          1.3  versicolor
148           6.2          3.4           5.4          2.3   virginica
91            6.1          3.0           4.6          1.4  versicolor
97            6.2          2.9           4.3          1.3  versicolor
74            6.4          2.9           4.3          1.3  versicolor
112           6.8          3.0           5.5          2.1   virginica
72            6.3          2.5           4.9          1.5  versicolor
104           6.5          3.0           5.8          2.2   virginica
114           5.8          2.8           5.1          2.4   virginica
"""

# Let's group setosa and virginica together for the sake of this machine
# learning exercise: y is 1 for versicolor and 0 for everything else.
df['y'] = np.where(df['species'] == 'versicolor', 1, 0)
print(df.groupby(['y']).size())
print('we now have 50 versicolors and 100 non-versicolors')
# Features are every column except the species label and the derived target.
X = df.drop(['species', 'y'], axis=1)
y = df['y']
df.sample(n=10)

"""
二二二二二二二二二二二二二二二二二二二二二二
y
0    100
1     50
dtype: int64
we now have 50 versicolors and 100 non-versicolors
     sepal_length  sepal_width  petal_length  petal_width     species  y
7             5.0          3.4           1.5          0.2      setosa  0
137           6.4          3.1           5.5          1.8   virginica  0
116           6.5          3.0           5.5          1.8   virginica  0
102           7.1          3.0           5.9          2.1   virginica  0
122           7.7          2.8           6.7          2.0   virginica  0
98            5.1          2.5           3.0          1.1  versicolor  1
13            4.3          3.0           1.1          0.1      setosa  0
14            5.8          4.0           1.2          0.2      setosa  0
53            5.5          2.3           4.0          1.3  versicolor  1
41            4.5          2.3           1.3          0.3      setosa  0
"""

# Initialize the random forest machine learning algorithm object.
RANDOM_STATE = 0
# oob_score is a boolean flag; the original passed the string "True".
forest = RandomForestClassifier(n_estimators=500,
                                random_state=RANDOM_STATE,
                                oob_score=True)

# Train the random forest model on the data.
forest_model = forest.fit(X, y)

# Use the forest model to make predictions on new rows of data.
# Each entry is a one-row batch ([[...]]) in the column order
# 'sepal_length', 'sepal_width', 'petal_length', 'petal_width'.
n = [
    [[6.1, 2.9, 4.7, 1.4]],
    [[7.2, 3.2, 6.0, 1.8]],
    [[6.5, 3.0, 5.8, 2.2]],
    [[7.2, 3.6, 6.1, 2.5]],
    [[6.3, 3.4, 5.6, 2.4]],
    [[6.3, 3.3, 6.0, 2.5]],
    [[5.8, 2.6, 4.0, 1.2]],
    [[6.4, 3.1, 5.5, 1.8]],
    [[5.6, 3.0, 4.5, 1.5]],
    [[5.0, 2.3, 3.3, 1.0]],
    [[5.9, 3.2, 4.8, 1.8]],
    [[6.6, 2.9, 4.6, 1.3]],
    [[6.2, 2.9, 4.3, 1.3]],
    [[5.0, 3.6, 1.4, 0.2]],
    [[5.1, 3.8, 1.9, 0.4]],
    [[7.2, 3.0, 5.8, 1.6]],
    [[5.5, 3.5, 1.3, 0.2]],
    [[6.7, 3.1, 4.7, 1.5]],
    [[5.5, 2.4, 3.7, 1.0]],
    [[4.5, 2.3, 1.3, 0.3]],
    [[5.0, 3.2, 1.2, 0.2]],
    [[4.8, 3.0, 1.4, 0.1]],
    [[5.4, 3.9, 1.3, 0.4]],
    [[6.0, 2.2, 5.0, 1.5]],
    [[6.9, 3.2, 5.7, 2.3]],
    [[4.7, 3.2, 1.6, 0.2]],
    [[5.7, 3.0, 4.2, 1.2]],
    [[6.9, 3.1, 5.4, 2.1]],
    [[5.4, 3.0, 4.5, 1.5]],
    [[6.1, 3.0, 4.9, 1.8]],
    [[5.7, 2.6, 3.5, 1.0]],
    [[5.7, 2.8, 4.1, 1.3]],
    [[5.8, 2.7, 3.9, 1.2]],
    [[6.3, 2.7, 4.9, 1.8]],
    [[4.8, 3.1, 1.6, 0.2]],
    [[4.3, 3.0, 1.1, 0.1]],
    [[5.0, 3.4, 1.6, 0.4]],
    [[4.4, 3.0, 1.3, 0.2]],
    [[6.3, 2.5, 5.0, 1.9]],
    [[5.8, 2.8, 5.1, 2.4]],
    [[5.0, 3.3, 1.4, 0.2]],
    [[5.9, 3.0, 5.1, 1.8]],
    [[5.8, 2.7, 5.1, 1.9]],
    [[4.7, 3.2, 1.3, 0.2]],
    [[4.4, 2.9, 1.4, 0.2]],
    [[6.0, 2.2, 4.0, 1.0]],
    [[6.0, 2.7, 5.1, 1.6]],
    [[7.1, 3.0, 5.9, 2.1]],
    [[6.2, 2.8, 4.8, 1.8]],
    [[6.7, 2.5, 5.8, 1.8]],
]

v = 0   # samples counted as versicolor (or not confidently non-versicolor)
nv = 0  # samples confidently predicted as non-versicolor
for new_flower in n:
    prediction = forest_model.predict(new_flower)
    print("a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor")
    print(prediction)
    # This is the prediction confidence for the forest_model on that row of
    # data being a versicolor iris (column 1 = class 1).
    prediction_proba = forest_model.predict_proba(new_flower)
    print('the confidence of the prediction')
    print(prediction_proba[0, 1])
    # The original test was `prediction == 0 && prediction_proba[0,1] > 0.7`:
    # `&&` is not Python, and a 0 prediction cannot coincide with a class-1
    # probability above 0.7, so `nv` could never grow.  Checking the class-0
    # probability instead reproduces the recorded tally ('v, nv ', 16, 34).
    # NOTE(review): confirm this is the intended confidence rule.
    if prediction[0] == 0 and prediction_proba[0, 0] > 0.7:
        nv = nv + 1
    else:
        v = v + 1
# Printing the tuple itself gives the same text on Python 2 and Python 3.
print(("v, nv ", v, nv))

# Save (pickle) your model to disk and then to s3.
local_path = "/home/ubuntu"  # temp local path to export your model
existing_bucket = "suika-un-labeled"  # Bucket has prebuilt Model and Un-Labeled Data
bucket_name = "suika-string-model"  # s3 bucket name string to save your model
filename = 'finalized_model.sav'
# The context manager flushes and closes the file before it is listed/uploaded.
with open(filename, 'wb') as model_file:
    pickle.dump(forest, model_file)

# You should now see your finalized_model.sav object in the file path.
# The ls command prints the contents of this notebook's folder
# (plain-Python spelling of the `!ls` shell escape).
print("list of the objects in this jupyter notebook's folder")
get_ipython().system('ls')

# Upload the saved model to S3.  A pickle is binary data, so the file is
# opened in 'rb' mode and closed once the upload call returns.
s3 = boto3.resource('s3')
with open(filename, 'rb') as model_file:
    uploaded_object = s3.Bucket(bucket_name).put_object(Key=filename,
                                                        Body=model_file)
uploaded_object

"""
三三三三三三三三三三三三三三三三三三三三三三
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.998
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.022
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.002
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.004
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.006
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.998
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.002
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.998
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.994
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.69
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
1.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.998
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.156
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.006
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.998
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
1.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.038
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.298
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.002
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.998
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.002
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.968
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.026
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
1.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
1.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
1.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.02
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.012
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.058
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.008
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.026
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.988
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[1]
the confidence of the prediction
0.726
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.0
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.074
a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
[0]
the confidence of the prediction
0.01
('v, nv ', 16, 34)
list of the objects in this jupyter notebook's folder
finalized_model.sav  iris.ipynb  lost+found
s3.Object(bucket_name='suika-string-model', key='finalized_model.sav')
"""