| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381 |
- #import the required packages
- import pandas as pd
- import warnings
- warnings.filterwarnings("ignore")
- import seaborn.apionly as sns
- import sklearn
- from sklearn.model_selection import train_test_split
- import numpy as np
- from sklearn.ensemble import RandomForestClassifier
- from scipy.stats import randint as sp_randint
- %matplotlib inline
- import pickle
# Load the 'Iris' dataset, display simple stats about its size and class
# balance, then show a random sample of rows.
df = pd.DataFrame(sns.load_dataset('iris'))
# Single-argument print(...) emits the same text under Python 2 and Python 3.
print('shape of the data frame' + str(df.shape))
print('We have an even spread of iris flower types')
print(df.groupby(['species']).size())
print('Display ten random rows from the iris dataset')
# Left as the cell's final expression so the notebook renders the sampled rows.
df.sample(n=10)
- """
- 一一一一一一一一一一一一一一一一一一一一一
- shape of the data frame(150, 5)
- We have an even spread of iris flower types
- species
- setosa 50
- versicolor 50
- virginica 50
- dtype: int64
- Display ten random rows from the iris dataset
- sepal_length sepal_width petal_length petal_width species
- 143 6.8 3.2 5.9 2.3 virginica
- 58 6.6 2.9 4.6 1.3 versicolor
- 148 6.2 3.4 5.4 2.3 virginica
- 91 6.1 3.0 4.6 1.4 versicolor
- 97 6.2 2.9 4.3 1.3 versicolor
- 74 6.4 2.9 4.3 1.3 versicolor
- 112 6.8 3.0 5.5 2.1 virginica
- 72 6.3 2.5 4.9 1.5 versicolor
- 104 6.5 3.0 5.8 2.2 virginica
- 114 5.8 2.8 5.1 2.4 virginica
- """
# Group setosa and virginica together for the sake of this machine learning
# exercise: the binary target 'y' is 1 for versicolor and 0 for everything else.
df['y'] = np.where(df['species'] == 'versicolor', 1, 0)
print(df.groupby(['y']).size())
print('we now have 50 versicolors and 100 non-versicolors')
# Features are every column except the raw label and the derived target.
# `axis` is passed by keyword: the positional form is rejected by newer pandas.
X = df.drop(['species', 'y'], axis=1)
y = df['y']
# Left as the cell's final expression so the notebook renders the sampled rows.
df.sample(n=10)
- """"
- 二二二二二二二二二二二二二二二二二二二二二二
- y
- 0 100
- 1 50
- dtype: int64
- we now have 50 versicolors and 100 non-versicolors
- sepal_length sepal_width petal_length petal_width species y
- 7 5.0 3.4 1.5 0.2 setosa 0
- 137 6.4 3.1 5.5 1.8 virginica 0
- 116 6.5 3.0 5.5 1.8 virginica 0
- 102 7.1 3.0 5.9 2.1 virginica 0
- 122 7.7 2.8 6.7 2.0 virginica 0
- 98 5.1 2.5 3.0 1.1 versicolor 1
- 13 4.3 3.0 1.1 0.1 setosa 0
- 14 5.8 4.0 1.2 0.2 setosa 0
- 53 5.5 2.3 4.0 1.3 versicolor 1
- 41 4.5 2.3 1.3 0.3 setosa 0
- """
# Initialize the random forest machine learning algorithm object.
# RANDOM_STATE pins the forest's random seed so repeated runs build the same trees.
RANDOM_STATE = 0
forest = RandomForestClassifier(
    n_estimators=500,
    random_state=RANDOM_STATE,
    # Must be a real boolean: the string "True" only worked through truthiness
    # and is rejected by scikit-learn releases that validate parameter types.
    oob_score=True,
)
# Train the random forest model on the feature matrix X and binary target y.
forest_model = forest.fit(X, y)
# Use the forest model to make predictions on new rows of data.
# Every entry of `n` is a one-row, two-dimensional sample ([[...]]) so it can be
# handed directly to predict()/predict_proba(). The feature order is
# 'sepal_length', 'sepal_width', 'petal_length', 'petal_width'.
n = [
    [[6.1, 2.9, 4.7, 1.4]],
    [[7.2, 3.2, 6.0, 1.8]],
    [[6.5, 3.0, 5.8, 2.2]],
    [[7.2, 3.6, 6.1, 2.5]],
    [[6.3, 3.4, 5.6, 2.4]],
    [[6.3, 3.3, 6.0, 2.5]],
    [[5.8, 2.6, 4.0, 1.2]],
    [[6.4, 3.1, 5.5, 1.8]],
    [[5.6, 3.0, 4.5, 1.5]],
    [[5.0, 2.3, 3.3, 1.0]],
    [[5.9, 3.2, 4.8, 1.8]],
    [[6.6, 2.9, 4.6, 1.3]],
    [[6.2, 2.9, 4.3, 1.3]],
    [[5.0, 3.6, 1.4, 0.2]],
    [[5.1, 3.8, 1.9, 0.4]],
    [[7.2, 3.0, 5.8, 1.6]],
    [[5.5, 3.5, 1.3, 0.2]],
    [[6.7, 3.1, 4.7, 1.5]],
    [[5.5, 2.4, 3.7, 1.0]],
    [[4.5, 2.3, 1.3, 0.3]],
    [[5.0, 3.2, 1.2, 0.2]],
    [[4.8, 3.0, 1.4, 0.1]],
    [[5.4, 3.9, 1.3, 0.4]],
    [[6.0, 2.2, 5.0, 1.5]],
    [[6.9, 3.2, 5.7, 2.3]],
    [[4.7, 3.2, 1.6, 0.2]],
    [[5.7, 3.0, 4.2, 1.2]],
    [[6.9, 3.1, 5.4, 2.1]],
    [[5.4, 3.0, 4.5, 1.5]],
    [[6.1, 3.0, 4.9, 1.8]],
    [[5.7, 2.6, 3.5, 1.0]],
    [[5.7, 2.8, 4.1, 1.3]],
    [[5.8, 2.7, 3.9, 1.2]],
    [[6.3, 2.7, 4.9, 1.8]],
    [[4.8, 3.1, 1.6, 0.2]],
    [[4.3, 3.0, 1.1, 0.1]],
    [[5.0, 3.4, 1.6, 0.4]],
    [[4.4, 3.0, 1.3, 0.2]],
    [[6.3, 2.5, 5.0, 1.9]],
    [[5.8, 2.8, 5.1, 2.4]],
    [[5.0, 3.3, 1.4, 0.2]],
    [[5.9, 3.0, 5.1, 1.8]],
    [[5.8, 2.7, 5.1, 1.9]],
    [[4.7, 3.2, 1.3, 0.2]],
    [[4.4, 2.9, 1.4, 0.2]],
    [[6.0, 2.2, 4.0, 1.0]],
    [[6.0, 2.7, 5.1, 1.6]],
    [[7.1, 3.0, 5.9, 2.1]],
    [[6.2, 2.8, 4.8, 1.8]],
    [[6.7, 2.5, 5.8, 1.8]],
]

v = 0   # rows tallied as versicolor (also absorbs low-confidence class-0 calls)
nv = 0  # rows confidently classified as non-versicolor
for new_flower in n:
    prediction = forest_model.predict(new_flower)
    print("a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor")
    print(prediction)

    # This is the prediction confidence for the forest_model on that row of
    # data being a versicolor iris (column 1 = class 1).
    prediction_proba = forest_model.predict_proba(new_flower)
    print('the confidence of the prediction')
    print(prediction_proba[0, 1])

    # Count a row as non-versicolor only when the model is confident in class 0.
    # The confidence in a class-0 call is column 0; testing column 1 here could
    # never succeed, because a row predicted as class 0 has P(class 1) <= 0.5.
    if prediction[0] == 0 and prediction_proba[0, 0] > 0.7:
        nv = nv + 1
    else:
        v = v + 1

# Printed as one tuple so the text is identical under Python 2 and Python 3.
print(("v, nv ", v, nv))
# Save (pickle) the model to disk and then upload it to S3.
import os

local_path = "/home/ubuntu"  # temp local path to export your model
existing_bucket = "suika-un-labeled"  # Bucket has prebuilt Model and Un-Labeled Data
bucket_name = "suika-string-model"  # s3 bucket name string to save your model
filename = 'finalized_model.sav'

# The context manager flushes and closes the file even if pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(forest, model_file)

# You should now see finalized_model.sav among the files in the working directory.
# os.listdir replaces the IPython-only `!ls` shell escape so this cell is plain Python.
print("list of the objects in this jupyter notebook's folder")
print(' '.join(sorted(os.listdir('.'))))

# Upload the saved model to S3.
import boto3
s3 = boto3.resource('s3')
# A pickle is binary data, so it is reopened in 'rb' mode; the handle is closed
# as soon as the upload call returns.
with open(filename, 'rb') as model_file:
    uploaded_object = s3.Bucket(bucket_name).put_object(Key=filename, Body=model_file)
# Left as the cell's final expression so the notebook shows the resulting s3.Object.
uploaded_object
- """
- 三三三三三三三三三三三三三三三三三三三三三三
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.998
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.022
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.002
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.004
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.006
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.998
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.002
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.998
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.994
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.69
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 1.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.998
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.156
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.006
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.998
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 1.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.038
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.298
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.002
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.998
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.002
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.968
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.026
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 1.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 1.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 1.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.02
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.012
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.058
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.008
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.026
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.988
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [1]
- the confidence of the prediction
- 0.726
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.0
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.074
- a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
- [0]
- the confidence of the prediction
- 0.01
- ('v, nv ', 16, 34)
- list of the objects in this jupyter notebook's folder
- finalized_model.sav iris.ipynb lost+found
- s3.Object(bucket_name='suika-string-model', key='finalized_model.sav')
- """
|