# jam-sagamaker.py
  1. #import the required packages
  2. import pandas as pd
  3. import warnings
  4. warnings.filterwarnings("ignore")
  5. import seaborn.apionly as sns
  6. import sklearn
  7. from sklearn.model_selection import train_test_split
  8. import numpy as np
  9. from sklearn.ensemble import RandomForestClassifier
  10. from scipy.stats import randint as sp_randint
  11. %matplotlib inline
  12. import pickle
  13. #load the 'Iris' dataset, display simple stats about data size, and then print sample data
  14. df = pd.DataFrame(sns.load_dataset('iris'))
  15. print 'shape of the data frame'+str(df.shape)
  16. print 'We have an even spread of iris flower types'
  17. print df.groupby(['species']).size()
  18. print'Display ten random rows from the iris dataset'
  19. df.(samplen=10)
  20. """
  21. 一一一一一一一一一一一一一一一一一一一一一
  22. shape of the data frame(150, 5)
  23. We have an even spread of iris flower types
  24. species
  25. setosa 50
  26. versicolor 50
  27. virginica 50
  28. dtype: int64
  29. Display ten random rows from the iris dataset
  30. sepal_length sepal_width petal_length petal_width species
  31. 143 6.8 3.2 5.9 2.3 virginica
  32. 58 6.6 2.9 4.6 1.3 versicolor
  33. 148 6.2 3.4 5.4 2.3 virginica
  34. 91 6.1 3.0 4.6 1.4 versicolor
  35. 97 6.2 2.9 4.3 1.3 versicolor
  36. 74 6.4 2.9 4.3 1.3 versicolor
  37. 112 6.8 3.0 5.5 2.1 virginica
  38. 72 6.3 2.5 4.9 1.5 versicolor
  39. 104 6.5 3.0 5.8 2.2 virginica
  40. 114 5.8 2.8 5.1 2.4 virginica
  41. """
  42. #let's group setosa and virginica together for the sake of this machine learning exercise
  43. df['y']= np.where(df['species']=='versicolor',1,0)
  44. print df.groupby(['y']).size()
  45. print 'we now have 50 versicolors and 100 non-versicolors'
  46. X=df.drop('species',1).drop('y',1)
  47. y=df['y']
  48. df.sample(n=10)
  49. """"
  50. 二二二二二二二二二二二二二二二二二二二二二二
  51. y
  52. 0 100
  53. 1 50
  54. dtype: int64
  55. we now have 50 versicolors and 100 non-versicolors
  56. sepal_length sepal_width petal_length petal_width species y
  57. 7 5.0 3.4 1.5 0.2 setosa 0
  58. 137 6.4 3.1 5.5 1.8 virginica 0
  59. 116 6.5 3.0 5.5 1.8 virginica 0
  60. 102 7.1 3.0 5.9 2.1 virginica 0
  61. 122 7.7 2.8 6.7 2.0 virginica 0
  62. 98 5.1 2.5 3.0 1.1 versicolor 1
  63. 13 4.3 3.0 1.1 0.1 setosa 0
  64. 14 5.8 4.0 1.2 0.2 setosa 0
  65. 53 5.5 2.3 4.0 1.3 versicolor 1
  66. 41 4.5 2.3 1.3 0.3 setosa 0
  67. """"
  68. #Initialize the random forest machine learning algorithm object
  69. RANDOM_STATE=0
  70. forest = RandomForestClassifier(n_estimators = 500, random_state=RANDOM_STATE, oob_score="True")
  71. #Train the random forest model on the data
  72. forest_model = forest.fit(X,y)
  73. #use the forest model to make a prediction on a new row of data
  74. #define a new array with the order of 'sepal_length', 'sepal_width', 'petal_length', and 'petal_width'
  75. n=[[[6.1,2.9,4.7,1.4]],
  76. [[7.2,3.2,6.0,1.8]],
  77. [[6.5,3.0,5.8,2.2]],
  78. [[7.2,3.6,6.1,2.5]],
  79. [[6.3,3.4,5.6,2.4]],
  80. [[6.3,3.3,6.0,2.5]],
  81. [[5.8,2.6,4.0,1.2]],
  82. [[6.4,3.1,5.5,1.8]],
  83. [[5.6,3.0,4.5,1.5]],
  84. [[5.0,2.3,3.3,1.0]],
  85. [[5.9,3.2,4.8,1.8]],
  86. [[6.6,2.9,4.6,1.3]],
  87. [[6.2,2.9,4.3,1.3]],
  88. [[5.0,3.6,1.4,0.2]],
  89. [[5.1,3.8,1.9,0.4]],
  90. [[7.2,3.0,5.8,1.6]],
  91. [[5.5,3.5,1.3,0.2]],
  92. [[6.7,3.1,4.7,1.5]],
  93. [[5.5,2.4,3.7,1.0]],
  94. [[4.5,2.3,1.3,0.3]],
  95. [[5.0,3.2,1.2,0.2]],
  96. [[4.8,3.0,1.4,0.1]],
  97. [[5.4,3.9,1.3,0.4]],
  98. [[6.0,2.2,5.0,1.5]],
  99. [[6.9,3.2,5.7,2.3]],
  100. [[4.7,3.2,1.6,0.2]],
  101. [[5.7,3.0,4.2,1.2]],
  102. [[6.9,3.1,5.4,2.1]],
  103. [[5.4,3.0,4.5,1.5]],
  104. [[6.1,3.0,4.9,1.8]],
  105. [[5.7,2.6,3.5,1.0]],
  106. [[5.7,2.8,4.1,1.3]],
  107. [[5.8,2.7,3.9,1.2]],
  108. [[6.3,2.7,4.9,1.8]],
  109. [[4.8,3.1,1.6,0.2]],
  110. [[4.3,3.0,1.1,0.1]],
  111. [[5.0,3.4,1.6,0.4]],
  112. [[4.4,3.0,1.3,0.2]],
  113. [[6.3,2.5,5.0,1.9]],
  114. [[5.8,2.8,5.1,2.4]],
  115. [[5.0,3.3,1.4,0.2]],
  116. [[5.9,3.0,5.1,1.8]],
  117. [[5.8,2.7,5.1,1.9]],
  118. [[4.7,3.2,1.3,0.2]],
  119. [[4.4,2.9,1.4,0.2]],
  120. [[6.0,2.2,4.0,1.0]],
  121. [[6.0,2.7,5.1,1.6]],
  122. [[7.1,3.0,5.9,2.1]],
  123. [[6.2,2.8,4.8,1.8]],
  124. [[6.7,2.5,5.8,1.8]]]
  125. #print new_flower
  126. v = 0
  127. nv = 0
  128. for new_flower in n :
  129. prediction=forest_model.predict(new_flower)
  130. print "a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor"
  131. print prediction
  132. #This is the prediction confidence for the forest_model on that row of data being a versicolor iris.
  133. prediction_proba=forest_model.predict_proba(new_flower)
  134. print 'the confidence of the prediction'
  135. print prediction_proba[0,1]
  136. if prediction == 0 && prediction_proba[0,1] > 0.7:
  137. nv = nv + 1
  138. else :
  139. v = v + 1
  140. print("v, nv ", v, nv)
  141. #save (pickle) your model to disk and then to s3
  142. local_path = "/home/ubuntu" # temp local path to export your model
  143. existing_bucket = "suika-un-labeled" # Bucket has prebuilt Model and Un-Labeled Data
  144. bucket_name = "suika-string-model" # s3 bucket name string to save your model
  145. filename = 'finalized_model.sav'
  146. pickle.dump(forest, open(filename, 'wb'))
  147. #you should now see your finalized_model.sav object in the file path
  148. #the ls command prints the contents of this notebook's folder
  149. print "list of the objects in this jupyter notebook's folder"
  150. !ls
  151. # Upload the saved model to S3
  152. import boto3
  153. s3 = boto3.resource('s3')
  154. s3.Bucket(bucket_name).put_object(Key='finalized_model.sav', Body=open('finalized_model.sav'))
  155. """
  156. 三三三三三三三三三三三三三三三三三三三三三三
  157. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  158. [1]
  159. the confidence of the prediction
  160. 0.998
  161. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  162. [0]
  163. the confidence of the prediction
  164. 0.022
  165. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  166. [0]
  167. the confidence of the prediction
  168. 0.0
  169. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  170. [0]
  171. the confidence of the prediction
  172. 0.002
  173. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  174. [0]
  175. the confidence of the prediction
  176. 0.004
  177. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  178. [0]
  179. the confidence of the prediction
  180. 0.006
  181. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  182. [1]
  183. the confidence of the prediction
  184. 0.998
  185. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  186. [0]
  187. the confidence of the prediction
  188. 0.002
  189. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  190. [1]
  191. the confidence of the prediction
  192. 0.998
  193. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  194. [1]
  195. the confidence of the prediction
  196. 0.994
  197. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  198. [1]
  199. the confidence of the prediction
  200. 0.69
  201. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  202. [1]
  203. the confidence of the prediction
  204. 1.0
  205. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  206. [1]
  207. the confidence of the prediction
  208. 0.998
  209. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  210. [0]
  211. the confidence of the prediction
  212. 0.0
  213. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  214. [0]
  215. the confidence of the prediction
  216. 0.0
  217. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  218. [0]
  219. the confidence of the prediction
  220. 0.156
  221. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  222. [0]
  223. the confidence of the prediction
  224. 0.006
  225. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  226. [1]
  227. the confidence of the prediction
  228. 0.998
  229. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  230. [1]
  231. the confidence of the prediction
  232. 1.0
  233. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  234. [0]
  235. the confidence of the prediction
  236. 0.038
  237. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  238. [0]
  239. the confidence of the prediction
  240. 0.0
  241. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  242. [0]
  243. the confidence of the prediction
  244. 0.0
  245. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  246. [0]
  247. the confidence of the prediction
  248. 0.0
  249. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  250. [0]
  251. the confidence of the prediction
  252. 0.298
  253. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  254. [0]
  255. the confidence of the prediction
  256. 0.002
  257. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  258. [0]
  259. the confidence of the prediction
  260. 0.0
  261. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  262. [1]
  263. the confidence of the prediction
  264. 0.998
  265. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  266. [0]
  267. the confidence of the prediction
  268. 0.002
  269. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  270. [1]
  271. the confidence of the prediction
  272. 0.968
  273. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  274. [0]
  275. the confidence of the prediction
  276. 0.026
  277. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  278. [1]
  279. the confidence of the prediction
  280. 1.0
  281. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  282. [1]
  283. the confidence of the prediction
  284. 1.0
  285. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  286. [1]
  287. the confidence of the prediction
  288. 1.0
  289. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  290. [0]
  291. the confidence of the prediction
  292. 0.02
  293. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  294. [0]
  295. the confidence of the prediction
  296. 0.0
  297. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  298. [0]
  299. the confidence of the prediction
  300. 0.0
  301. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  302. [0]
  303. the confidence of the prediction
  304. 0.0
  305. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  306. [0]
  307. the confidence of the prediction
  308. 0.0
  309. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  310. [0]
  311. the confidence of the prediction
  312. 0.012
  313. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  314. [0]
  315. the confidence of the prediction
  316. 0.0
  317. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  318. [0]
  319. the confidence of the prediction
  320. 0.0
  321. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  322. [0]
  323. the confidence of the prediction
  324. 0.058
  325. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  326. [0]
  327. the confidence of the prediction
  328. 0.008
  329. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  330. [0]
  331. the confidence of the prediction
  332. 0.0
  333. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  334. [0]
  335. the confidence of the prediction
  336. 0.026
  337. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  338. [1]
  339. the confidence of the prediction
  340. 0.988
  341. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  342. [1]
  343. the confidence of the prediction
  344. 0.726
  345. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  346. [0]
  347. the confidence of the prediction
  348. 0.0
  349. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  350. [0]
  351. the confidence of the prediction
  352. 0.074
  353. a prediction of '1' is for verisicolor. '0' is for prediction of non-versicolor
  354. [0]
  355. the confidence of the prediction
  356. 0.01
  357. ('v, nv ', 16, 34)
  358. list of the objects in this jupyter notebook's folder
  359. finalized_model.sav iris.ipynb lost+found
  360. s3.Object(bucket_name='suika-string-model', key='finalized_model.sav')
  361. """