{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1、读取数据\n",
    "path = \"https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data\"\n",
    "column_name = ['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape',\n",
    "                   'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',\n",
    "                   'Normal Nucleoli', 'Mitoses', 'Class']\n",
    "\n",
    "data = pd.read_csv(path, names=column_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sample code number</th>\n",
       "      <th>Clump Thickness</th>\n",
       "      <th>Uniformity of Cell Size</th>\n",
       "      <th>Uniformity of Cell Shape</th>\n",
       "      <th>Marginal Adhesion</th>\n",
       "      <th>Single Epithelial Cell Size</th>\n",
       "      <th>Bare Nuclei</th>\n",
       "      <th>Bland Chromatin</th>\n",
       "      <th>Normal Nucleoli</th>\n",
       "      <th>Mitoses</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1000025</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1002945</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1015425</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1016277</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1017023</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sample code number  Clump Thickness  Uniformity of Cell Size  \\\n",
       "0             1000025                5                        1   \n",
       "1             1002945                5                        4   \n",
       "2             1015425                3                        1   \n",
       "3             1016277                6                        8   \n",
       "4             1017023                4                        1   \n",
       "\n",
       "   Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  \\\n",
       "0                         1                  1                            2   \n",
       "1                         4                  5                            7   \n",
       "2                         1                  1                            2   \n",
       "3                         8                  1                            3   \n",
       "4                         1                  3                            2   \n",
       "\n",
       "  Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  Class  \n",
       "0           1                3                1        1      2  \n",
       "1          10                3                2        1      2  \n",
       "2           2                3                1        1      2  \n",
       "3           4                3                7        1      2  \n",
       "4           1                3                1        1      2  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2、缺失值处理\n",
    "# 1）替换-》np.nan\n",
    "data = data.replace(to_replace=\"?\", value=np.nan)\n",
    "# 2）删除缺失样本\n",
    "data.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Sample code number             False\n",
       "Clump Thickness                False\n",
       "Uniformity of Cell Size        False\n",
       "Uniformity of Cell Shape       False\n",
       "Marginal Adhesion              False\n",
       "Single Epithelial Cell Size    False\n",
       "Bare Nuclei                    False\n",
       "Bland Chromatin                False\n",
       "Normal Nucleoli                False\n",
       "Mitoses                        False\n",
       "Class                          False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isnull().any() # 不存在缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3、划分数据集\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sample code number</th>\n",
       "      <th>Clump Thickness</th>\n",
       "      <th>Uniformity of Cell Size</th>\n",
       "      <th>Uniformity of Cell Shape</th>\n",
       "      <th>Marginal Adhesion</th>\n",
       "      <th>Single Epithelial Cell Size</th>\n",
       "      <th>Bare Nuclei</th>\n",
       "      <th>Bland Chromatin</th>\n",
       "      <th>Normal Nucleoli</th>\n",
       "      <th>Mitoses</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1000025</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1002945</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1015425</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1016277</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1017023</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sample code number  Clump Thickness  Uniformity of Cell Size  \\\n",
       "0             1000025                5                        1   \n",
       "1             1002945                5                        4   \n",
       "2             1015425                3                        1   \n",
       "3             1016277                6                        8   \n",
       "4             1017023                4                        1   \n",
       "\n",
       "   Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  \\\n",
       "0                         1                  1                            2   \n",
       "1                         4                  5                            7   \n",
       "2                         1                  1                            2   \n",
       "3                         8                  1                            3   \n",
       "4                         1                  3                            2   \n",
       "\n",
       "  Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  Class  \n",
       "0           1                3                1        1      2  \n",
       "1          10                3                2        1      2  \n",
       "2           2                3                1        1      2  \n",
       "3           4                3                7        1      2  \n",
       "4           1                3                1        1      2  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 筛选特征值和目标值\n",
    "x = data.iloc[:, 1:-1]\n",
    "y = data[\"Class\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Clump Thickness</th>\n",
       "      <th>Uniformity of Cell Size</th>\n",
       "      <th>Uniformity of Cell Shape</th>\n",
       "      <th>Marginal Adhesion</th>\n",
       "      <th>Single Epithelial Cell Size</th>\n",
       "      <th>Bare Nuclei</th>\n",
       "      <th>Bland Chromatin</th>\n",
       "      <th>Normal Nucleoli</th>\n",
       "      <th>Mitoses</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Clump Thickness  Uniformity of Cell Size  Uniformity of Cell Shape  \\\n",
       "0                5                        1                         1   \n",
       "1                5                        4                         4   \n",
       "2                3                        1                         1   \n",
       "3                6                        8                         8   \n",
       "4                4                        1                         1   \n",
       "\n",
       "   Marginal Adhesion  Single Epithelial Cell Size Bare Nuclei  \\\n",
       "0                  1                            2           1   \n",
       "1                  5                            7          10   \n",
       "2                  1                            2           2   \n",
       "3                  1                            3           4   \n",
       "4                  3                            2           1   \n",
       "\n",
       "   Bland Chromatin  Normal Nucleoli  Mitoses  \n",
       "0                3                1        1  \n",
       "1                3                2        1  \n",
       "2                3                1        1  \n",
       "3                3                7        1  \n",
       "4                3                1        1  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2\n",
       "1    2\n",
       "2    2\n",
       "3    2\n",
       "4    2\n",
       "Name: Class, dtype: int64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train, x_test, y_train, y_test = train_test_split(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Clump Thickness</th>\n",
       "      <th>Uniformity of Cell Size</th>\n",
       "      <th>Uniformity of Cell Shape</th>\n",
       "      <th>Marginal Adhesion</th>\n",
       "      <th>Single Epithelial Cell Size</th>\n",
       "      <th>Bare Nuclei</th>\n",
       "      <th>Bland Chromatin</th>\n",
       "      <th>Normal Nucleoli</th>\n",
       "      <th>Mitoses</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>6</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>413</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>371</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>331</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Clump Thickness  Uniformity of Cell Size  Uniformity of Cell Shape  \\\n",
       "71                 6                       10                         2   \n",
       "413                5                        1                         2   \n",
       "159                9                        9                        10   \n",
       "371                1                        1                         3   \n",
       "331                5                        1                         1   \n",
       "\n",
       "     Marginal Adhesion  Single Epithelial Cell Size Bare Nuclei  \\\n",
       "71                   8                           10           2   \n",
       "413                  1                            2           1   \n",
       "159                  3                            6          10   \n",
       "371                  1                            2           1   \n",
       "331                  1                            2           1   \n",
       "\n",
       "     Bland Chromatin  Normal Nucleoli  Mitoses  \n",
       "71                 7                8       10  \n",
       "413                3                1        1  \n",
       "159                7               10        6  \n",
       "371                1                1        1  \n",
       "331                3                1        2  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4、标准化\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/zwy/.virtualenvs/python4/lib/python3.6/site-packages/sklearn/preprocessing/data.py:617: DataConversionWarning: Data with input dtype int64, object were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "/Users/zwy/.virtualenvs/python4/lib/python3.6/site-packages/sklearn/base.py:462: DataConversionWarning: Data with input dtype int64, object were all converted to float64 by StandardScaler.\n",
      "  return self.fit(X, **fit_params).transform(X)\n",
      "/Users/zwy/.virtualenvs/python4/lib/python3.6/site-packages/ipykernel_launcher.py:3: DataConversionWarning: Data with input dtype int64, object were all converted to float64 by StandardScaler.\n",
      "  This is separate from the ipykernel package so we can avoid doing imports until\n"
     ]
    }
   ],
   "source": [
    "transfer = StandardScaler()\n",
    "x_train = transfer.fit_transform(x_train)\n",
    "x_test = transfer.transform(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.55384961,  2.25850408, -0.39241988, ...,  1.48150122,\n",
       "         1.58884606,  4.5986847 ],\n",
       "       [ 0.20071013, -0.68803805, -0.39241988, ..., -0.17196717,\n",
       "        -0.61573167, -0.34779968],\n",
       "       [ 1.61326804,  1.93111051,  2.29545943, ...,  1.48150122,\n",
       "         2.21872542,  2.4002472 ],\n",
       "       ...,\n",
       "       [-0.50556882, -0.68803805, -0.72840479, ..., -0.99870137,\n",
       "        -0.61573167, -0.34779968],\n",
       "       [ 0.20071013,  2.25850408,  2.29545943, ...,  0.65476703,\n",
       "         2.21872542,  0.75141907],\n",
       "       [ 1.96640752,  0.29414266,  0.27954995, ...,  0.65476703,\n",
       "         0.01414768,  0.75141907]])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/zwy/.virtualenvs/python4/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='warn',\n",
       "          n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
       "          tol=0.0001, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 5、预估器流程\n",
    "estimator = LogisticRegression()\n",
    "estimator.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.39074328, 0.24254804, 0.77224014, 0.74201473, 0.23764145,\n",
       "        1.04861389, 0.90381812, 0.53872577, 0.90103902]])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 逻辑回归的模型参数：回归系数和偏置\n",
    "estimator.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-1.18507135])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "estimator.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y_predict:\n",
      " [2 2 2 2 4 4 2 2 2 2 4 4 2 4 2 2 4 2 4 4 2 2 4 4 2 4 2 2 4 2 2 2 2 4 2 4 4\n",
      " 4 2 2 4 2 4 4 2 2 2 4 4 2 2 2 2 2 2 4 4 2 2 2 4 4 4 2 4 2 4 2 2 2 2 2 4 2\n",
      " 4 4 2 2 4 2 2 2 2 2 2 4 4 4 2 2 2 2 2 2 2 2 2 2 4 4 4 4 4 2 2 2 2 2 2 4 2\n",
      " 2 4 4 2 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 2 2 2 2 2 4 2 4 4 2 4 2 2 4 4 4 4\n",
      " 2 4 2 2 4 4 2 2 2 4 2 2 2 4 4 2 2 2 2 2 2 4 2]\n",
      "直接比对真实值和预测值:\n",
      " 432     True\n",
      "256     True\n",
      "203     True\n",
      "281     True\n",
      "588     True\n",
      "590     True\n",
      "66      True\n",
      "473     True\n",
      "637     True\n",
      "537     True\n",
      "103     True\n",
      "334     True\n",
      "8       True\n",
      "44      True\n",
      "141     True\n",
      "90      True\n",
      "329     True\n",
      "454     True\n",
      "440     True\n",
      "200     True\n",
      "665     True\n",
      "495     True\n",
      "279     True\n",
      "272     True\n",
      "43     False\n",
      "282     True\n",
      "22      True\n",
      "548     True\n",
      "330     True\n",
      "654     True\n",
      "       ...  \n",
      "340     True\n",
      "470     True\n",
      "343     True\n",
      "253     True\n",
      "264     True\n",
      "262     True\n",
      "132     True\n",
      "51     False\n",
      "42      True\n",
      "250     True\n",
      "651     True\n",
      "232    False\n",
      "581     True\n",
      "94      True\n",
      "385     True\n",
      "566     True\n",
      "386     True\n",
      "234     True\n",
      "189     True\n",
      "556     True\n",
      "605     True\n",
      "112     True\n",
      "529     True\n",
      "345     True\n",
      "410     True\n",
      "327     True\n",
      "480     True\n",
      "350     True\n",
      "381     True\n",
      "545     True\n",
      "Name: Class, Length: 171, dtype: bool\n",
      "准确率为：\n",
      " 0.9649122807017544\n"
     ]
    }
   ],
   "source": [
    "# 6、模型评估\n",
    "# 方法1：直接比对真实值和预测值\n",
    "y_predict = estimator.predict(x_test)\n",
    "print(\"y_predict:\\n\", y_predict)\n",
    "print(\"直接比对真实值和预测值:\\n\", y_test == y_predict)\n",
    "\n",
    "# 方法2：计算准确率\n",
    "score = estimator.score(x_test, y_test)\n",
    "print(\"准确率为：\\n\", score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看精确率、召回率、F1-score\n",
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "report = classification_report(y_test, y_predict, labels=[2, 4], target_names=[\"良性\", \"恶性\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "          良性       0.95      0.99      0.97       107\n",
      "          恶性       0.98      0.92      0.95        64\n",
      "\n",
      "   micro avg       0.96      0.96      0.96       171\n",
      "   macro avg       0.97      0.96      0.96       171\n",
      "weighted avg       0.97      0.96      0.96       171\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(report)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "432    2\n",
       "256    2\n",
       "203    2\n",
       "281    2\n",
       "588    4\n",
       "Name: Class, dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_true：每个样本的真实类别，必须为0(反例),1(正例)标记\n",
    "# 将y_test 转换成 0 1\n",
    "y_true = np.where(y_test > 3, 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,\n",
       "       1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,\n",
       "       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n",
       "       1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n",
       "       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,\n",
       "       0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n",
       "       0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,\n",
       "       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_true"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import roc_auc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9562646028037384"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roc_auc_score(y_true, y_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}