diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index d0b3ffa..432ee08 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -10,12 +10,12 @@ "output_type": "stream", "text": [ "CPython 3.5.5\n", - "IPython 6.2.1\n", + "IPython 6.3.1\n", "\n", - "numpy 1.14.1\n", - "scipy 1.0.0\n", + "numpy 1.14.3\n", + "scipy 1.0.1\n", "sklearn 0.19.1\n", - "pandas 0.22.0\n", + "pandas 0.23.0\n", "matplotlib 2.2.2\n" ] } @@ -196,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -209,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -221,14 +221,6 @@ "SVC 0.888\n", "VotingClassifier 0.896\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/haesun/anaconda3/envs/handson-ml/lib/python3.5/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] } ], "source": [ @@ -242,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -257,7 +249,7 @@ " flatten_transform=None, n_jobs=1, voting='soft', weights=None)" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -275,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -287,14 +279,6 @@ "SVC 0.888\n", "VotingClassifier 0.912\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/haesun/anaconda3/envs/handson-ml/lib/python3.5/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] } ], "source": [ @@ -315,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -331,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -349,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -369,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -395,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -430,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -441,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -451,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -465,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -474,7 +458,7 @@ "0.976" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -485,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -510,7 +494,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -519,7 +503,7 @@ "array([0.11249225, 0.02311929, 0.44103046, 0.423358 ])" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -530,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -565,7 +549,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -574,7 +558,7 @@ "0.9013333333333333" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -589,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -972,7 +956,7 @@ " [0.57291667, 0.42708333]])" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -983,7 +967,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -992,7 +976,7 @@ "0.912" ] }, - "execution_count": 24, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1012,7 +996,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1022,7 +1006,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1036,7 +1020,7 @@ " oob_score=False, random_state=42, verbose=0, warm_start=False)" ] }, - "execution_count": 26, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1048,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -1061,7 +1045,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1094,7 +1078,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1110,7 +1094,7 @@ " learning_rate=0.5, n_estimators=200, random_state=42)" ] }, - "execution_count": 29, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1126,7 +1110,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1146,7 +1130,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1187,7 +1171,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1202,7 +1186,7 @@ " 'n_classes_']" ] }, - "execution_count": 32, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1220,7 +1204,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -1229,33 +1213,6 @@ "y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)" ] }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,\n", - " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", - " min_impurity_split=None, min_samples_leaf=1,\n", - " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", - " presort=False, random_state=42, splitter='best')" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.tree import DecisionTreeRegressor\n", - "\n", - "tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", - "tree_reg1.fit(X, y)" - ] - }, { "cell_type": "code", "execution_count": 35, @@ -1277,9 +1234,10 @@ } ], "source": [ - "y2 = y - tree_reg1.predict(X)\n", - "tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", - "tree_reg2.fit(X, y2)" + "from sklearn.tree import DecisionTreeRegressor\n", + "\n", + "tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg1.fit(X, y)" ] }, { @@ -1303,18 +1261,35 @@ } ], "source": [ - "y3 = y2 - tree_reg2.predict(X)\n", - "tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", - "tree_reg3.fit(X, y3)" + "y2 = y - tree_reg1.predict(X)\n", + "tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg2.fit(X, y2)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,\n", + " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", + " min_impurity_split=None, min_samples_leaf=1,\n", + " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", + " presort=False, random_state=42, splitter='best')" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "X_new = np.array([[0.8]])" + "y3 = y2 - tree_reg2.predict(X)\n", + "tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg3.fit(X, y3)" ] }, { @@ -1323,13 +1298,22 @@ "metadata": {}, "outputs": [], "source": [ - "y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))" + "X_new = np.array([[0.8]])" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, + "outputs": [], + "source": [ + "y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, "outputs": [ { "data": { @@ -1337,7 +1321,7 @@ "array([0.75026781])" ] }, - "execution_count": 39, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1348,7 +1332,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -1408,7 +1392,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -1423,7 +1407,7 @@ " subsample=1.0, verbose=0, warm_start=False)" ] }, - "execution_count": 41, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -1437,7 +1421,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -1452,7 +1436,7 @@ " subsample=1.0, verbose=0, warm_start=False)" ] }, - "execution_count": 42, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1464,7 +1448,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "metadata": { "scrolled": true }, @@ -1504,7 +1488,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -1519,7 +1503,7 @@ " subsample=1.0, verbose=0, warm_start=False)" ] }, - "execution_count": 44, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1544,7 +1528,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -1553,7 +1537,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -1590,7 +1574,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -1614,7 +1598,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -1631,9 +1615,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "최소 검증 MSE: 0.002712853325235463\n" + ] + } + ], "source": [ "print(\"최소 검증 MSE:\", min_val_error)" ] @@ -1647,7 +1639,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -1660,9 +1652,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "검증 MSE: 0.0028512559726563943\n" + ] + } + ], "source": [ "if xgboost is not None: # 책에는 없음\n", " xgb_reg = xgboost.XGBRegressor(random_state=42)\n", @@ -1674,9 +1674,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0]\tvalidation_0-rmse:0.286719\n", + "Will train until validation_0-rmse hasn't improved in 2 rounds.\n", + "[1]\tvalidation_0-rmse:0.258221\n", + "[2]\tvalidation_0-rmse:0.232634\n", + "[3]\tvalidation_0-rmse:0.210526\n", + "[4]\tvalidation_0-rmse:0.190232\n", + "[5]\tvalidation_0-rmse:0.172196\n", + "[6]\tvalidation_0-rmse:0.156394\n", + "[7]\tvalidation_0-rmse:0.142241\n", + "[8]\tvalidation_0-rmse:0.129789\n", + "[9]\tvalidation_0-rmse:0.118752\n", + "[10]\tvalidation_0-rmse:0.108388\n", + "[11]\tvalidation_0-rmse:0.100155\n", + "[12]\tvalidation_0-rmse:0.09208\n", + "[13]\tvalidation_0-rmse:0.084791\n", + "[14]\tvalidation_0-rmse:0.078699\n", + "[15]\tvalidation_0-rmse:0.073248\n", + "[16]\tvalidation_0-rmse:0.069391\n", + "[17]\tvalidation_0-rmse:0.066277\n", + "[18]\tvalidation_0-rmse:0.063458\n", + "[19]\tvalidation_0-rmse:0.060326\n", + "[20]\tvalidation_0-rmse:0.0578\n", + "[21]\tvalidation_0-rmse:0.055643\n", + "[22]\tvalidation_0-rmse:0.053943\n", + "[23]\tvalidation_0-rmse:0.053138\n", + "[24]\tvalidation_0-rmse:0.052415\n", + "[25]\tvalidation_0-rmse:0.051821\n", + "[26]\tvalidation_0-rmse:0.051226\n", + "[27]\tvalidation_0-rmse:0.051135\n", + "[28]\tvalidation_0-rmse:0.05091\n", + "[29]\tvalidation_0-rmse:0.050893\n", + "[30]\tvalidation_0-rmse:0.050725\n", + "[31]\tvalidation_0-rmse:0.050471\n", + "[32]\tvalidation_0-rmse:0.050285\n", + "[33]\tvalidation_0-rmse:0.050492\n", + "[34]\tvalidation_0-rmse:0.050348\n", + "Stopping. Best iteration:\n", + "[32]\tvalidation_0-rmse:0.050285\n", + "\n", + "검증 MSE: 0.0025349167568108864\n" + ] + } + ], "source": [ "if xgboost is not None: # 책에는 없음\n", " xgb_reg.fit(X_train, y_train,\n", @@ -1688,18 +1735,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.3 ms ± 452 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], "source": [ "%timeit xgboost.XGBRegressor().fit(X_train, y_train) if xgboost is not None else None" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9.82 ms ± 22.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], "source": [ "%timeit GradientBoostingRegressor().fit(X_train, y_train)" ] @@ -1733,7 +1796,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -1742,7 +1805,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -1751,7 +1814,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ @@ -1760,7 +1823,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ @@ -1779,7 +1842,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -1790,7 +1853,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -1802,9 +1865,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "훈련 예측기: RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False)\n", + "훈련 예측기: ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False)\n", + "훈련 예측기: LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n", + " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n", + " multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,\n", + " verbose=0)\n", + "훈련 예측기: MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n", + " beta_2=0.999, early_stopping=False, epsilon=1e-08,\n", + " hidden_layer_sizes=(100,), learning_rate='constant',\n", + " learning_rate_init=0.001, max_iter=200, momentum=0.9,\n", + " nesterovs_momentum=True, power_t=0.5, random_state=42, shuffle=True,\n", + " solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,\n", + " warm_start=False)\n" + ] + } + ], "source": [ "estimators = [random_forest_clf, extra_trees_clf, svm_clf, mlp_clf]\n", "for estimator in estimators:\n", @@ -1814,9 +1907,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9467, 0.9512, 0.8661, 0.9588]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[estimator.score(X_val, y_val) for estimator in estimators]" ] @@ -1837,7 +1941,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": {}, "outputs": [], "source": [ @@ -1846,7 +1950,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -1860,7 +1964,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ @@ -1869,27 +1973,66 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "VotingClassifier(estimators=[('random_forest_clf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " ... solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,\n", + " warm_start=False))],\n", + " flatten_transform=None, n_jobs=1, voting='hard', weights=None)" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.fit(X_train, y_train)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.961" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.score(X_val, y_val)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9467, 0.9512, 0.8661, 0.9588]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[estimator.score(X_val, y_val) for estimator in voting_clf.estimators_]" ] @@ -1903,9 +2046,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 70, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "VotingClassifier(estimators=[('random_forest_clf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " ... solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,\n", + " warm_start=False))],\n", + " flatten_transform=None, n_jobs=1, voting='hard', weights=None)" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.set_params(svm_clf=None)" ] @@ -1919,9 +2079,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('random_forest_clf',\n", + " RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False)),\n", + " ('extra_trees_clf',\n", + " ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False)),\n", + " ('svm_clf', None),\n", + " ('mlp_clf',\n", + " MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n", + " beta_2=0.999, early_stopping=False, epsilon=1e-08,\n", + " hidden_layer_sizes=(100,), learning_rate='constant',\n", + " learning_rate_init=0.001, max_iter=200, momentum=0.9,\n", + " nesterovs_momentum=True, power_t=0.5, random_state=42, shuffle=True,\n", + " solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,\n", + " warm_start=False))]" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.estimators" ] @@ -1935,9 +2128,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 72, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False),\n", + " ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", + " oob_score=False, random_state=42, verbose=0, warm_start=False),\n", + " LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n", + " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n", + " multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,\n", + " verbose=0),\n", + " MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n", + " beta_2=0.999, early_stopping=False, epsilon=1e-08,\n", + " hidden_layer_sizes=(100,), learning_rate='constant',\n", + " learning_rate_init=0.001, max_iter=200, momentum=0.9,\n", + " nesterovs_momentum=True, power_t=0.5, random_state=42, shuffle=True,\n", + " solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,\n", + " warm_start=False)]" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.estimators_" ] @@ -1951,7 +2177,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ @@ -1967,9 +2193,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 74, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9653" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.score(X_val, y_val)" ] @@ -1983,7 +2220,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -1992,9 +2229,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 76, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9703" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.score(X_val, y_val)" ] @@ -2015,18 +2263,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9652" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "voting_clf.score(X_test, y_test)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9434, 0.9444, 0.9541]" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]" ] @@ -2054,7 +2324,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ @@ -2066,18 +2336,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 80, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2., 2., 2., 2.],\n", + " [7., 7., 7., 7.],\n", + " [4., 4., 4., 4.],\n", + " ...,\n", + " [4., 4., 4., 4.],\n", + " [9., 9., 9., 9.],\n", + " [4., 4., 4., 4.]], dtype=float32)" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "X_val_predictions" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 81, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=200, n_jobs=1,\n", + " oob_score=True, random_state=42, verbose=0, warm_start=False)" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)\n", "rnd_forest_blender.fit(X_val_predictions, y_val)" @@ -2085,9 +2388,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 82, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9644" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rnd_forest_blender.oob_score_" ] @@ -2108,7 +2422,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ @@ -2120,7 +2434,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ @@ -2129,7 +2443,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 85, "metadata": {}, "outputs": [], "source": [ @@ -2138,9 +2452,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 86, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9566" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "accuracy_score(y_test, y_pred)" ]