{"id":468,"date":"2024-05-01T18:07:46","date_gmt":"2024-05-01T10:07:46","guid":{"rendered":"https:\/\/blog.zwdblog.online\/?p=468"},"modified":"2024-06-04T17:31:40","modified_gmt":"2024-06-04T09:31:40","slug":"%e5%86%b3%e7%ad%96%e6%a0%91%e6%9e%84%e5%bb%ba%e5%b9%b6%e6%bc%94%e7%bb%83","status":"publish","type":"post","link":"https:\/\/blog.zwdblog.online\/index.php\/2024\/05\/01\/%e5%86%b3%e7%ad%96%e6%a0%91%e6%9e%84%e5%bb%ba%e5%b9%b6%e6%bc%94%e7%bb%83\/","title":{"rendered":"\u51b3\u7b56\u6811\u6784\u5efa\u5e76\u6f14\u7ec3"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">\u51b3\u7b56\u6811\u7b80\u4ecb<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u51b3\u7b56\u6811\u662f\u4e00\u79cd\u5e38\u89c1\u7684\u673a\u5668\u5b66\u4e60\u6a21\u578b\uff0c\u7528\u4e8e\u89e3\u51b3\u5206\u7c7b\u548c\u56de\u5f52\u95ee\u9898\u3002\u5b83\u6a21\u62df\u4eba\u7c7b\u5728\u505a\u51b3\u7b56\u65f6\u7684\u601d\u7ef4\u8fc7\u7a0b\uff0c\u901a\u8fc7\u4e00\u7cfb\u5217\u7684\u51b3\u7b56\u8282\u70b9\u548c\u5206\u652f\u6765\u8868\u793a\u6570\u636e\u96c6\u7684\u51b3\u7b56\u8fc7\u7a0b\u3002\u51b3\u7b56\u6811\u7684\u6bcf\u4e2a\u5185\u90e8\u8282\u70b9\u8868\u793a\u5bf9\u67d0\u4e2a\u5c5e\u6027\u7684\u6d4b\u8bd5\uff0c\u6bcf\u4e2a\u5206\u652f\u4ee3\u8868\u4e00\u4e2a\u6d4b\u8bd5\u7ed3\u679c\uff0c\u6bcf\u4e2a\u53f6\u5b50\u8282\u70b9\u4ee3\u8868\u4e00\u4e2a\u7c7b\u522b\u6807\u7b7e\u6216\u4e00\u4e2a\u6570\u503c\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u51b3\u7b56\u6811\u7684\u7279\u70b9\u5305\u62ec\uff1a<\/h2>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u6613\u4e8e\u7406\u89e3\u548c\u89e3\u91ca\uff1a<\/strong> \u51b3\u7b56\u6811\u6a21\u578b\u7c7b\u4f3c\u4e8e\u4eba\u7c7b\u7684\u51b3\u7b56\u8fc7\u7a0b\uff0c\u56e0\u6b64\u975e\u4e13\u4e1a\u4eba\u58eb\u4e5f\u80fd\u591f\u7406\u89e3\u548c\u89e3\u91ca\u6a21\u578b\u7684\u5de5\u4f5c\u539f\u7406\u3002<\/li>\n\n\n\n<li><strong>\u53ef\u5904\u7406\u591a\u8f93\u51fa\u95ee\u9898\uff1a<\/strong> 
\u51b3\u7b56\u6811\u53ef\u4ee5\u5904\u7406\u591a\u7c7b\u522b\u8f93\u51fa\u95ee\u9898\uff0c\u4e5f\u53ef\u4ee5\u5904\u7406\u8fde\u7eed\u578b\u8f93\u51fa\u95ee\u9898\u3002<\/li>\n\n\n\n<li><strong>\u6570\u636e\u9884\u5904\u7406\u7b80\u5355\uff1a<\/strong> \u51b3\u7b56\u6811\u6a21\u578b\u5bf9\u6570\u636e\u7684\u5904\u7406\u80fd\u529b\u8f83\u5f3a\uff0c\u4e0d\u9700\u8981\u5bf9\u6570\u636e\u8fdb\u884c\u8fc7\u591a\u7684\u9884\u5904\u7406\uff0c\u5982\u5f52\u4e00\u5316\u3001\u6807\u51c6\u5316\u7b49\u3002<\/li>\n\n\n\n<li><strong>\u80fd\u591f\u5904\u7406\u7f3a\u5931\u503c\uff1a<\/strong> \u51b3\u7b56\u6811\u6a21\u578b\u80fd\u591f\u5904\u7406\u7f3a\u5931\u503c\uff0c\u4e0d\u9700\u8981\u5bf9\u7f3a\u5931\u503c\u8fdb\u884c\u586b\u5145\u3002<\/li>\n\n\n\n<li><strong>\u9002\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u96c6\uff1a<\/strong> \u51b3\u7b56\u6811\u6a21\u578b\u9002\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u96c6\uff0c\u5e76\u4e14\u5728\u6570\u636e\u96c6\u5305\u542b\u5927\u91cf\u5c5e\u6027\u65f6\u4e5f\u80fd\u8868\u73b0\u826f\u597d\u3002<\/li>\n<\/ol>\n\n\n\n<p class=\"wp-block-paragraph\">\u51b3\u7b56\u6811\u7684\u6784\u5efa\u8fc7\u7a0b\u6d89\u53ca\u9009\u62e9\u6700\u4f73\u7684\u7279\u5f81\u6765\u6784\u5efa\u6811\uff0c\u901a\u5e38\u4f7f\u7528\u4fe1\u606f\u589e\u76ca\u3001\u57fa\u5c3c\u7cfb\u6570\u7b49\u6307\u6807\u6765\u8bc4\u4f30\u7279\u5f81\u7684\u91cd\u8981\u6027\u3002\u901a\u8fc7\u9012\u5f52\u5730\u9009\u62e9\u6700\u4f73\u7279\u5f81\u5e76\u5206\u88c2\u6570\u636e\u96c6\uff0c\u6700\u7ec8\u6784\u5efa\u51fa\u4e00\u68f5\u80fd\u591f\u5bf9\u65b0\u6570\u636e\u8fdb\u884c\u51c6\u786e\u9884\u6d4b\u7684\u51b3\u7b56\u6811\u6a21\u578b\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u51b3\u7b56\u6811\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\u5177\u6709\u5e7f\u6cdb\u7684\u7528\u9014\uff0c\u5305\u62ec\u533b\u7597\u8bca\u65ad\u3001\u91d1\u878d\u98ce\u9669\u8bc4\u4f30\u3001\u5ba2\u6237\u5206\u7c7b\u7b49\u9886\u57df\u3002\u5b83\u4e0d\u4ec5\u80fd\u591f\u63d0\u4f9b\u51c6\u786e\u7684\u9884\u6d4b\u7ed3\u679c\uff0c\u8fd8\u80fd\u591f\u5e2e\u52a9\u7528\u6237\u7406\u89e3\u6570\u636e\u80cc\u540e\u7684\u51b3\u7b56\u903b\u8f91\uff0c\u5177\u6709\u5f88\u9ad8\u7684\u5b9e\u7528\u6027\u548c\u53ef\u89e3\u91ca\u6027\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u51b3\u7b56\u6811\u7684\u6784\u5efa\u8fc7\u7a0b<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u51b3\u7b56\u6811\u7684\u6784\u5efa\u662f\u4e00\u4e2a\u9010\u6b65\u9009\u62e9\u7279\u5f81\u5e76\u5206\u88c2\u6570\u636e\u96c6\u7684\u8fc7\u7a0b\uff0c\u4ee5\u6700\u7ec8\u6784\u5efa\u51fa\u80fd\u591f\u5bf9\u65b0\u6570\u636e\u8fdb\u884c\u51c6\u786e\u9884\u6d4b\u7684\u51b3\u7b56\u6811\u6a21\u578b\u3002\u5176\u6784\u5efa\u6b65\u9aa4\u5982\u4e0b\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u7279\u5f81\u9009\u62e9<\/strong> \u5728\u6784\u5efa\u51b3\u7b56\u6811\u65f6\uff0c\u9700\u8981\u9009\u62e9\u6700\u4f73\u7684\u7279\u5f81\u6765\u4f5c\u4e3a\u8282\u70b9\u8fdb\u884c\u6570\u636e\u96c6\u7684\u5206\u5272\u3002\u5e38\u7528\u7684\u7279\u5f81\u9009\u62e9\u6307\u6807\u5305\u62ec\u4fe1\u606f\u589e\u76ca\uff08ID3\u7b97\u6cd5\uff09\u3001\u589e\u76ca\u7387\uff08C4.5\u7b97\u6cd5\uff09\u3001\u57fa\u5c3c\u6307\u6570\uff08CART\u7b97\u6cd5\uff09\u7b49\u3002\u8fd9\u4e9b\u6307\u6807\u5e2e\u52a9\u51b3\u7b56\u6811\u7b97\u6cd5\u786e\u5b9a\u54ea\u4e2a\u7279\u5f81\u6700\u9002\u5408\u4f5c\u4e3a\u8282\u70b9\u6765\u5212\u5206\u6570\u636e\u96c6\u3002<\/li>\n\n\n\n<li><strong>\u6570\u636e\u96c6\u5206\u5272<\/strong> 
\u4e00\u65e6\u9009\u62e9\u4e86\u6700\u4f73\u7684\u7279\u5f81\uff0c\u6570\u636e\u96c6\u5c06\u6839\u636e\u8be5\u7279\u5f81\u7684\u53d6\u503c\u8fdb\u884c\u5206\u5272\u3002\u8fd9\u4e2a\u8fc7\u7a0b\u4f1a\u6301\u7eed\u8fdb\u884c\uff0c\u76f4\u5230\u6570\u636e\u96c6\u4e2d\u7684\u6240\u6709\u6837\u672c\u90fd\u5c5e\u4e8e\u540c\u4e00\u7c7b\u522b\uff08\u5bf9\u4e8e\u5206\u7c7b\u95ee\u9898\uff09\u6216\u76f4\u5230\u6ee1\u8db3\u67d0\u4e2a\u505c\u6b62\u6761\u4ef6\u3002<\/li>\n\n\n\n<li><strong>\u505c\u6b62\u6761\u4ef6<\/strong> \u5728\u6784\u5efa\u51b3\u7b56\u6811\u65f6\uff0c\u9700\u8981\u8bbe\u5b9a\u505c\u6b62\u6761\u4ef6\uff0c\u4ee5\u9632\u6b62\u6811\u7684\u8fc7\u5ea6\u751f\u957f\uff08\u8fc7\u62df\u5408\uff09\u3002\u505c\u6b62\u6761\u4ef6\u53ef\u4ee5\u662f\u8282\u70b9\u4e2d\u6837\u672c\u6570\u91cf\u7684\u9608\u503c\u3001\u6811\u7684\u6df1\u5ea6\u7b49\u3002<\/li>\n\n\n\n<li><strong>\u526a\u679d<\/strong> \u6784\u5efa\u5b8c\u6210\u540e\u7684\u51b3\u7b56\u6811\u53ef\u80fd\u5b58\u5728\u8fc7\u62df\u5408\u95ee\u9898\uff0c\u56e0\u6b64\u901a\u5e38\u9700\u8981\u8fdb\u884c\u526a\u679d\u64cd\u4f5c\uff0c\u4ee5\u7b80\u5316\u6811\u7684\u7ed3\u6784\u5e76\u63d0\u9ad8\u6cdb\u5316\u80fd\u529b\u3002\u526a\u679d\u53ef\u4ee5\u662f\u9884\u526a\u679d\uff08\u5728\u6784\u5efa\u6811\u7684\u8fc7\u7a0b\u4e2d\u8fdb\u884c\u526a\u679d\uff09\u6216\u540e\u526a\u679d\uff08\u5728\u6811\u6784\u5efa\u5b8c\u6210\u540e\u8fdb\u884c\u526a\u679d\uff09\u3002<\/li>\n\n\n\n<li><strong>\u751f\u6210\u51b3\u7b56\u6811\u6a21\u578b<\/strong> \u7ecf\u8fc7\u4e0a\u8ff0\u6b65\u9aa4\uff0c\u5c31\u53ef\u4ee5\u751f\u6210\u4e00\u4e2a\u7528\u4e8e\u5206\u7c7b\u6216\u56de\u5f52\u7684\u51b3\u7b56\u6811\u6a21\u578b\u3002\u8be5\u6a21\u578b\u53ef\u4ee5\u5bf9\u65b0\u6837\u672c\u8fdb\u884c\u9884\u6d4b\uff0c\u5e76\u4e14\u5177\u6709\u4e00\u5b9a\u7684\u53ef\u89e3\u91ca\u6027\uff0c\u4f7f\u4eba\u4eec\u80fd\u591f\u7406\u89e3\u6a21\u578b\u7684\u51b3\u7b56\u8fc7\u7a0b\u3002<\/li>\n<\/ol>\n\n\n\n<h2 
class=\"wp-block-heading\">\u5b9e\u6218\u6f14\u7ec3(iris.data.txt)<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>\u5185\u5bb9<\/strong><\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">1\uff09\u8bfb\u53d6\u6570\u636e\u96c6\u201ciris.data.txt\u201d<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">2\uff09\u5bf9\u6570\u636e\u96c6\u8fdb\u884c\u8bad\u7ec3\u96c6\u4e0e\u6d4b\u8bd5\u96c6\u7684\u968f\u673a\u6216\u8005\u4e0d\u968f\u673a\u5206\u5272<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">3\uff09\u6784\u5efa\u51b3\u7b56\u6811\u5206\u7c7b\u6a21\u578b\uff0c\u91c7\u7528\u4e8c\u8def\u5212\u5206\uff0c\u5b9e\u73b0\u57fa\u4e8e\u5ea6\u91cf(\u5305\u62ecGini\u6307\u6807\u548c\u4fe1\u606f\u589e\u76ca)\u7684\u5c5e\u6027\u5212\u5206\uff0c\u5e76\u8fed\u4ee3\u6784\u5efa\u51b3\u7b56\u6811<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">4\uff09\u8bc4\u4f30\u51b3\u7b56\u6811\u5206\u7c7b\u6a21\u578b\uff0c\u5229\u7528\u4e0a\u4e00\u6b65\u6784\u5efa\u7684\u51b3\u7b56\u6811\uff0c\u5bf9\u6d4b\u8bd5\u96c6\u4e2d\u7684\u6837\u672c\u8fdb\u884c\u5206\u7c7b\uff0c\u5e76\u8ba1\u7b97\u5206\u7c7b\u51c6\u786e\u7387<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">5\uff09\u5229\u7528scikit-learn\u5e93\u4e2d\u5df2\u6709\u7684\u51b3\u7b56\u6811\u7c7bDecisionTreeClassifier\u5bf9\u4e0a\u8ff0\u7684\u8bad\u7ec3\u96c6\u8fdb\u884c\u62df\u5408\uff0c\u5728\u6d4b\u8bd5\u96c6\u4e0a\u8ba1\u7b97\u5206\u7c7b\u51c6\u786e\u7387\uff0c\u6bd4\u8f83\u4e24\u4e2a\u6a21\u578b\u51c6\u786e\u7387<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u8bfb\u53d6\u6570\u636e\u96c6\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>data = pd.read_csv(\"iris.data.txt\",header = 0,names=&#91;\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\", \"class\"])\nprint(\"\u524d\u5341\u6761\u6570\u636e\u4e3a\uff1a\\n\",data.head(10))<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u5bf9\u6570\u636e\u96c6\u8fdb\u884c\u5206\u5272\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>X = 
data.drop(\"class\",axis=1)\ny = data&#91;'class']\n# \u5206\u5272\u6570\u636e\u96c6\uff0c70%\u8bad\u7ec3\u96c6\uff0c30%\u6d4b\u8bd5\u96c6\uff0c\u968f\u673a\u5206\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n\n#\u4e0d\u968f\u673a\u5206\n # \u8ba1\u7b97\u7528\u4e8e\u8bad\u7ec3\u7684\u6837\u672c\u6570\u91cf\ntrain_size = int(0.7 * len(data))\n# \u6309\u7167\u7d22\u5f15\u5212\u5206\u6570\u636e\u96c6\nX_train = data.iloc&#91;:train_size, :-1]  # \u9009\u62e9\u524d70%\u7684\u6837\u672c\u4f5c\u4e3a\u8bad\u7ec3\u96c6\u7684\u7279\u5f81\ny_train = data.iloc&#91;:train_size, -1]   # \u9009\u62e9\u524d70%\u7684\u6837\u672c\u4f5c\u4e3a\u8bad\u7ec3\u96c6\u7684\u76ee\u6807\u53d8\u91cf\nX_test = data.iloc&#91;train_size:, :-1]   # \u9009\u62e9\u540e30%\u7684\u6837\u672c\u4f5c\u4e3a\u6d4b\u8bd5\u96c6\u7684\u7279\u5f81\ny_test = data.iloc&#91;train_size:, -1]    # \u9009\u62e9\u540e30%\u7684\u6837\u672c\u4f5c\u4e3a\u6d4b\u8bd5\u96c6\u7684\u76ee\u6807\u53d8\u91cf<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u8ba1\u7b97gini\u6307\u6807\u3001\u8ba1\u7b97\u4fe1\u606f\u71b5\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5b9a\u4e49\u57fa\u5c3c\u4e0d\u7eaf\u5ea6\u8ba1\u7b97\u51fd\u6570\ndef gini_impurity(labels):\n    total_samples = len(labels)\n    classes, counts = np.unique(labels, return_counts=True)\n    impurity = 1 - sum((counts&#91;i] \/ total_samples) ** 2 for i in range(len(classes)))\n    return impurity\n\n#\u5b9a\u4e49\u4fe1\u606f\u71b5\u8ba1\u7b97\u51fd\u6570\ndef entropy(labels):\n    total_samples=len(labels)\n    classes,counts=np.unique(labels,return_counts=True)\n    entropy_val=0\n    entropy_val-=sum((counts&#91;i]\/total_samples)*math.log(counts&#91;i]\/total_samples,2) for i in range(len(classes)))\n    return entropy_val<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u6784\u5efa\u51b3\u7b56\u6811\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># 
\u9012\u5f52\u6784\u5efa\u51b3\u7b56\u6811\uff0c\u6700\u5927\u6df1\u5ea6\u505c\u6b62\ndef build_decision_tree(data, depth=0, max_depth=None):\n    if len(data&#91;'class'].unique()) == 1 or (max_depth is not None and depth == max_depth):\n        return data&#91;'class'].mode().iloc&#91;0]   #\u8fd4\u56de\u9891\u7387\u6700\u9ad8\n\n    if len(data.columns) == 1:\n        return data&#91;'class'].mode().iloc&#91;0]\n\n    best_feature, best_threshold = find_best_split(data)\n\n    if best_feature is None:\n        return data&#91;'class'].mode().iloc&#91;0]\n\n    left_data, right_data = split_dataset(data, best_feature, best_threshold)\n\n    left_subtree = build_decision_tree(left_data, depth + 1, max_depth)\n    right_subtree = build_decision_tree(right_data, depth + 1, max_depth)\n\n    decision_tree = {\n        'feature': best_feature,\n        'threshold': best_threshold,\n        'left_subtree': left_subtree,   #\u5de6\u5b50\u6811\n        'right_subtree': right_subtree  #\u53f3\u5b50\u6811\n    }\n\n    return decision_tree<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>\u6837\u672c\u5206\u7c7b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5206\u7c7b\u6837\u672c\ndef classify_sample(sample, decision_tree):\n    if isinstance(decision_tree, str):\n        return decision_tree\n    feature = decision_tree&#91;'feature']\n    threshold = decision_tree&#91;'threshold']\n    if sample&#91;feature] &lt;= threshold:\n        return classify_sample(sample, decision_tree&#91;'left_subtree'])\n    else:\n        return classify_sample(sample, decision_tree&#91;'right_subtree'])<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u57fa\u672c\u91cd\u8981\u8fc7\u7a0b\u5c31\u5728\u4e0a\u8ff0\uff0c\u5176\u76ee\u6807\u5c31\u662f\u81ea\u5df1\u5b66\u4f1a\u6784\u5efa\u4e00\u68f5\u51b3\u7b56\u6811\u5e76\u8bad\u7ec3\u597d\u4e4b\u540e\u5bf9\u6d4b\u8bd5\u6570\u636e\u96c6\u8fdb\u884c\u4e00\u4e2a\u7c7b\u522b\u8bc4\u5224\u4ee5\u5f97\u5230\u4e00\u4e2a\u51c6\u786e\u7387\u5e76\u4e0escikit-learn\u5e93\u4e2d\u81ea\u5e26\u7684\u51b3\u7b56\u6811\u5f97\u5230\u7684\u51c6\u786e\u7387\u8fdb\u884c\u6bd4\u8f83\uff0c\u901a\u8fc7\u4e0a\u8ff0\u7a0b\u5e8f\u5f97\u5230\u7684\u8fd0\u884c\u56fe\u5982\u4e0b\uff1a<\/p>\n\n\n<div class=\"wp-block-image\">\n<figure class=\"aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"693\" height=\"699\" src=\"https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image-1.png\" alt=\"\u51b3\u7b56\u6811\" class=\"wp-image-470\" srcset=\"https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image-1.png 693w, https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image-1-297x300.png 297w, https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image-1-150x150.png 150w\" sizes=\"auto, (max-width: 693px) 100vw, 693px\" \/><\/figure>\n<\/div>\n\n<div class=\"wp-block-image\">\n<figure class=\"aligncenter size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"692\" height=\"689\" src=\"https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image.png\" alt=\"\u51b3\u7b56\u6811\" class=\"wp-image-469\" srcset=\"https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image.png 692w, https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image-300x300.png 300w, https:\/\/blog.zwdblog.online\/wp-content\/uploads\/2024\/05\/image-150x150.png 150w\" sizes=\"auto, (max-width: 692px) 100vw, 692px\" \/><\/figure>\n<\/div>\n\n\n<p 
class=\"wp-block-paragraph\">\u6839\u636e\u8fd0\u884c\u7ed3\u679c\u56fe\u53ef\u77e5\uff1a\u81ea\u5df1\u6784\u5efa\u7684\u51b3\u7b56\u6811\u51c6\u786e\u7387\u4e3a\u767e\u5206\u4e4b\u4e00\u767e\uff0c\u4e0escikit-learn\u5e93\u7684\u51b3\u7b56\u6811\u5f97\u51fa\u7684\u51c6\u786e\u7387\u76f8\u6bd4\u662f\u4e00\u6837\u7684\uff0c\u7531\u6b64\u53ef\u77e5\uff0c\u81ea\u5df1\u6240\u6784\u5efa\u7684\u51b3\u7b56\u6811\u5927\u6982\u7387\u662f\u6bd4\u8f83\u597d\u7684\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u5168\u90e8\u4ee3\u7801\u5728\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-light-grey-background-color has-background\"><code>import numpy as np\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.tree import DecisionTreeClassifier\nimport math\n\nwhile True:\n    # \u8bfb\u53d6\u6570\u636e\u96c6\n    data = pd.read_csv('iris.data.txt', header=0, names=&#91;\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\", \"class\"])\n    print(\"\u524d\u5341\u6761\u6570\u636e\u4e3a\uff1an\",data.head(10))\n\n    # \u7279\u5f81\u548c\u76ee\u6807\u53d8\u91cf,\u9884\u6d4b\u82b1\u7684\u7c7b\u522b\uff0cX\u662f\u7279\u5f81\uff0cy\u662f\u76ee\u6807\n    X = data.drop('class', axis=1)\n    y = data&#91;'class']\n    \n    print(\"1\u3001\u968f\u673a\u5206 2\u3001\u4e0d\u968f\u673a\u5206 \u8bf7\u9009\u62e9\uff1a\")\n    a = int(input())\n    if a == 1: \n        # \u5206\u5272\u6570\u636e\u96c6\uff0c70%\u8bad\u7ec3\u96c6\uff0c30%\u6d4b\u8bd5\u96c6\uff0c\u968f\u673a\u5206\n        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n    elif a == 2:\n        # \u8ba1\u7b97\u7528\u4e8e\u8bad\u7ec3\u7684\u6837\u672c\u6570\u91cf\n        train_size = int(0.7 * len(data))\n\n        # \u6309\u7167\u7d22\u5f15\u5212\u5206\u6570\u636e\u96c6\n        X_train = data.iloc&#91;:train_size, :-1]  # \u9009\u62e9\u524d70%\u7684\u6837\u672c\u4f5c\u4e3a\u8bad\u7ec3\u96c6\u7684\u7279\u5f81\n        
y_train = data.iloc&#91;:train_size, -1]   # \u9009\u62e9\u524d70%\u7684\u6837\u672c\u4f5c\u4e3a\u8bad\u7ec3\u96c6\u7684\u76ee\u6807\u53d8\u91cf\n        X_test = data.iloc&#91;train_size:, :-1]   # \u9009\u62e9\u540e30%\u7684\u6837\u672c\u4f5c\u4e3a\u6d4b\u8bd5\u96c6\u7684\u7279\u5f81\n        y_test = data.iloc&#91;train_size:, -1]    # \u9009\u62e9\u540e30%\u7684\u6837\u672c\u4f5c\u4e3a\u6d4b\u8bd5\u96c6\u7684\u76ee\u6807\u53d8\u91cf\n\n    print(\"\u8bad\u7ec3\u96c6\u5927\u5c0f:\", X_train.shape)\n    print(\"\u6d4b\u8bd5\u96c6\u5927\u5c0f:\", X_test.shape)\n    print(\"\u6d4b\u8bd5\u96c6\u4e3a\uff1an\",X_test)\n\n    # \u5b9a\u4e49\u6570\u636e\u96c6\u5212\u5206\u51fd\u6570\n    def split_dataset(data, feature, threshold):\n        left_indices = data&#91;feature] &lt;= threshold   #\u5de6\u8fb9\u5c0f\u4e8e\u7b49\u4e8e\u9608\u503c\n        right_indices = data&#91;feature] &gt; threshold   #\u53f3\u8fb9\u5927\u4e8e\u9608\u503c\n        left_data = data&#91;left_indices]\n        right_data = data&#91;right_indices]\n        return left_data, right_data\n\n    print(\"1\u3001Gini\u7cfb\u6570\u5212\u5206 2\u3001\u4fe1\u606f\u589e\u76ca\u5212\u5206 \u8bf7\u9009\u62e9\uff1a\")\n    b = int(input())\n    if b == 1:\n        # \u5b9a\u4e49\u57fa\u5c3c\u4e0d\u7eaf\u5ea6\u8ba1\u7b97\u51fd\u6570\n        def gini_impurity(labels):\n            total_samples = len(labels)\n            classes, counts = np.unique(labels, return_counts=True)\n            impurity = 1 - sum((counts&#91;i] \/ total_samples) ** 2 for i in range(len(classes)))\n            return impurity\n\n        # \u5bfb\u627e\u6700\u4f73\u5206\u88c2\u7279\u5f81\u548c\u9608\u503c(gini)\n        def find_best_split(data):\n            best_gini = float('inf')\n            best_feature = None    #\u6700\u597d\u7684\u7279\u6027\n            best_threshold = None  #\u6700\u597d\u7684\u9608\u503c\n\n            for feature in data.columns&#91;:-1]:   
#\u6392\u9664\u6700\u540e\u4e00\u4e2a\u5c5e\u6027\"\u7c7b\u522b\"\n                thresholds = data&#91;feature].unique()  #\u53d6\u552f\u4e00\u503c\n                for threshold in thresholds:\n                    left_data, right_data = split_dataset(data, feature, threshold)\n                    if len(left_data) == 0 or len(right_data) == 0:\n                        continue\n                    gini = (len(left_data) \/ len(data)) * gini_impurity(left_data&#91;'class']) + \\\n                        (len(right_data) \/ len(data)) * gini_impurity(right_data&#91;'class'])  #gini\u52a0\u6743\u5e73\u5747\n                    if gini &lt; best_gini:\n                        best_gini = gini\n                        best_feature = feature\n                        best_threshold = threshold\n            print(f\"\u6309 {best_feature} \u7279\u5f81\u5212\u5206,\u5212\u5206\u9608\u503c\u4e3a: {best_threshold}\")\n            return best_feature, best_threshold\n    elif b == 2:\n        #\u5b9a\u4e49\u4fe1\u606f\u71b5\u8ba1\u7b97\u51fd\u6570\n        def entropy(labels):\n            total_samples=len(labels)\n            classes,counts=np.unique(labels,return_counts=True)\n            entropy_val=0\n            entropy_val-=sum((counts&#91;i]\/total_samples)*math.log(counts&#91;i]\/total_samples,2) for i in range(len(classes)))\n            return entropy_val\n\n        # \u5bfb\u627e\u6700\u4f73\u5206\u88c2\u7279\u5f81\u548c\u9608\u503c\uff08\u57fa\u4e8e\u4fe1\u606f\u589e\u76ca\uff09\n        def find_best_split(data):\n            best_info_gain = float('-inf')\n            best_feature = None\n            best_threshold = None\n\n            for feature in data.columns&#91;:-1]:\n                thresholds = data&#91;feature].unique()\n                for threshold in thresholds:\n                    left_data, right_data = split_dataset(data, feature, threshold)\n                    if len(left_data) == 0 or len(right_data) == 0:\n                        continue\n 
                   info_gain = entropy(data&#91;'class']) - ((len(left_data) \/ len(data)) * entropy(left_data&#91;'class']) +\n                                                        (len(right_data) \/ len(data)) * entropy(right_data&#91;'class']))\n                    if info_gain &gt; best_info_gain:\n                        best_info_gain = info_gain\n                        best_feature = feature\n                        best_threshold = threshold\n\n            print(f\"\u6309 {best_feature} \u7279\u5f81\u5212\u5206,\u5212\u5206\u9608\u503c\u4e3a: {best_threshold}\")\n            return best_feature, best_threshold\n\n    # \u9012\u5f52\u6784\u5efa\u51b3\u7b56\u6811\uff0c\u6700\u5927\u6df1\u5ea6\u505c\u6b62\n    def build_decision_tree(data, depth=0, max_depth=None):\n        if len(data&#91;'class'].unique()) == 1 or (max_depth is not None and depth == max_depth):\n            return data&#91;'class'].mode().iloc&#91;0]   #\u8fd4\u56de\u9891\u7387\u6700\u9ad8\n\n        if len(data.columns) == 1:\n            return data&#91;'class'].mode().iloc&#91;0]\n\n        best_feature, best_threshold = find_best_split(data)\n\n        if best_feature is None:\n            return data&#91;'class'].mode().iloc&#91;0]\n\n        left_data, right_data = split_dataset(data, best_feature, best_threshold)\n\n        left_subtree = build_decision_tree(left_data, depth + 1, max_depth)\n        right_subtree = build_decision_tree(right_data, depth + 1, max_depth)\n\n        decision_tree = {\n            'feature': best_feature,\n            'threshold': best_threshold,\n            'left_subtree': left_subtree,   #\u5de6\u5b50\u6811\n            'right_subtree': right_subtree  #\u53f3\u5b50\u6811\n        }\n\n        return decision_tree\n\n    # \u5206\u7c7b\u6837\u672c\n    def classify_sample(sample, decision_tree):\n        if isinstance(decision_tree, str):\n            return decision_tree\n        feature = decision_tree&#91;'feature']\n        threshold = 
decision_tree&#91;'threshold']\n        if sample&#91;feature] &lt;= threshold:\n            return classify_sample(sample, decision_tree&#91;'left_subtree'])\n        else:\n            return classify_sample(sample, decision_tree&#91;'right_subtree'])\n\n    # \u8bc4\u4f30\u6a21\u578b\u6027\u80fd\n    def evaluate_model(test_data, decision_tree):\n        correct_predictions = 0\n        total_samples = len(test_data)\n        for _, sample in test_data.iterrows():\n            predicted_class = classify_sample(sample, decision_tree)\n            if predicted_class == sample&#91;'class']:\n                correct_predictions += 1\n        accuracy = correct_predictions \/ total_samples\n        return accuracy\n\n    # \u4fdd\u5b58\u51b3\u7b56\u6811\u4e3a.dot\u6587\u4ef6\n    def save_tree_as_dot(decision_tree, filename='decision_tree.dot'):\n        def traverse(node, file):\n            if isinstance(node, dict):\n                file.write(f\"{node&#91;'feature']} &lt;= {node&#91;'threshold']};\\n\")\n                file.write(f\"{node&#91;'feature']} &gt; {node&#91;'threshold']};\\n\")\n                traverse(node&#91;'left_subtree'], file)\n                traverse(node&#91;'right_subtree'], file)\n            else:\n                file.write(f\"{node};\\n\")\n\n        with open(filename, 'w') as f:\n            f.write('digraph decision_tree {\\n')\n            traverse(decision_tree, f)\n            f.write('}')\n\n    # \u4f7f\u7528\u8bad\u7ec3\u96c6\u6784\u5efa\u51b3\u7b56\u6811\n    decision_tree_custom = build_decision_tree(pd.concat(&#91;X_train, y_train], axis=1))\n    print(\"\u4ece\u5934\u5f00\u59cb\u6784\u5efa\u7684\u51b3\u7b56\u6811\u6a21\u578b:\", decision_tree_custom)\n\n    # \u4fdd\u5b58\u51b3\u7b56\u6811\u4e3a.dot\u6587\u4ef6\n    save_tree_as_dot(decision_tree_custom, filename='decision_tree_custom.dot')\n\n    # \u8bc4\u4f30\u4ece\u5934\u5f00\u59cb\u6784\u5efa\u7684\u51b3\u7b56\u6811\u6a21\u578b\n    accuracy_custom_tree = 
evaluate_model(X_test.join(y_test), decision_tree_custom)\n    print(\"\u4ece\u5934\u5f00\u59cb\u6784\u5efa\u7684\u51b3\u7b56\u6811\u6a21\u578b\u51c6\u786e\u7387:\", accuracy_custom_tree)\n\n    # \u4f7f\u7528scikit-learn\u7684DecisionTreeClassifier\u6784\u5efa\u6a21\u578b\n    sklearn_tree = DecisionTreeClassifier()\n    sklearn_tree.fit(X_train, y_train)\n\n    # \u4f7f\u7528scikit-learn\u7684\u51b3\u7b56\u6811\u6a21\u578b\u8bc4\u4f30\u6027\u80fd\n    accuracy_sklearn_tree = sklearn_tree.score(X_test, y_test)\n    print(\"\u4f7f\u7528scikit-learn\u7684\u51b3\u7b56\u6811\u6a21\u578b\u51c6\u786e\u7387:\", accuracy_sklearn_tree)\n\n    print(\"\u662f\u5426\u7ee7\u7eed\uff1a\uff08y\/n\uff09\")\n    c = input()\n    if c.lower() == 'n':\n        break<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u51b3\u7b56\u6811\u7b80\u4ecb \u51b3\u7b56\u6811\u662f\u4e00\u79cd\u5e38\u89c1\u7684\u673a\u5668\u5b66\u4e60\u6a21\u578b\uff0c\u7528\u4e8e\u89e3\u51b3\u5206\u7c7b\u548c\u56de [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":471,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3],"tags":[],"class_list":["post-468","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-jl"],"_links":{"self":[{"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/posts\/468","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/comments?post=468"}],"version-history":[{"count":0,"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/posts\/468\/revisions
"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/media\/471"}],"wp:attachment":[{"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/media?parent=468"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/categories?post=468"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.zwdblog.online\/index.php\/wp-json\/wp\/v2\/tags?post=468"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}