Python隨機森林例子源碼分享

ID:780408 · 發表于 2020-12-18 11:04

"#測(cè)試gini\n",
"gini=calGini((l,r),classLabels)\n",
"print(gini)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def getBestSplit(dataSet,featureNumbers):\n",
" '''\n",
" 對(duì)于一個(gè)數(shù)據(jù)集，選擇featureNumber個(gè)特征進(jìn)行簡(jiǎn)單劃分，得到最好的特征和劃分結(jié)果\n",
" args:\n",
" dataSet:數(shù)據(jù)集,類(lèi)型：list\n",
" featureNumbers:選擇的特征值數(shù),類(lèi)型：int\n",
" classLabels：所有分類(lèi)，類(lèi)型：list\n",
" ''' \n",
" \n",
" #樣本數(shù)\n",
" m=len(dataSet)\n",
" if m==0:\n",
" return None\n",
" #樣本特征值數(shù)+1（因?yàn)樽詈笥幸粋€(gè)標(biāo)簽）\n",
" totalColumnNumber=len(dataSet[0])\n",
" #隨機(jī)選擇的特征索引\n",
" randomSelectedFeatures=[]\n",
" \n",
" \n",
" \n",
" #選擇數(shù)目必須在特征數(shù)目范圍內(nèi)\n",
" if totalColumnNumber-1>=featureNumbers: \n",
" #借助這個(gè)變量防止選擇重復(fù)的特征進(jìn)入\n",
" indexList=list(range(totalColumnNumber-1)) \n",
" for j in range(featureNumbers):\n",
" #索引序列長(zhǎng)度\n",
" leftSize=len(indexList)\n",
" #隨機(jī)數(shù)\n",
" randIndex=random.randrange(leftSize)\n",
" #索引學(xué)列隨機(jī)數(shù)處數(shù)據(jù)彈出，放入選擇特征列表\n",
" origIndex=indexList.pop(randIndex)\n",
" #存入的是原始數(shù)據(jù)特征索引\n",
" randomSelectedFeatures.append(origIndex)\n",
" else:\n",
" randomSelectedFeatures=range(totalColumnNumber-1)#特征全部被選擇\n",
" \n",
" \n",
" # print(\"current select features\")\n",
" # print(randomSelectedFeatures)\n",
"\n",
" #當(dāng)前數(shù)據(jù)集的標(biāo)簽序列\(zhòng)n",
" class_values=list(set(item[-1] for item in dataSet))\n",
" \n",
" #對(duì)于每個(gè)特征以及每個(gè)特征值進(jìn)行簡(jiǎn)單劃分\n",
" #保留最小的基尼系數(shù)\n",
" minGini=9999\n",
" #存入最好的信息\n",
" bestInfor={}\n",
" #外層循環(huán)，對(duì)于每個(gè)特征\n",
" for index in randomSelectedFeatures:\n",
" #內(nèi)層循環(huán)對(duì)于每個(gè)特征值\n",
" tempFeatureValueList=list(set(item[index] for item in dataSet))\n",
" #print(len(tempFeatureValueList))\n",
" for tempValue in tempFeatureValueList:\n",
" #簡(jiǎn)單分類(lèi)\n",
" groups=simpleSplit(dataSet,index,tempValue) \n",
" #print(\"currentIndex:%d,CurrentTempValue:%f\"%(index,tempValue))\n",
" #計(jì)算基尼系數(shù)\n",
" gini=calGini(groups,class_values)\n",
" #print(\"computed gini:\",gini) \n",
" if gini<minGini:\n",
" minGini=gini\n",
" #保存目前最后的信息\n",
" bestInfor[\"index\"]=index#存入原來(lái)索引 \n",
" bestInfor[\"indexValue\"]=tempValue\n",
" bestInfor[\"groups\"]=groups\n",
" bestInfor[\"gini\"]=gini\n",
" \n",
" return bestInfor"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"52 0.017\n"
]
}
],
"source": [
"#測(cè)試最好分類(lèi)函數(shù)\n",
"bestInfor=getBestSplit(dataSet,3)\n",
"print(bestInfor[\"index\"],bestInfor[\"indexValue\"])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def terminalLabel(subSet):\n",
" '''\n",
" 樹(shù)葉點(diǎn)對(duì)應(yīng)的標(biāo)簽\n",
" args:\n",
" subSet:當(dāng)前數(shù)據(jù)集，最后列是標(biāo)簽列，類(lèi)型：list\n",
" returns:\n",
" 當(dāng)前列中最多的標(biāo)簽，類(lèi)型：原標(biāo)簽類(lèi)型\n",
" '''\n",
" #得到最后一列\(zhòng)n",
" labelList=[item[-1] for item in subSet]\n",
" #max函數(shù)，key后是函數(shù)，代表對(duì)前面的進(jìn)行那種運(yùn)算，這里是技術(shù)\n",
" #max返回值是第一個(gè)參數(shù)，這里set是把labelList轉(zhuǎn)換成集合，即去掉重復(fù)項(xiàng)\n",
" #key:相當(dāng)于循環(huán)調(diào)用labelList.count（set(labelList))中的每個(gè)元素，然后max取得最大值\n",
" #返回set(labelList)中對(duì)應(yīng)最大的那個(gè)標(biāo)簽\n",
" return max(set(labelList), key=labelList.count) # 輸出 subSet 中出現(xiàn)次數(shù)較多的標(biāo)簽 \n",
"\n",
" #下面的寫(xiě)法也是成立的，利用lambda表達(dá)式，表達(dá)式中x從全面取，這種寫(xiě)法可能更好理解些\n",
" #return max(set(labelList), key=lambda x:labelList.count(x)) "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"R\n"
]
}
],
"source": [
"#測(cè)試\n",
"label=terminalLabel(l)\n",
"print(label)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"#對(duì)得到的最好分類(lèi)信息進(jìn)行分割\n",
"def split(node, max_depth, min_size, n_features, depth): # 創(chuàng)建子分割器遞歸分類(lèi) 直到分類(lèi)結(jié)束\n",
" '''\n",
" :param node: 節(jié)點(diǎn)，類(lèi)型：字典\n",
" bestInfor[\"index\"]=index#存入原來(lái)索引 \n",
" bestInfor[\"indexValue\"]=tempValue\n",
" bestInfor[\"groups\"]=groups\n",
" bestInfor[\"gini\"]=gini\n",
" :param max_depth: 最大深度,int\n",
" :param min_size: 最小,int\n",
" :param n_features: 特征選取個(gè)數(shù),int\n",
" :param depth: 深度,int\n",
" :return:\n",
" '''\n",
" left, right = node['groups']\n",
" del (node['groups'])\n",
"\n",
" if not left or not right: # 如果只有一個(gè)子集\n",
" node['left'] = node['right'] = terminalLabel(left + right) # 投票出類(lèi)型\n",
" return\n",
"\n",
" if depth >= max_depth: # 如果即將超過(guò)\n",
" node['left'], node['right'] = terminalLabel(left), terminalLabel(right) # 投票出類(lèi)型\n",
" return\n",
"\n",
" if len(left) <= min_size: # 處理左子集\n",
" node['left'] = terminalLabel(left)\n",
" else:\n",
" node['left'] = getBestSplit(left, n_features) # node['left']是一個(gè)字典，形式為{'index':b_index, 'value':b_value, 'groups':b_groups}，所以node是一個(gè)多層字典\n",
" split(node['left'], max_depth, min_size, n_features, depth + 1) # 遞歸，depth+1計(jì)算遞歸層數(shù)\n",
"\n",
" if len(right) <= min_size: # 處理右子集\n",
" node['right'] = terminalLabel(right)\n",
" else:\n",
" node['right'] = getBestSplit(right, n_features)\n",
" split(node['right'], max_depth, min_size, n_features, depth + 1)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"#構(gòu)建一個(gè)決策樹(shù)\n",
"def buildTree(train, max_depth, min_size, n_features):\n",
" '''\n",
" 創(chuàng)建一個(gè)決策樹(shù)\n",
" :param train: 訓(xùn)練數(shù)據(jù)集\n",
" :param max_depth: 決策樹(shù)深度不能太深不然容易導(dǎo)致過(guò)擬合\n",
" :param min_size: 葉子節(jié)點(diǎn)的大小\n",
" :param n_features: 選擇的特征的個(gè)數(shù)\n",
" :return\n",
" root 返回決策樹(shù)\n",
" '''\n",
" root = getBestSplit(train, n_features) # 獲取樣本數(shù)據(jù)集\n",
" split(root, max_depth, min_size, n_features, 1) # 進(jìn)行樣本分割，構(gòu)架決策樹(shù)\n",
" return root # 返回決策樹(shù)\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'index': 55, 'indexValue': 0.0114, 'gini': 0.0, 'left': {'index': 35, 'indexValue': 0.2288, 'gini': 0.0, 'left': 'R', 'right': {'index': 33, 'indexValue': 0.2907, 'gini': 0.0, 'left': 'R', 'right': {'index': 58, 'indexValue': 0.0057, 'gini': 0.0, 'left': {'index': 12, 'indexValue': 0.0493, 'gini': 0.0, 'left': 'R', 'right': 'R'}, 'right': 'R'}}}, 'right': {'index': 54, 'indexValue': 0.0063, 'gini': 0.0, 'left': {'index': 21, 'indexValue': 0.8384, 'gini': 0.0, 'left': 'M', 'right': 'M'}, 'right': {'index': 32, 'indexValue': 0.558, 'gini': 0.0, 'left': 'M', 'right': {'index': 58, 'indexValue': 0.0332, 'gini': 0.0, 'left': 'M', 'right': 'M'}}}}\n"
]
}
],
"source": [
"#測(cè)試決策樹(shù)\n",
"#選擇一個(gè)子集\n",
"s=putBackSample(dataSet,10)\n",
"tempTree=buildTree(s,10,1,3)\n",
"print(tempTree)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"#根據(jù)決策樹(shù)進(jìn)行預(yù)測(cè)\n",
"def predict(node, row): # 預(yù)測(cè)模型分類(lèi)結(jié)果\n",
" '''\n",
" 在當(dāng)前節(jié)點(diǎn)進(jìn)行預(yù)測(cè)，row是待預(yù)測(cè)樣本\n",
" args:\n",
" node:樹(shù)節(jié)點(diǎn)\n",
" row:待分類(lèi)樣本\n",
" return:\n",
" 分類(lèi)標(biāo)簽\n",
" '''\n",
" if row[node['index']] < node['indexValue']:\n",
" if isinstance(node['left'], dict): # isinstance 是 Python 中的一個(gè)內(nèi)建函數(shù)。是用來(lái)判斷一個(gè)對(duì)象是否是一個(gè)已知的類(lèi)型。\n",
" return predict(node['left'], row)\n",
" else:\n",
" return node['left']\n",
" else:\n",
" if isinstance(node['right'], dict):\n",
" return predict(node['right'], row)\n",
" else:\n",
" return node['right']"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"R R\n"
]
}
],
"source": [
"#測(cè)試下\n",
"label=predict(tempTree,s[0])\n",
"print(label,s[0][-1])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"#多個(gè)樹(shù)的決策，多數(shù)服從少數(shù)\n",
"def baggingPredict(trees, row):\n",
" \"\"\"\n",
" 多個(gè)樹(shù)的決策，多數(shù)服從少數(shù)\n",
" Args:\n",
" trees 決策樹(shù)的集合\n",
" row 測(cè)試數(shù)據(jù)集的每一行數(shù)據(jù)\n",
" Returns:\n",
" 返回隨機(jī)森林中，決策樹(shù)結(jié)果出現(xiàn)次數(shù)做大的\n",
" \"\"\"\n",
"\n",
" # 使用多個(gè)決策樹(shù)trees對(duì)測(cè)試集test的第row行進(jìn)行預(yù)測(cè)，再使用簡(jiǎn)單投票法判斷出該行所屬分類(lèi)\n",
" predictions = [predict(tree, row) for tree in trees]\n",
" return max(set(predictions), key=predictions.count)\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def subSample(dataSet, ratio): \n",
" '''\n",
" 按比例隨機(jī)抽取數(shù)據(jù)，有重復(fù)抽樣\n",
" args:\n",
" dataSet:數(shù)據(jù)集，類(lèi)型:list\n",
" ratio:0-1之間的數(shù)\n",
" '''\n",
" if ratio<0.0:\n",
" return None\n",
" if ratio>=1:\n",
" return dataSet\n",
" sampleNumber=int(len(dataSet)*ratio)\n",
" subSet=putBackSample(dataSet,sampleNumber)\n",
" return subSet"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"41\n"
]
}
],
"source": [
"#測(cè)試\n",
"subSet=subSample(dataSet,0.2)\n",
"print(len(subSet))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"#隨機(jī)森林主函數(shù)\n",
"def buildRandomForest(train, max_depth=10, min_size=1, sample_size=0.2, n_trees=10, n_features=3):\n",
" \"\"\"\n",
" random_forest(評(píng)估算法性能，返回模型得分)\n",
" Args:\n",
" train 訓(xùn)練數(shù)據(jù)集，類(lèi)型:list \n",
" max_depth 決策樹(shù)深度不能太深，不然容易導(dǎo)致過(guò)擬合\n",
" min_size 葉子節(jié)點(diǎn)的大小\n",
" sample_size 訓(xùn)練數(shù)據(jù)集的樣本比例,0,1之間的數(shù)\n",
" n_trees 決策樹(shù)的個(gè)數(shù)\n",
" n_features 選取的特征的個(gè)數(shù)\n",
" Returns:\n",
" trees:樹(shù)序列\(zhòng)n",
" \"\"\"\n",
"\n",
" trees = list()\n",
" # n_trees 表示決策樹(shù)的數(shù)量\n",
" for i in range(n_trees):\n",
" # 隨機(jī)抽樣的訓(xùn)練樣本，隨機(jī)采樣保證了每棵決策樹(shù)訓(xùn)練集的差異性\n",
" sample = subSample(train, sample_size)\n",
" # 創(chuàng)建一個(gè)決策樹(shù)\n",
" tree = buildTree(sample, max_depth, min_size, n_features)\n",
" trees.append(tree)\n",
" return trees\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"def predictByForest(trees,test):\n",
" '''\n",
" predictions 每一行的預(yù)測(cè)結(jié)果，bagging 預(yù)測(cè)最后的分類(lèi)結(jié)果\n",
" '''\n",
" # 每一行的預(yù)測(cè)結(jié)果，bagging 預(yù)測(cè)最后的分類(lèi)結(jié)果\n",
" predictions = [baggingPredict(trees, row) for row in test]\n",
" return predictions"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"def calQuota(predictions,labelClass,OrigClassLabels):\n",
" '''\n",
" 計(jì)算分類(lèi)指標(biāo)\n",
" args:\n",
" predictions:預(yù)測(cè)值，類(lèi)型：list\n",
" labelClass:真實(shí)標(biāo)簽，類(lèi)型：list\n",
" OrigClassLabels:數(shù)據(jù)可能的標(biāo)簽庫(kù)，一個(gè)正例一個(gè)負(fù)例標(biāo)簽\n",
" '''\n",
" \n",
" Pos=OrigClassLabels[0]\n",
" Nev=OrigClassLabels[1] \n",
" #真正例 \n",
" #TP=len([item for item in labelClass if item==Pos and predictions[labelClass.index(item)]==Pos])\n",
" TP=0\n",
" TN=0\n",
" FP=0\n",
" FN=0\n",
" for j in range(len(predictions)): \n",
" if predictions[j]==Pos and labelClass[j]==Pos:\n",
" TP+=1\n",
" if predictions[j]==Nev and labelClass[j]==Nev:\n",
" TN+=1\n",
" if predictions[j]==Pos and labelClass[j]==Nev:\n",
" FP+=1\n",
" if predictions[j]==Nev and labelClass[j]==Pos:\n",
" FN+=1\n",
"# #真負(fù)例,下面的做法不行，原因是index可能得到不同的索引\n",
"# TN=len([item for item in labelClass if item==Nev and predictions[labelClass.index(item)]==Nev])\n",
"# #偽正例\n",
"# FP=len([item for item in labelClass if item==Nev and predictions[labelClass.index(item)]==Pos])\n",
"# #偽負(fù)例\n",
"# FN=len([item for item in labelClass if item==Pos and predictions[labelClass.index(item)]==Nev])\n",
"\n",
" #Recall,TruePosProp=TP/(TP+FN)#識(shí)別的正例占整個(gè)正例的比率\n",
" #FalsPosProp=FP/(FP+TN)#識(shí)別的正例占整個(gè)負(fù)例的比率\n",
" #Precition=TP/(TP+FP)#識(shí)別的正確正例占識(shí)別出所有正例的比率\n",
" \n",
" return TP,TN,FP,FN"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"#測(cè)試下：\n",
"trees=buildRandomForest(dataSet)\n",
"testSet=nonPutBackSample(dataSet,100)\n",
"prediction=predictByForest(trees,testSet)\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(44, 39, 12, 5)\n"
]
}
],
"source": [
"labelClass=[item[-1] for item in testSet]\n",
"\n",
"tp=calQuota(prediction,labelClass,list(classLabels))\n",
"print(tp)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def accuracy( predicted,actual): \n",
" correct = 0\n",
" for i in range(len(actual)):\n",
" if actual[i] == predicted[i]:\n",
" correct += 1\n",
" return correct / float(len(actual)) * 100.0\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"83.0\n"
]
}
],
"source": [
"a=accuracy(prediction,labelClass)\n",
"print(a)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"def createCrossValideSets(trainSet,n_folds,bPutBack=True):\n",
" '''\n",
" 產(chǎn)生交叉驗(yàn)證數(shù)據(jù)集\n",
" Args:\n",
" dataset 原始數(shù)據(jù)集 \n",
" n_folds 數(shù)據(jù)的份數(shù)，數(shù)據(jù)集交叉驗(yàn)證的份數(shù)，采用無(wú)放回抽取\n",
" bPutBack 是否放回\n",
" '''\n",
" subSetsList=[]\n",
" subLen=int(len(trainSet)/n_folds)\n",
" if bPutBack:\n",
" for j in range(n_folds):\n",
" subSet=putBackSample(trainSet,subLen)\n",
" subSetsList.append(subSet)\n",
" else:\n",
" for j in range(n_folds):\n",
" subSet=nonPutBackSample(trainSet,subLen)\n",
" subSetsList.append(subSet)\n",
" return subSetsList"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"def randomForest(trainSet,testSet,max_depth=10, min_size=1, sample_size=0.2, n_trees=10, n_features=3):\n",
" '''\n",
" 構(gòu)造隨機(jī)森林并測(cè)試\n",
" Args:\n",
" train 訓(xùn)練數(shù)據(jù)集，類(lèi)型:list \n",
" testSet 測(cè)試集，類(lèi)型：list\n",
" max_depth 決策樹(shù)深度不能太深，不然容易導(dǎo)致過(guò)擬合\n",
" min_size 葉子節(jié)點(diǎn)的大小\n",
" sample_size 訓(xùn)練數(shù)據(jù)集的樣本比例,0,1之間的數(shù)\n",
" n_trees 決策樹(shù)的個(gè)數(shù)\n",
" n_features 選取的特征的個(gè)數(shù)\n",
" Returns:\n",
" predition 測(cè)試集預(yù)測(cè)值，類(lèi)型：list\n",
" '''\n",
" trees=buildRandomForest(trainSet,max_depth, min_size, sample_size, n_trees, n_features)\n",
" predition=predictByForest(trees,testSet)\n",
" return predition"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"def evaluteAlgorithm(trainSet,algorithm,n_folds,*args):\n",
" '''\n",
" 評(píng)價(jià)算法函數(shù)\n",
" Args:\n",
" dataset 原始數(shù)據(jù)集\n",
" algorithm 使用的算法\n",
" n_folds 數(shù)據(jù)的份數(shù)，數(shù)據(jù)集交叉驗(yàn)證的份數(shù)，采用無(wú)放回抽取\n",
" *args 其他的參數(shù)\n",
" Returns:\n",
" scores 模型得分\n",
" '''\n",
" folds = createCrossValideSets(trainSet, n_folds)\n",
" scores = list()\n",
" # 每次循環(huán)從 folds 從取出一個(gè) fold 作為測(cè)試集，其余作為訓(xùn)練集，遍歷整個(gè) folds ，實(shí)現(xiàn)交叉驗(yàn)證\n",
" for fold in folds:\n",
" train_set = list(folds)\n",
" train_set.remove(fold)\n",
" # 將多個(gè) fold 列表組合成一個(gè) train_set 列表, 類(lèi)似 union all\n",
" \"\"\"\n",
" In [20]: l1=[[1, 2, 'a'], [11, 22, 'b']]\n",
" In [21]: l2=[[3, 4, 'c'], [33, 44, 'd']]\n",
" In [22]: l=[]\n",
" In [23]: l.append(l1)\n",
" In [24]: l.append(l2)\n",
" In [25]: l\n",
" Out[25]: [[[1, 2, 'a'], [11, 22, 'b']], [[3, 4, 'c'], [33, 44, 'd']]]\n",
" In [26]: sum(l, [])\n",
" Out[26]: [[1, 2, 'a'], [11, 22, 'b'], [3, 4, 'c'], [33, 44, 'd']]\n",
" \"\"\"\n",
" train_set = sum(train_set, [])\n",
" test_set = list()\n",
" # fold 表示從原始數(shù)據(jù)集 dataset 提取出來(lái)的測(cè)試集\n",
"# for row in fold:\n",
"# row_copy = list(row)\n",
"# row_copy[-1] = None\n",
"# test_set.append(row_copy)\n",
" predicted = algorithm(train_set, fold, *args)\n",
" \n",
" actual = [row[-1] for row in fold]\n",
"\n",
" # 計(jì)算隨機(jī)森林的預(yù)測(cè)結(jié)果的正確率\n",
" accuracyValue = accuracy(predicted,actual)\n",
" scores.append(accuracyValue)\n",
" return scores"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"隨機(jī)因子= 0.13436424411240122\n",
"決策樹(shù)個(gè)數(shù): 1\n",
"模型得分: [87.8048780487805, 90.2439024390244, 92.6829268292683, 85.36585365853658, 95.1219512195122]\n",
"平均準(zhǔn)確度: 90.244%\n",
"隨機(jī)因子= 0.13436424411240122\n",
"決策樹(shù)個(gè)數(shù): 10\n",
"模型得分: [92.6829268292683, 92.6829268292683, 87.8048780487805, 78.04878048780488, 100.0]\n",
"平均準(zhǔn)確度: 90.244%\n"
]
}
],
"source": [
" \n",
" #綜合測(cè)試函數(shù)\n",
" n_folds = 5 # 分成5份數(shù)據(jù)，進(jìn)行交叉驗(yàn)證\n",
" max_depth = 20 # 調(diào)參（自己修改） #決策樹(shù)深度不能太深，不然容易導(dǎo)致過(guò)擬合\n",
" min_size = 1 # 決策樹(shù)的葉子節(jié)點(diǎn)最少的元素?cái)?shù)量\n",
" sample_size = 1.0 # 做決策樹(shù)時(shí)候的樣本的比例\n",
" # n_features = int((len(dataset[0])-1))\n",
" n_features = 15 # 調(diào)參（自己修改） #準(zhǔn)確性與多樣性之間的權(quán)衡\n",
" for n_trees in [1, 10]: # 理論上樹(shù)是越多越好\n",
" scores = evaluteAlgorithm(dataSet, randomForest, n_folds, max_depth, min_size, sample_size, n_trees, n_features)\n",
" # 每一次執(zhí)行本文件時(shí)都能產(chǎn)生同一個(gè)隨機(jī)數(shù)\n",
" random.seed(1)\n",
" print('隨機(jī)因子=', random.random()) # 每一次執(zhí)行本文件時(shí)都能產(chǎn)生同一個(gè)隨機(jī)數(shù)\n",
" print('決策樹(shù)個(gè)數(shù): %d' % n_trees) # 輸出決策樹(shù)個(gè)數(shù)\n",
" print('模型得分: %s' % scores) # 輸出五份隨機(jī)樣本的模型得分\n",
" print('平均準(zhǔn)確度: %.3f%%' % (sum(scores)/float(len(scores)))) # 輸出五份隨機(jī)樣本的平均準(zhǔn)確度\n"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"隨機(jī)因子= 0.13436424411240122\n",
"決策樹(shù)個(gè)數(shù): 1\n",
"模型得分: [80.48780487804879, 75.60975609756098, 73.17073170731707, 75.60975609756098, 78.04878048780488]\n",
"平均準(zhǔn)確度: 76.585%\n",
"隨機(jī)因子= 0.13436424411240122\n",
"決策樹(shù)個(gè)數(shù): 10\n",
"模型得分: [87.8048780487805, 85.36585365853658, 90.2439024390244, 78.04878048780488, 92.6829268292683]\n",
"平均準(zhǔn)確度: 86.829%\n"
]
}
],
"source": [
" sample_size =0.5 # 做決策樹(shù)時(shí)候的樣本的比例\n",
" \n",
" for n_trees in [1, 10]: # 理論上樹(shù)是越多越好\n",
" scores = evaluteAlgorithm(dataSet, randomForest, n_folds, max_depth, min_size, sample_size, n_trees, n_features)\n",
" # 每一次執(zhí)行本文件時(shí)都能產(chǎn)生同一個(gè)隨機(jī)數(shù)\n",
" random.seed(1)\n",
" print('隨機(jī)因子=', random.random()) # 每一次執(zhí)行本文件時(shí)都能產(chǎn)生同一個(gè)隨機(jī)數(shù)\n",
" print('決策樹(shù)個(gè)數(shù): %d' % n_trees) # 輸出決策樹(shù)個(gè)數(shù)\n",
" print('模型得分: %s' % scores) # 輸出五份隨機(jī)樣本的模型得分\n",
" print('平均準(zhǔn)確度: %.3f%%' % (sum(scores)/float(len(scores)))) # 輸出五份隨機(jī)樣本的平均準(zhǔn)確度"
]
}
],
余下見附件

復(fù)制代碼

全部資料51hei下載地址：

隨機(jī)森林例子.zip (99.15 KB, 下載次數(shù): 11)

帳號		自動登錄	找回密碼
密碼			立即注冊

久久久久久久999_99精品久久精品一区二区爱城_成人欧美一区二区三区在线播放_国产精品日本一区二区不卡视频_国产午夜视频_欧美精品在线观看免费

Python隨機森林例子源碼分享

評分

久久久久久久999_99精品久久精品一区二区爱城_成人欧美一区二区三区在线播放_国产精品日本一区二区不卡视频_国产午夜视频_欧美精品在线观看免费

Python隨機森林例子 源碼分享

評分

Python隨機森林例子源碼分享