% KNN classification with hand-written min-max normalisation.
% Assumes the workspace already holds `wine` (one sample per row, features in
% columns 1..13) and `wine_labels` (one label per sample) -- TODO confirm
% against whatever loads the data set.
%
% Alternative fixed (non-random) split, kept for reference:
% train=[wine(1:30,:);wine(60:89,:);wine(131:160,:)];
% test=[wine(31:59,:);wine(90:130,:);wine(161:178,:)];
% train_labels=[wine_labels(1:30,:);wine_labels(60:89,:);wine_labels(131:160,:)];
% test_labels=[wine_labels(31:59,:);wine_labels(90:130,:);wine_labels(161:178,:)];

% Append the labels as the last column so that features and labels are
% shuffled together.
label_col = 14;
feature_cols = 1:label_col-1;
wine(:,label_col) = wine_labels;

% Shuffle the rows randomly (the generator is not seeded here, so the split
% differs from run to run).
wine_data = wine(randperm(size(wine,1)),:);

% The first n_train rows form the training set, the remaining rows the test set.
n_train = 100;
train = wine_data(1:n_train,feature_cols);
train_labels = wine_data(1:n_train,label_col);
test = wine_data(n_train+1:end,feature_cols);
test_labels = wine_data(n_train+1:end,label_col);

% Min-max normalisation. The scaling parameters come from the training set
% only and are then applied unchanged to the test set.
h = max(train,[],1);
l = min(train,[],1);
span = h - l;
span(span == 0) = 1;   % constant feature: avoid 0/0 -> NaN distances
train = bsxfun(@rdivide, bsxfun(@minus, train, l), span);
test = bsxfun(@rdivide, bsxfun(@minus, test, l), span);
% Toolbox-based alternative for the normalisation step, kept for reference:
%[train,strc]=mapminmax(train');
%test=mapminmax.apply(test',strc);

% Distance between every test sample (rows) and every training sample (columns).
all_distance = dist(test,train');

% Sort each row ascending; B holds the training-sample indices, nearest first.
[~,B] = sort(all_distance,2);

% K nearest neighbours.
K = 3;
% One row per test sample, one column per neighbour. The reshape keeps that
% layout even when the index expression degenerates to a vector (K == 1 or a
% single test sample).
all_labels = reshape(train_labels(B(:,1:K)),[],K);

% Majority vote. mode() returns the winning label VALUE (not its position in
% a list of unique labels) and resolves ties in favour of the smallest label.
predict_labels = mode(all_labels,2)';

% Accuracy = fraction of test samples whose predicted label matches.
% Deliberately left without a semicolon so the value is displayed.
accuracy = mean(predict_labels == test_labels')

% Plot actual versus predicted labels for the test set.
figure;
hold on;
plot(test_labels,'o');
plot(predict_labels,'*');
xlabel('測試集數據','FontSize',12);
ylabel('類別標簽','FontSize',12);
legend('實際測試集分類','預測測試集分類');
title('測試集的實際分類和預測分類圖','FontSize',12);
grid on;
% KNN classification with normalisation done by the mapminmax function.
% Assumes the workspace already holds `wine` (one sample per row, features in
% columns 1..13) and `wine_labels` (one label per sample) -- TODO confirm
% against whatever loads the data set.
%
% Alternative fixed (non-random) split, kept for reference:
% train=[wine(1:30,:);wine(60:89,:);wine(131:160,:)];
% test=[wine(31:59,:);wine(90:130,:);wine(161:178,:)];
% train_labels=[wine_labels(1:30,:);wine_labels(60:89,:);wine_labels(131:160,:)];
% test_labels=[wine_labels(31:59,:);wine_labels(90:130,:);wine_labels(161:178,:)];

% Append the labels as the last column so that features and labels are
% shuffled together.
label_col = 14;
feature_cols = 1:label_col-1;
wine(:,label_col) = wine_labels;

% Shuffle the rows randomly (the generator is not seeded here, so the split
% differs from run to run).
wine_data = wine(randperm(size(wine,1)),:);

% The first n_train rows form the training set, the remaining rows the test set.
n_train = 100;
train = wine_data(1:n_train,feature_cols);
train_labels = wine_data(1:n_train,label_col);
test = wine_data(n_train+1:end,feature_cols);
test_labels = wine_data(n_train+1:end,label_col);

% Normalisation. mapminmax is fed the transposed matrices, so from here on
% `train` and `test` are stored as features x samples.
[train,strc] = mapminmax(train');
% strc holds the scaling settings obtained from the training set; applying
% them to the test set scales both sets with the same parameters.
test = mapminmax.apply(test',strc);

% Distance between every test sample (rows) and every training sample (columns).
all_distance = dist(test',train);

% Sort each row ascending; B holds the training-sample indices, nearest first.
[~,B] = sort(all_distance,2);

% K nearest neighbours.
K = 5;
% One row per test sample, one column per neighbour. The reshape keeps that
% layout even when the index expression degenerates to a vector (K == 1 or a
% single test sample).
all_labels = reshape(train_labels(B(:,1:K)),[],K);

% Majority vote. mode() returns the winning label VALUE (not its position in
% a list of unique labels) and resolves ties in favour of the smallest label.
predict_labels = mode(all_labels,2)';

% Accuracy = fraction of test samples whose predicted label matches.
% Deliberately left without a semicolon so the value is displayed.
accuracy = mean(predict_labels == test_labels')

% Plot actual versus predicted labels for the test set.
figure;
hold on;
plot(test_labels,'o');
plot(predict_labels,'*');
xlabel('測試集數據','FontSize',12);
ylabel('類別標簽','FontSize',12);
legend('實際測試集分類','預測測試集分類');
title('測試集的實際分類和預測分類圖','FontSize',12);
grid on;
復制代碼
|