PLA之pocket演算法C++ 實現

NO IMAGE

#include<bits/stdc++.h>
using namespace std;
// Number of rows in `matrix` and length of each label array.
#define ROWS 100 
// Row width of `matrix` and length of the weight arrays; makeUpMatrix stores
// the constant 1 (the x0 term) in column 0 of every row it reads.
#define COLUMNS 10001 
// findW() stops looping once iterTimes reaches this value.
#define MAXITERTIMES 1000
// Feature matrix shared by training and testing; filled by makeUpMatrix().
int matrix[ROWS][COLUMNS];
// Labels read from the train/test label files, and the labels predicted by test().
int trainY[ROWS],testY[ROWS],myY[ROWS];
int W[COLUMNS] = {0};// working weight vector; includes the extra x0 slot
// Copy of W taken whenever isBetter() reports an improvement (the "pocket").
int bestW[COLUMNS] = {0};
// Shared output stream; opened and closed in main().
ofstream output;
// Shared input stream used by the file-reading helpers.
ifstream Input;
int iterTimes = 0;// number of weight updates performed so far 
// Maps an activation to a class label: -1 for negative input, +1 otherwise
// (zero is treated as positive).
int sign(int x)
{
    if (x < 0)
    {
        return -1;
    }
    return 1;
}
void makeUpMatrix(string fileName)
{
int lines = 0;
Input.open(fileName.c_str());
string line;
while(getline(Input,line))
{
stringstream ss;
int columns = 1;
//一行一句話的,所以用stringstream,整行讀入的特性會方便很多 
ss.clear(); 
ss.str(line);//From:http://blog.csdn.net/lanbing510/article/details/42423333
//str()成員函式的使用可以讓istringstream物件返回一個string字串 
int dict; 
matrix[lines][0] = 1;
while(ss>>dict) //讀入每行的資料集內容的詞彙 
{
matrix[lines][columns ] = dict;
}
//讀取完這行詞彙了 
lines ;  //第幾行計數 
}
Input.close();

}
void readLabels()
{
Input.open(“train_labels.txt”);
int lines = 0;
string line;
while(getline(Input,line))
{
stringstream ss;
ss.clear(); 
ss.str(line);
int dict; 
while(ss>>dict)
{
trainY[lines ] = dict;
}
}
Input.close();

Input.open(“test_labels.txt”);
lines = 0;
while(getline(Input,line))
{
stringstream ss;
ss.clear(); 
ss.str(line);
int dict; 
while(ss>>dict)
{
testY[lines ] = dict;
}
}
Input.close();
}

// Returns the inner product of the first `length` elements of `a` and `b`.
// A `length` of zero (or less) yields 0. Neither array is modified.
int dotProduct(int *a, int *b, int length)
{
    int ans = 0;
    for (int i = 0; i < length; i++)
    {
        ans += a[i] * b[i];
    }
    return ans;
}
// Perceptron correction step: adds `label * a[i]` to every component of the
// global weight vector W. `a` must point to at least COLUMNS integers (one
// row of `matrix`). Only W is modified; bestW is never touched here.
void updateW(int label, int* a)
{
    for (int i = 0; i < COLUMNS; i++)
    {
        W[i] += label * a[i];
    }
}
bool isBetter(int label,int *a)
{
int originRightTimes = 0, newRightTimes = 0;
for(int i = 0 ;i < ROWS;i ) 
{
if(sign(dotProduct(matrix[i],W,COLUMNS)) == trainY[i]) originRightTimes ;
//計算原來的W得到的正確次數 
if(sign(dotProduct(matrix[i],bestW,COLUMNS)) == trainY[i]) newRightTimes ;
}
printf(“原正確次數= %d, 後正確次數= %d\n”,originRightTimes,newRightTimes);
if(newRightTimes < originRightTimes) return true;
else return false;
}
void findW()
{
bool ok = false;// 是否找到w陣列了
while((!ok) && (iterTimes < MAXITERTIMES)) //還沒完善 
{
ok = true;
for(int i = 0 ; i < ROWS; i )
{
if(sign( dotProduct(matrix[i],W,COLUMNS) ) != trainY[i])
{
ok = false;
updateW(trainY[i],matrix[i]);
iterTimes ;  //迭代一次 
cout<<iterTimes<<endl; 
if(isBetter(trainY[i],matrix[i])) 
{
for(int i = 0 ; i < COLUMNS;i )
{bestW[i] = W[i];}//更新bestW 陣列 

break;
}


}
void seeW()
{
for(int i = 0 ; i < COLUMNS; i ) output<<W[i]<<” “;
output<<endl;
output<<“iterator times is “<< iterTimes <<endl; 
}
// Returns numerator / denominator as a double, or 0 when the denominator is
// zero, so an empty class cannot produce NaN or infinity in the report.
static double ratioOrZero(double numerator, double denominator)
{
    return denominator != 0.0 ? numerator / denominator : 0.0;
}

// Writes the four standard binary-classification metrics to the global
// `output` stream, computed from the confusion-matrix counts:
//   TP / FN - positives predicted as positive / negative
//   FP / TN - negatives predicted as positive / negative
// A metric whose denominator is zero is reported as 0.
void cal4(int TP,int FN,int FP, int TN)
{
    double Accuracy = ratioOrZero(TP + TN, TP + FP + TN + FN);
    double Recall = ratioOrZero(TP, TP + FN);
    double Precision = ratioOrZero(TP, TP + FP);
    double F1 = ratioOrZero(2 * Precision * Recall, Precision + Recall);
    output << "Accuracy = " << Accuracy << std::endl;
    output << "Recall = " << Recall << std::endl;
    output << "Precision= " << Precision << std::endl;
    output << "F1 = " << F1 << std::endl;
}
// Classifies every row currently in `matrix` with the weights W, stores the
// prediction in myY, tallies the confusion matrix against testY and passes
// the counts to cal4() for reporting. Any testY value other than 1 is
// treated as the negative class.
void test()
{
    int TP = 0, FN = 0, FP = 0, TN = 0;
    for (int i = 0; i < ROWS; i++)
    {
        myY[i] = sign(dotProduct(matrix[i], W, COLUMNS));
        if (testY[i] == 1) // ground truth is +1
        {
            if (myY[i] == 1) TP++;
            else FN++;
        }
        else // ground truth is -1
        {
            if (myY[i] == 1) FP++;
            else TN++;
        }
    }
    cal4(TP, FN, FP, TN);
}
int main()
{
output.open(“myansWer.txt”);
makeUpMatrix(“train_data.txt”);  //將train.data資料儲存在matrix這個二維矩陣 
readLabels();   //讀取train,test的標籤 
findW();//完善W陣列了 
seeW();// 看看找到的W陣列和迭代次數 
makeUpMatrix(“test_data.txt”);//將test.data資料儲存在matrix這個二維矩陣 
test();         //對test進行分類 
output.close();