2. SGD Training
The SGD weight update is essentially the same as in the binary-classification LR; the difference is that binary LR trains a single weight vector, while K-class LR trains K-1 weight vectors, with the remaining class (here the one with the largest index) acting as the default class.
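Concretely, writing the K-1 trained weight vectors as w_1, ..., w_{K-1} and treating the last class as the default class, the model implemented below computes the following probabilities (this restates the formulas given in the comments before CalcFuncOutByFeaVecForAllClass further down):

\[
P(y = k \mid x) = \frac{\exp(w_k \cdot x)}{1 + \sum_{j=1}^{K-1} \exp(w_j \cdot x)} \quad (k < K),
\qquad
P(y = K \mid x) = \frac{1}{1 + \sum_{j=1}^{K-1} \exp(w_j \cdot x)} .
\]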
The function interfaces are as follows:

// train by SGD on the sample file
bool TrainSGDOnSampleFile (
    const char * sFileName, int iClassNum, int iFeatureNum,    // about the samples
    double dLearningRate,                                       // about the learning
    int iMaxLoop, double dMinImproveRatio                       // about the stop criteria
    );
// initialize the theta matrix with iClassNum and iFeatureNum
bool InitThetaMatrix (int iClassNum, int iFeatureNum);

// calculate the model function output for iClassIndex by feature vector
double CalcFuncOutByFeaVec (vector<FeaValNode> & FeaValNodeVec, int iClassIndex);

// calculate the model function output for all the classes, and return the class index with max probability
int CalcFuncOutByFeaVecForAllClass (vector<FeaValNode> & FeaValNodeVec, vector<double> & ClassProbVec);

// calculate the gradient and update the theta matrix, it returns the cost
double UpdateThetaMatrix (Sample & theSample, vector<double> & ClassProbVec, double dLearningRate);
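These interfaces rely on the Sample and FeaValNode types (and the theta-matrix members) defined elsewhere in this series. Judging only from how their fields are used in the code below, they look roughly like the following sketch; the actual definitions in the original code may differ:

#include <vector>
using std::vector;

// Rough sketch inferred from usages such as p->iFeatureId, p->dValue,
// theSample.iClass and theSample.FeaValNodeVec below; not the original definitions.
struct FeaValNode
{
    int    iFeatureId;   // feature index, i.e. a column of the theta matrix
    double dValue;       // feature value
};

struct Sample
{
    int iClass;                          // class label of the sample
    vector<FeaValNode> FeaValNodeVec;    // sparse feature vector
};

// Members of LogisticRegression used below (again inferred from usage):
//   vector< vector<double> > ThetaMatrix;   // weight vectors, one row per class index
//   int iClassNum;                          // number of classes K
//   int iFeatureNum;                        // number of features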
The SGD implementation is as follows:
// the sample format: classid feature1_value feature2_value...
bool LogisticRegression::TrainSGDOnSampleFile (
    const char * sFileName, int iClassNum, int iFeatureNum,    // about the samples
    double dLearningRate = 0.05,                                // about the learning
    int iMaxLoop = 1, double dMinImproveRatio = 0.01            // about the stop criteria
    )
{
    ifstream in (sFileName);
    if (!in)
    {
        cerr << "Can not open the file of " << sFileName << endl;
        return false;
    }
    if (!InitThetaMatrix (iClassNum, iFeatureNum))
        return false;

    double dCost = 0.0;
    double dPreCost = 100.0;
    for (int iLoop = 0; iLoop < iMaxLoop; iLoop++)
    {
        int iSampleNum = 0;
        int iErrNum = 0;
        string sLine;
        while (getline (in, sLine))
        {
            Sample theSample;
            if (ReadSampleFrmLine (sLine, theSample))
            {
                vector<double> ClassProbVec;
                int iPredClassIndex = CalcFuncOutByFeaVecForAllClass (theSample.FeaValNodeVec, ClassProbVec);
                if (iPredClassIndex != theSample.iClass)
                    iErrNum++;
                dCost += UpdateThetaMatrix (theSample, ClassProbVec, dLearningRate);
                iSampleNum++;
            }
        }

        dCost /= iSampleNum;
        double dTmpRatio = (dPreCost - dCost) / dPreCost;
        double dTmpErrRate = (double)iErrNum / iSampleNum;

        // show info on screen
        cout << "In loop " << iLoop << ": current cost (" << dCost << ") previous cost (" << dPreCost << ") ratio (" << dTmpRatio << ") " << endl;
        cout << "And Error rate : " << dTmpErrRate << endl;

        if (dTmpRatio < dMinImproveRatio)
            break;
        else
        {
            dPreCost = dCost;
            dCost = 0.0;
            // reset the current reading position of file
            in.clear();
            in.seekg (0, ios::beg);
        }
    }
    return true;
}
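A call site might look roughly like the following; the file name, class/feature counts and hyper-parameters here are made-up illustrative values, not ones taken from the original post:

// Hypothetical usage sketch for TrainSGDOnSampleFile; all constants are illustrative.
LogisticRegression lr;
bool bTrained = lr.TrainSGDOnSampleFile (
    "train_samples.txt",    // sample file: classid feature1_value feature2_value...
    3,                      // iClassNum
    10000,                  // iFeatureNum
    0.05,                   // dLearningRate
    50,                     // iMaxLoop
    0.001);                 // dMinImproveRatio
if (!bTrained)
    cerr << "SGD training failed" << endl;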
// it returns the value of f(x) = exp (W*X) for iClassIndex < K, otherwise 1.0 for iClassIndex == K
double LogisticRegression::CalcFuncOutByFeaVec (vector<FeaValNode> & FeaValNodeVec, int iClassIndex)
{
    if (iClassIndex >= iClassNum || iClassIndex < 0)    // wrong situation
        return 0.0;
    if (iClassIndex == (iClassNum-1))                   // the default class (here is the class with max index)
        return 1.0;

    double dX = 0.0;
    vector<FeaValNode>::iterator p = FeaValNodeVec.begin();
    while (p != FeaValNodeVec.end())
    {
        if (p->iFeatureId < (int)ThetaMatrix.at(iClassIndex).size())    // all input is evil
            dX += ThetaMatrix[iClassIndex][p->iFeatureId] * p->dValue;
        p++;
    }
    double dY = exp (dX);
    return dY;
}

// the class probability is calculated by :
// f(x) = exp (W*X) / {1.0 + sum_exp (W*X)} as long as iClassIndex < K
// f(x) = 1.0 / {1.0 + sum_exp (W*X)} as long as iClassIndex == K
int LogisticRegression::CalcFuncOutByFeaVecForAllClass (vector<FeaValNode> & FeaValNodeVec, vector<double> & ClassProbVec)
{
    ClassProbVec.clear();
    ClassProbVec.resize (iClassNum, 0.0);

    // dSum starts from 0.0: the default class already contributes the "1.0" term
    // through CalcFuncOutByFeaVec, so starting from 1.0 would count it twice
    double dSum = 0.0;
    for (int i = 0; i < iClassNum; i++)
    {
        ClassProbVec.at(i) = CalcFuncOutByFeaVec (FeaValNodeVec, i);
        dSum += ClassProbVec.at(i);
    }

    double dMaxProb = 0.0;
    int iClassMaxProb = -1;
    for (int i = 0; i < iClassNum; i++)
    {
        ClassProbVec.at(i) /= dSum;
        if (ClassProbVec.at(i) > dMaxProb)
        {
            dMaxProb = ClassProbVec.at(i);    // remember the current maximum, otherwise every later class with non-zero probability would win
            iClassMaxProb = i;
        }
    }
    return iClassMaxProb;
}

The probabilities computed here are in fact softmax probabilities. The weight-update function:
// the update formula is : theta_new = theta_old - dLearningRate * (dY - y_i) * dXi,
// where y_i is 1 for the sample's own class and 0 otherwise
double LogisticRegression::UpdateThetaMatrix (Sample & theSample, vector<double> & ClassProbVec, double dLearningRate)
{
    double dCost = 0.0;
    for (int i = 0; i < iClassNum-1; i++)
    {
        if (i == theSample.iClass)
        {
            vector<FeaValNode>::iterator p = theSample.FeaValNodeVec.begin();
            while (p != theSample.FeaValNodeVec.end())
            {
                if (p->iFeatureId < (int)ThetaMatrix[i].size())
                {
                    double dGradient = (ClassProbVec[i] - 1.0) * p->dValue;
                    double dDelta = dGradient * dLearningRate;
                    ThetaMatrix[i][p->iFeatureId] -= dDelta;
                }
                p++;
            }
            // cost contribution is -log(P_i) when i is the sample's own class
            dCost -= log (ClassProbVec[i]);
        }
        else
        {
            vector<FeaValNode>::iterator p = theSample.FeaValNodeVec.begin();
            while (p != theSample.FeaValNodeVec.end())
            {
                if (p->iFeatureId < (int)ThetaMatrix[i].size())
                {
                    double dGradient = ClassProbVec[i] * p->dValue;
                    double dDelta = dGradient * dLearningRate;
                    ThetaMatrix[i][p->iFeatureId] -= dDelta;
                }
                p++;
            }
            // cost contribution is -log(1 - P_i) for every other class
            dCost -= log (1.0 - ClassProbVec[i]);
        }
    }
    return dCost;
}
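For reference, the updates applied above are exactly the SGD step for the per-sample negative log-likelihood of the model given at the top of this section (a derivation sketch of mine, not something spelled out in the original post). With c the sample's class, P_i the softmax probabilities computed by CalcFuncOutByFeaVecForAllClass, and eta = dLearningRate:

\[
J(\theta) = -\log P(y = c \mid x), \qquad
\frac{\partial J}{\partial \theta_i} = \bigl(P_i - \mathbb{1}[i = c]\bigr)\, x, \qquad
\theta_i \leftarrow \theta_i - \eta \bigl(P_i - \mathbb{1}[i = c]\bigr)\, x, \quad i = 1, \dots, K-1 .
\]

This matches dGradient = (ClassProbVec[i] - 1.0) * p->dValue for the sample's own class and ClassProbVec[i] * p->dValue for the others. The dCost value that the function returns additionally accumulates -log(1 - P_i) terms for the other classes, and is only used as the progress measure in TrainSGDOnSampleFile.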
Please credit the source when reposting: http://blog.csdn.net/xceman1997/article/details/18449317