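/**
 * Gender-based IDRescorer.
 *
 * For users who care about gender, an IDRescorer can filter items (here, user profiles).
 * The gender a user prefers is first guessed by checking the gender of the profiles
 * the user has already rated; profiles of the opposite gender can then be filtered out.
 *
 * @author YangXin
 */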
package unitFive;
import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.common.iterator.FileLineIterable;
/**
 * An {@link IDRescorer} that excludes candidate profiles of one gender for a given user.
 *
 * <p>The user's preferred gender is guessed from the profiles the user has already rated:
 * a user who has rated more male than female profiles is assumed to prefer men, so female
 * profiles are excluded; otherwise male profiles are excluded. Excluded profiles are
 * reported through {@link #isFiltered(long)} and receive a score of {@code NaN} from
 * {@link #rescore(long, double)}.
 */
public class GenderRescorer implements IDRescorer {

    /** IDs of profiles whose gender is male. */
    private final FastIDSet men;

    /** IDs of profiles whose gender is female. */
    private final FastIDSet women;

    /** Cache of user IDs already found to have rated more men than women. */
    private final FastIDSet usersRateMoreMen;

    /** Cache of user IDs already found NOT to have rated more men than women. */
    private final FastIDSet usersRateLessMen;

    /** {@code true} when male profiles are excluded, {@code false} when female profiles are. */
    private final boolean filterMen;

    /**
     * Creates a rescorer for one user.
     *
     * @param men              IDs of male profiles
     * @param women            IDs of female profiles
     * @param usersRateMoreMen cache of users known to rate more men; may be updated by this call
     * @param usersRateLessMen cache of users known not to rate more men; may be updated by this call
     * @param userID           the user for whom recommendations are being rescored
     * @param model            data model used to read the user's existing preferences
     * @throws TasteException if the user's preferences cannot be read from {@code model}
     */
    public GenderRescorer(FastIDSet men, FastIDSet women, FastIDSet usersRateMoreMen,
                          FastIDSet usersRateLessMen, long userID, DataModel model) throws TasteException {
        this.men = men;
        this.women = women;
        this.usersRateMoreMen = usersRateMoreMen;
        this.usersRateLessMen = usersRateLessMen;
        // A user who mostly rates men presumably prefers men, so the OPPOSITE gender must be
        // filtered: men are excluded only when the user does not rate more men than women.
        // (On a tie, including a user with no gendered ratings, men are the ones excluded.)
        this.filterMen = !ratesMoreMen(userID, model);
    }

    /**
     * Parses a gender file into two sets of profile IDs.
     *
     * <p>Each non-blank line must look like {@code <profileID>,<gender>}. Only the first
     * character after the first comma is inspected: {@code 'U'} (unknown) lines are skipped,
     * {@code 'M'} goes to the male set, and any other character goes to the female set.
     *
     * @param genderFile the file to read (for example {@code gender.dat})
     * @return a two-element array: index 0 is the male set, index 1 is the female set
     * @throws IOException if the file cannot be read, or a non-blank line has no comma or
     *                     nothing after the comma
     * @throws NumberFormatException if the profile ID of a non-skipped line is not a valid long
     */
    public static FastIDSet[] parseMenWomen(File genderFile) throws IOException {
        FastIDSet men = new FastIDSet(50000);
        FastIDSet women = new FastIDSet(50000);
        for (String line : new FileLineIterable(genderFile)) {
            // Tolerate blank lines instead of failing on charAt() below.
            if (line.trim().isEmpty()) {
                continue;
            }
            int comma = line.indexOf(',');
            // Without a comma, or with nothing after it, there is no gender field to read.
            if (comma < 0 || comma + 1 >= line.length()) {
                throw new IOException("Malformed line in " + genderFile
                        + " (expected \"<profileID>,<gender>\"): " + line);
            }
            char gender = line.charAt(comma + 1);
            if (gender == 'U') {
                continue;
            }
            long profileID = Long.parseLong(line.substring(0, comma));
            if (gender == 'M') {
                men.add(profileID);
            } else {
                women.add(profileID);
            }
        }
        // Rehash both sets once bulk loading is finished.
        men.rehash();
        women.rehash();
        return new FastIDSet[]{men, women};
    }

    /**
     * Tells whether a user has rated more male profiles than female profiles.
     *
     * <p>The two cache sets are consulted first. On a cache miss the user's preferences are
     * counted and the result is stored in the matching cache set. Rated items that appear in
     * neither gender set are ignored, and a tie counts as "does not rate more men".
     *
     * @param userID the user to classify
     * @param model  data model used to read the user's preferences
     * @return {@code true} if strictly more rated profiles are male than female
     * @throws TasteException if the user's preferences cannot be read from {@code model}
     */
    public boolean ratesMoreMen(long userID, DataModel model) throws TasteException {
        if (usersRateMoreMen.contains(userID)) {
            return true;
        }
        if (usersRateLessMen.contains(userID)) {
            return false;
        }
        PreferenceArray prefs = model.getPreferencesFromUser(userID);
        int menCount = 0;
        int womenCount = 0;
        for (int i = 0; i < prefs.length(); i++) {
            long profileID = prefs.get(i).getItemID();
            if (men.contains(profileID)) {
                menCount++;
            } else if (women.contains(profileID)) {
                womenCount++;
            }
        }
        // Users who mostly rate men are assumed to prefer male profiles.
        boolean ratesMoreMen = menCount > womenCount;
        if (ratesMoreMen) {
            usersRateMoreMen.add(userID);
        } else {
            usersRateLessMen.add(userID);
        }
        return ratesMoreMen;
    }

    /**
     * Returns the score unchanged, or {@code NaN} for a profile that is filtered out.
     *
     * @param profileID     the candidate profile
     * @param originalScore the score computed by the recommender
     * @return {@code Double.NaN} if {@link #isFiltered(long)} is true, else {@code originalScore}
     */
    @Override
    public double rescore(long profileID, double originalScore) {
        return isFiltered(profileID) ? Double.NaN : originalScore;
    }

    /**
     * Tells whether a profile belongs to the gender being excluded for this user.
     *
     * @param profileID the candidate profile
     * @return {@code true} if the profile is in the excluded gender set
     */
    @Override
    public boolean isFiltered(long profileID) {
        return filterMen ? men.contains(profileID) : women.contains(profileID);
    }
}
// Original post: http://blog.csdn.net/u012965373/article/details/50692832