首页 > 其他 > 详细

分享一段基于物品的协同推荐(ItemCF)代码:根据用户读的书推荐图书

时间:2020-03-14 12:42:28      阅读:86      评论:0      收藏:0      [点我收藏+]
import math
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from configparser import ConfigParser

import pymysql


def get_db_conn():
    """Open and return a new PyMySQL connection.

    The connection parameters come from the module-level ``host``, ``user``,
    ``password``, ``database`` and ``port`` values.  The connection is opened
    with autocommit enabled and the ``utf8mb4`` character set.  The caller is
    responsible for closing it.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "port": port,
        "autocommit": True,
        "charset": "utf8mb4",
    }
    return pymysql.connect(**settings)


def get_train():
    """Load the training data as a user -> books mapping.

    Selects every ``(user_id, book_id)`` row from
    ``kid_user_read_book_for_reading_recommend``.  The module-level ``limit``
    string is appended verbatim to the query, so it must only ever hold a
    trusted SQL fragment (it is a constant in this script).

    Returns:
        dict: ``{user_id: {book_id: 1, ...}, ...}``.
    """
    print("获取训练数据")

    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("SELECT user_id, book_id FROM kid_user_read_book_for_reading_recommend %s" % limit)
        results = cursor.fetchall()
    finally:
        # Release the connection even when the query fails.
        con.close()

    ret = dict()
    for user_id, book_id in results:
        ret.setdefault(user_id, {})[book_id] = 1
    return ret


def take_second(elem):
    """Return the item at index 1 of ``elem`` (used as a sort key)."""
    second = elem[1]
    return second


def item_similarity():
    """Build the item co-occurrence data and publish the similarity table.

    Steps:
      1. For every user in the module-level ``train`` mapping, update
         ``n[i]`` (number of users whose item set contains ``i``) and
         ``c[i][j]`` (number of users whose item set contains both ``i``
         and ``j``, ``i != j``).
      2. Recreate ``tmp_reading_recommend_for_you_by_read`` with the same
         structure as ``kid_reading_recommend_for_you_by_read``.
      3. Call ``save`` for every item on a thread pool to fill the
         temporary table.
      4. Drop the live table and rename the temporary table into its place.

    Sets the module-level ``total_count`` to the number of items in ``c``.

    Raises:
        Exception: whatever a ``save`` worker raised.  It is re-raised
            before step 4, so a partially filled temporary table never
            replaces the live table.
    """
    global total_count

    print("计算物品-物品的共同矩阵")
    count = len(train)
    for index, items in enumerate(train.values(), 1):
        print("进度:%s/%s" % (index, count))

        for i in items:
            n[i] = n.get(i, 0) + 1
            co_counts = c.setdefault(i, {})
            for j in items:
                if i != j:
                    co_counts[j] = co_counts.get(j, 0) + 1

    # Prepare an empty staging table with the live table's structure.
    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("DROP TABLE IF EXISTS tmp_reading_recommend_for_you_by_read")
        cursor.execute("CREATE TABLE tmp_reading_recommend_for_you_by_read LIKE kid_reading_recommend_for_you_by_read")
    finally:
        con.close()

    print("计算相似度矩阵")
    total_count = len(c)
    # NOTE(review): every save() call opens its own connection, so up to 300
    # connections may be open at once -- confirm the server allows that many.
    with ThreadPoolExecutor(300) as executor:
        futures = [
            executor.submit(save, book_id, related_items)
            for book_id, related_items in c.items()
        ]
    # Leaving the ``with`` block waits for all workers.  result() re-raises
    # any worker exception, which would otherwise be silently discarded.
    for future in futures:
        future.result()

    # NOTE(review): DROP followed by RENAME leaves a short window in which
    # the live table does not exist.
    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("DROP TABLE kid_reading_recommend_for_you_by_read")
        cursor.execute("RENAME TABLE tmp_reading_recommend_for_you_by_read TO kid_reading_recommend_for_you_by_read")
    finally:
        con.close()


# Guards the shared ``finish_count`` counter: save() runs on pool threads and
# ``finish_count += 1`` is a non-atomic read-modify-write.
_progress_lock = threading.Lock()


def save(book_id, related_items):
    """Store the most similar books for ``book_id`` in the staging table.

    The similarity of ``book_id`` and another book ``j`` is
    ``cij / sqrt(n[book_id] * n[j])``, where ``cij`` is the co-occurrence
    count from ``related_items`` and ``n`` is the module-level per-item user
    count.  The ``save_top_count`` highest-scoring books are inserted into
    ``tmp_reading_recommend_for_you_by_read``.

    Args:
        book_id: the book whose neighbours are being stored.
        related_items: mapping ``{other_book_id: co-occurrence count}``.

    Side effects:
        Increments the module-level ``finish_count`` and prints progress.
    """
    global finish_count

    # Score every candidate, then sort and truncate once (the result is the
    # same as re-sorting after each append, without the repeated work).
    scored = [
        (j, cij / math.sqrt(n[book_id] * n[j]))
        for j, cij in related_items.items()
    ]
    scored.sort(key=take_second, reverse=True)
    rows = [(book_id, j, value) for j, value in scored[:save_top_count]]

    if rows:
        # All values, including book_id, are passed as bound parameters.
        sql = ("INSERT INTO tmp_reading_recommend_for_you_by_read"
               "(book_id,similar_book_id,value) VALUES(%s,%s,%s)")
        con = get_db_conn()
        try:
            con.cursor().executemany(sql, rows)
        finally:
            # Release the connection even when the insert fails.
            con.close()

    with _progress_lock:
        finish_count += 1
        done = finish_count
    print("进度:%s/%s" % (done, total_count))


# Wall-clock start time, echoed in the final summary line.
start_time = time.strftime("%H:%M:%S")

# SQL fragment appended verbatim to the training query in get_train().
limit = ""
# Maximum number of similar books stored per book by save().
save_top_count = 30

# Progress counters used by save() and item_similarity().
finish_count = total_count = 0

# MySQL connection settings read by get_db_conn().
cf = ConfigParser()
cf.read("../py_config.ini")
host, user, password, database = (
    cf.get("mysql", option) for option in ("host", "user", "password", "db")
)
port = cf.getint("mysql", "port")

# Run the stored procedure that precedes loading the training data.
# NOTE(review): presumably it refreshes the table read by get_train() -- confirm.
conn = get_db_conn()
conn.cursor().callproc("计算用户读过的书(为你推荐)")
conn.close()

# Item-item co-occurrence matrix, filled in by item_similarity().
c = {}
# Per-item count of distinct users, filled in by item_similarity().
n = {}
train = get_train()
item_similarity()

# Run the follow-up stored procedure once the similarity table is published.
conn = get_db_conn()
conn.cursor().callproc("calcReadingRecommendForYou", [1])
conn.close()

end_time = time.strftime("%H:%M:%S")
print("运行结束,开始于 %s,结束于 %s" % (start_time, end_time))

分享一段基于物品的协同推荐(ItemCF)代码:根据用户读的书推荐图书

原文:https://www.cnblogs.com/ilovejesus/p/12491089.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!