"""Item-based collaborative filtering (ItemCF) batch job.

Reads (user_id, book_id) pairs from MySQL, builds an item-item co-occurrence
matrix, stores the top similar books per book in a temporary table, and then
replaces the live recommendation table with it.
"""
from concurrent.futures import ThreadPoolExecutor
from configparser import ConfigParser
from contextlib import closing
import math
import threading
import time

import pymysql


def get_db_conn():
    """Open a new MySQL connection using the module-level settings.

    Reads the globals ``host``, ``user``, ``password``, ``database`` and
    ``port``, which are populated from the config file further down.
    The connection is opened with ``autocommit=True``.
    """
    return pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database,
        port=port,
        autocommit=True,
        charset="utf8mb4",
    )


def _run_statements(*statements):
    """Execute each SQL statement in order on a connection that is always closed."""
    with closing(get_db_conn()) as con:
        with con.cursor() as cursor:
            for statement in statements:
                cursor.execute(statement)


def get_train():
    """Load the training data.

    Returns:
        dict: ``{user_id: {book_id: 1, ...}, ...}`` built from the rows of
        ``kid_user_read_book_for_reading_recommend``.
    """
    print("获取训练数据")
    ret = dict()
    # closing() guarantees the connection is released even if the query fails.
    with closing(get_db_conn()) as con:
        with con.cursor() as cursor:
            # `limit` is a module-level SQL fragment (empty by default), not
            # user input; a clause cannot be passed as a bound parameter.
            cursor.execute(
                "SELECT user_id, book_id FROM kid_user_read_book_for_reading_recommend %s" % limit
            )
            for row in cursor.fetchall():
                user_id = row[0]
                book_id = row[1]
                ret.setdefault(user_id, {})[book_id] = 1
    return ret


def take_second(elem):
    """Sort key: return the second element of a pair."""
    return elem[1]


def item_similarity():
    """Build the co-occurrence matrix and persist the similarity table.

    Fills the module-level dicts ``c`` (co-occurrence counts) and ``n``
    (per-item user counts) from ``train``, writes the top similar items of
    every book into ``tmp_reading_recommend_for_you_by_read`` using a thread
    pool, and finally replaces ``kid_reading_recommend_for_you_by_read`` with
    the temporary table.

    Raises:
        Exception: whatever a ``save`` worker raised. The live table is only
            replaced when every worker finished successfully.
    """
    global total_count

    print("计算物品-物品的共同矩阵")
    count = len(train)
    for index, items in enumerate(train.values(), start=1):
        print("进度:%s/%s" % (index, count))
        for i in items:
            n[i] = n.get(i, 0) + 1
            row = c.setdefault(i, {})
            for j in items:
                if i != j:
                    row[j] = row.get(j, 0) + 1

    # Start from an empty temp table with the same schema as the live one.
    _run_statements(
        "DROP TABLE IF EXISTS tmp_reading_recommend_for_you_by_read",
        "CREATE TABLE tmp_reading_recommend_for_you_by_read LIKE kid_reading_recommend_for_you_by_read",
    )

    print("计算相似度矩阵")
    total_count = len(c)
    with ThreadPoolExecutor(save_worker_count) as executor:
        futures = [
            executor.submit(save, book_id, related_items)
            for book_id, related_items in c.items()
        ]
    # submit() stores worker exceptions on the future; surface them here so a
    # partially filled temp table never replaces the live table.
    for future in futures:
        future.result()

    _run_statements(
        "DROP TABLE kid_reading_recommend_for_you_by_read",
        "RENAME TABLE tmp_reading_recommend_for_you_by_read TO kid_reading_recommend_for_you_by_read",
    )


def save(book_id, related_items):
    """Store the most similar books of ``book_id`` in the temp table.

    The similarity of two books is ``c_ij / sqrt(n_i * n_j)``; only the
    ``save_top_count`` highest-scoring entries are inserted.

    Args:
        book_id: the book whose neighbours are being saved.
        related_items: ``{other_book_id: co_occurrence_count}`` for that book.
    """
    global finish_count

    scored = [
        (j, cij / math.sqrt(n[book_id] * n[j]))
        for j, cij in related_items.items()
    ]
    top = sorted(scored, key=take_second, reverse=True)[:save_top_count]
    rows = [(book_id, j, score) for j, score in top]

    # All values are bound parameters so the driver handles quoting.
    sql = "INSERT INTO tmp_reading_recommend_for_you_by_read(book_id,similar_book_id,value) VALUES(%s,%s,%s)"
    with closing(get_db_conn()) as con:
        with con.cursor() as cursor:
            cursor.executemany(sql, rows)

    # save() runs on many threads; guard the shared counter.
    with _progress_lock:
        finish_count += 1
        done = finish_count
    print("进度:%s/%s" % (done, total_count))


start_time = time.strftime("%H:%M:%S", time.localtime())

# Optional SQL suffix appended to the training query (e.g. a LIMIT clause).
limit = ""
# Number of similar books kept per book.
save_top_count = 30
# Size of the thread pool used by item_similarity(). Every save() call opens
# its own connection, so this should stay within what the MySQL server allows.
save_worker_count = 300
finish_count = 0
total_count = 0
_progress_lock = threading.Lock()

cf = ConfigParser()
cf.read("../py_config.ini")
host = cf.get("mysql", "host")
user = cf.get("mysql", "user")
password = cf.get("mysql", "password")
database = cf.get("mysql", "db")
port = cf.getint("mysql", "port")

with closing(get_db_conn()) as conn:
    with conn.cursor() as proc_cursor:
        proc_cursor.callproc("计算用户读过的书(为你推荐)")

# Item-item co-occurrence matrix: c[i][j] = number of users having both i and j.
c = dict()
# n[i] = number of distinct users having item i.
n = dict()

train = get_train()
item_similarity()

with closing(get_db_conn()) as conn:
    with conn.cursor() as proc_cursor:
        proc_cursor.callproc("calcReadingRecommendForYou", [1])

end_time = time.strftime("%H:%M:%S", time.localtime())
print("运行结束,开始于 %s,结束于 %s" % (start_time, end_time))
分享一段基于物品的协同推荐(ItemCF)代码:根据用户读的书推荐图书
原文:https://www.cnblogs.com/ilovejesus/p/12491089.html