首页 > 编程语言 > 详细

Redis实现搜索和排序

时间:2020-06-03 09:35:22      阅读:57      评论:0      收藏:0      [点我收藏+]

明日更新文字。

 

建立反向索引

基于文件建立单词与文档的反向索引,使用集合存储。

# # #!/usr/bin/env python
# # # -*- coding: UTF-8 -*-
import jieba
import codecs
import redis
import uuid
def cut_words(file):
    """Tokenize the contents of a UTF-8 text file with jieba.

    Args:
        file: Path of the text file to read.

    Returns:
        Whatever ``jieba.cut_for_search`` returns for the file's text
        (an iterable of tokens).
    """
    # The mode has to be the string "r"; a bare ``r`` is an undefined name.
    with open(file, "r", encoding="utf-8") as f:
        text = f.read()
    # The text is fully read above, so segmentation can happen after the
    # file has been closed.
    return jieba.cut_for_search(text)

def drop_Disable_Words(cut_res,stopwords):
    """Filter a token sequence down to the words worth indexing.

    A token is kept only when it is longer than two characters, is not
    contained in ``stopwords``, and is neither a newline nor an
    ideographic space (U+3000).

    Returns a new list that preserves the input order.
    """
    return [
        token
        for token in cut_res
        if len(token) > 2
        and not (token in stopwords or token == "\n" or token == "\u3000")
    ]

def read_stop_word(file_path):
    """Load the stop-word list from a UTF-8 text file.

    Args:
        file_path: Path of the stop-word file (expected to hold one word
            per line).

    Returns:
        A list with one entry per line of the file, with surrounding
        whitespace stripped from each entry.
    """
    # Mode and encoding must be string literals (bare ``r`` / ``utf8`` are
    # undefined names), and the context manager guarantees the handle is
    # closed once the lines have been read.
    with open(file_path, "r", encoding="utf8") as handle:
        return [line.strip() for line in handle]

def index_document(conn,docid,keywords):
    """Add ``docid`` to the inverted-index set of every keyword.

    Args:
        conn: Redis client exposing ``pipeline()``.
        docid: Identifier stored as the member of each keyword's set.
        keywords: Iterable of words contained in the document.

    Returns:
        The number of replies returned by the pipeline, i.e. one per
        queued SADD command.
    """
    # Queue every SADD on one transactional pipeline so they are sent and
    # applied together.
    pipe = conn.pipeline(True)
    for keyword in keywords:
        # Index sets live under the "idx:" prefix; that is the key space the
        # set-operation search helpers in this file read from.
        pipe.sadd("idx:" + keyword, docid)
    return len(pipe.execute())


def _set_conmon(conn,method,names,ttl = 30,execute = True):
    id = str(uuid.uuid4())

# Load the raw corpus files and the stop-word list.
files = ["file1.txt", "file2.txt"]
stopwords = read_stop_word("stop_word.txt")

dic = {}
# Tokenize each file and drop its stop words; corpus[i] holds the words
# kept for files[i].
corpus = []
for file in files:
    cut_res = cut_words(file)
    res = drop_Disable_Words(cut_res, stopwords)
    corpus.append(res)
print(corpus)

# NOTE(review): the Redis password is hard-coded in source; consider loading
# it from configuration or the environment instead.
pool = redis.ConnectionPool(host="localhost", password="lin@Wen.", port=6379, decode_responses=True)
conn = redis.Redis(connection_pool=pool)
pipeline = conn.pipeline(True)
for filename, words in zip(files, corpus):
    for word in words:
        # Each word's set, stored under "idx:<word>", collects the files
        # that contain it.
        pipeline.sadd("idx:" + word, filename)
    # Flush once per file and report how many SADD replies came back.
    print(len(pipeline.execute()))

对单词进行搜索

#搜索
def _set_common(conn,method,names,ttl=30,execute =True):
    id = str(uuid.uuid4())
    pipeline = conn.pipeline(True)
    names = [idx: + name for name in names]
    getattr(pipeline,method)(idx: + id,*names)
    pipeline.expire(idx: + id,ttl)
    if execute:
        print(pipeline.execute())
    return id
def intersect(conn,items,ttl = 30,_execute=True):
    """Store the intersection of the index sets for ``items``.

    Delegates to ``_set_common`` with the ``sinterstore`` command and
    returns the id it produces.
    """
    # The command name must be passed as a string so it can be looked up
    # with getattr on the pipeline.
    return _set_common(conn,"sinterstore",items,ttl,_execute)
def union(conn,items,ttl = 30,_execute=True):
    """Store the union of the index sets for ``items``.

    Delegates to ``_set_common`` with the ``sunionstore`` command and
    returns the id it produces.
    """
    # The command name must be passed as a string so it can be looked up
    # with getattr on the pipeline.
    return _set_common(conn,"sunionstore",items,ttl,_execute)
def difference(conn,items,ttl = 30,_execute=True):
    """Store the difference of the index sets for ``items``.

    Delegates to ``_set_common`` with the ``sdiffstore`` command and
    returns the id it produces.
    """
    # The command name must be passed as a string so it can be looked up
    # with getattr on the pipeline.
    return _set_common(conn,"sdiffstore",items,ttl,_execute)

names = ["DirectX","Unity3D","STL"]
# NOTE(review): the Redis password is hard-coded in source; consider loading
# it from configuration or the environment instead.
pool = redis.ConnectionPool(host="localhost", password="lin@Wen.", port=6379, decode_responses=True)
conn = redis.Redis(connection_pool=pool)
id = union(conn,names)
print(id)
# The union result is stored under the "idx:" prefix plus the returned id.
print(conn.smembers("idx:"+id))
# redis.exceptions.ResponseError: WRONGTYPE Operation against a key holding the wrong kind of value
# Cause of that error: the data type stored at the key does not match the
# command used to read it.
# A direct union that is not stored can be queried with:
# print(conn.sunion("idx:DirectX", "idx:STL"))

 

Redis实现搜索和排序

原文:https://www.cnblogs.com/-wenli/p/13034835.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!