Python:按照 jieba 分词后,再根据字典的词频进行排序输出

dict.txt

迅雷不及掩耳盗铃之势 1 掩耳盗铃 2 铃儿响叮当 3 当仁不让 5 让世界充满爱 3 让世界充满爱 5 迅雷不及 0 迅雷 0 掩耳 0 盗铃 0 

实现代码

# -*- coding: utf-8 -*-
# @ModuleName: test002
# @Function:
# @Author: darling
# @Time: 2022-05-05 20:01
import jieba
from loguru import logger

jieba.set_dictionary('./dict.txt')  # load dict.txt via a relative path so deployment is location-independent
jieba.initialize()  # initialize jieba up front instead of lazily on first cut


def read_headers(file_name):
    """Read "<word> <frequency>" lines from *file_name* into a dict.

    Blank lines and entries whose frequency is 0 are skipped.
    If a word appears more than once, the last occurrence wins.
    Returns the word -> int(frequency) mapping.
    """
    freqs = {}
    with open(file_name, 'r', encoding='UTF-8') as file:
        for line in file:
            if line == '\n':
                continue
            # each line is "<word> <frequency>" separated by a single space
            parts = line.split(' ')
            key = parts[0].strip()
            val = parts[1].strip()
            if val == '0':
                # frequency 0 means "do not rank this word"
                continue
            freqs[key] = int(val)
    return freqs


# cut a title into words and rank them by dictionary frequency
def split_words(title):
    """Cut *title* with jieba (full mode), keep words of >= 2 characters,
    and return them sorted by their frequency in the global ``header``
    dict, highest frequency first. Returns None if cutting fails.
    """
    words = []
    try:
        logger.info('输入的标题为:{}', title)
        lcuts = jieba.lcut(title, cut_all=True)
        # drop single characters, keep words of two or more characters
        candidates = [x for x in lcuts if len(x) >= 2]
        # de-duplicate (order is irrelevant: we re-sort by frequency below)
        candidates = list(set(candidates))
        logger.info('lcuts可拆分为:{}', candidates)
        # map each word to its dictionary frequency; default to 0 so a word
        # missing from the dict does not yield None and crash the sort below
        split_dic = {word: header.get(word, 0) for word in candidates}
        # sort by frequency, descending
        new_dic = sorted(split_dic.items(), key=lambda x: -x[1])
        print(new_dic)
        words = [word for word, _ in new_dic]
        logger.info('词频级排序后:{}', words)
        return words
    except Exception as e:
        logger.info('拆词异常:{}', e)


def sort1(arr):
    """Sort *arr* in place (ascending) and return it.

    Note: list.sort() itself returns None, so we must return arr explicitly.
    """
    arr.sort()
    return arr


if __name__ == '__main__':
    # read_headers builds the mapping; split_words reads it as a global
    header = read_headers('./dict.txt')
    split_words('迅雷不及掩耳盗铃儿响叮当仁不让世界充满爱之势,迅雷不及掩耳盗铃之势')

拓展

对字典,列表进行排序,升序/降序的方式