def getData(Mentions, S, E, contextMention, contextEntity, id):
    for mention in Mentions:
        jieba.add_word(mention.name)
        S.append(mention.name)
        id.append('-')  # placeholder id for the mention itself; candidate ids follow below
        contextMention[mention.name] = mention.context
    for item in Mentions:
        temp = []
        cnt = 0
        for candidate in item.candidates:
            if cnt > 100:  # cap the number of candidates kept per mention
                break
            cnt += 1
            temp.append(candidate.title)
            contextEntity[candidate.title] = candidate.context
            id.append(candidate.id)
        E.append(temp)
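The helper above just flattens mention and candidate objects into parallel lists. A hypothetical usage sketch follows; Mention, Candidate, and the sample values are illustrative stand-ins, not the project's real classes:

# Hypothetical stand-ins for the project's Mention/Candidate objects.
class Candidate:
    def __init__(self, title, context, id):
        self.title, self.context, self.id = title, context, id

class Mention:
    def __init__(self, name, context, candidates):
        self.name, self.context, self.candidates = name, context, candidates

S, E, id_list = [], [], []
contextMention, contextEntity = {}, {}
m = Mention('Apple', 'Apple released a phone', [Candidate('Apple Inc.', '...', 'Q312')])
getData([m], S, E, contextMention, contextEntity, id_list)
# S == ['Apple']; E == [['Apple Inc.']]; id_list == ['-', 'Q312']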
Python jieba.add_word() examples
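Before the individual project snippets, a minimal sketch of the call itself: jieba.add_word registers a word in the in-memory dictionary so the segmenter keeps it as a single token. The example word is taken from jieba's own documentation.

import jieba

jieba.add_word('杭研')  # register an out-of-vocabulary word at runtime
print(jieba.lcut('他来到了网易杭研大厦'))  # '杭研' is guaranteed to stay whole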
def test():
    x = u"??????????????????Nintendo???2006-11-02???????????????????????????3???????????????????????????????????????"
    x = u'???????????????'
    sentences = split_sentences(x)
    # jieba.add_word(u'????????', 5, 'baike')
    # jieba.add_word(u'Nintendo', 5, 'baike')
    # jieba.add_word(u'????', 5, 'baike')
    # jieba.add_word(u'???', 5, 'baike')
    # jieba.add_word(u'????', 5, 'baike')
    # name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.sample.cache')
    # fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.sample.cache')
    name2fb_path = os.path.join(cache_dir, 'DatasetFinder.name2fb.cache')
    fb_ttls_path = os.path.join(cache_dir, 'DatasetFinder.fb_ttls.cache')
    finder = DatasetFinder.load_from_cache(name2fb_path, fb_ttls_path)
    for x in gen_dataset(sentences[0], finder):
        print(x)
    print('-' * 50)
    for x in gen_dataset(sentences[1], finder):
        print(x)
def add_word_dict(word, freq=None, tag=None):
    '''
    Add a word to the jieba dictionary.
    '''
    jieba.add_word(word, freq=freq, tag=tag)
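A brief usage sketch for the wrapper above; the words, frequency, and POS tag are illustrative:

add_word_dict('深度学习')                     # let jieba pick a frequency
add_word_dict('云计算', freq=20000, tag='n')  # explicit frequency and POS tag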
def jiebaCustomSetting(self, dict_path, usr_dict_path):
    jieba.set_dictionary(dict_path)
    with open(usr_dict_path, 'r', encoding='utf-8') as dic:
        for word in dic:
            jieba.add_word(word.strip('\n'))

def TaibaCustomSetting(self, usr_dict):
    with open(usr_dict, 'r', encoding='utf-8') as dic:
        for word in dic:
            Taiba.add_word(word.strip('\n'))
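Both helpers expect a plain-text user dictionary with one word per line; a hypothetical usr_dict file could look like this:

雞排
珍珠奶茶
滷肉飯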
def __init():
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word(u"??", 10000)
    # tune=True is required for suggest_freq to actually adjust the dictionary
    jieba.suggest_freq((u"?", u"??"), True)
    jieba.suggest_freq((u"??", u"??"), True)
    jieba.suggest_freq((u"??", u"??"), True)
    jieba.suggest_freq((u"??", u"?"), True)
def __init():
    user_dict_path = os.path.join(root_filepath, "f_seg/user_dict.txt")
    jieba.load_userdict(user_dict_path)
    jieba.add_word("??", 10000)
    # tune=True is required for suggest_freq to actually adjust the dictionary
    jieba.suggest_freq(("?", "??"), True)
    jieba.suggest_freq(("??", "??"), True)
    jieba.suggest_freq(("??", "??"), True)
    jieba.suggest_freq(("??", "?"), True)
Source file: data_preprocess.py, from project Neural-Headline-Generator-CN by QuantumLiu
def cut(text, custom_words=('FLOAT', 'TIME', 'DATE', 'EOS')):
    jieba.enable_parallel(32)  # parallel segmentation (POSIX only)
    for word in custom_words:
        jieba.add_word(word)
    words = jieba.lcut(text)
    return words
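A usage sketch for cut (POSIX only, since it enables parallel mode; the sample sentence is illustrative):

print(cut('今天 TIME 股价上涨了 FLOAT 个百分点 EOS'))
# the placeholder tokens FLOAT/TIME/EOS come back as single words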
def __init__(self, slack, custom):
    self.slack = slack
    self.rundata = custom['data']
    self.colorPrint = custom['colorPrint']
    self.food_dir = "data/midnight.json"
    self.food_dic = "data/dict.txt.big"
    # find midnight channel
    self.nochannel = False
    rep = self.slack.api_call("channels.list")
    self.channel_id = ""
    for c in rep['channels']:
        if c['name'].lower() == custom['food_channelname']:
            self.channel_id = c['id']
            break
    if not self.channel_id:
        self.colorPrint(
            "No midnight channel",
            "Restart this plugin once the midnight channel is available",
            color="FAIL")
        self.nochannel = True
        return
    jieba.set_dictionary(self.food_dic)
    jieba.initialize()
    # re-apply custom words added and deleted in previous runs
    for word in self.rundata.get('FOOD_addword'):
        jieba.add_word(word)
    for word in self.rundata.get('FOOD_delword'):
        jieba.del_word(word)
    self.init()
def __init__(self):
    self.negative = []
    self.adverb = []
    self.questionMark = []
    self.rootPath = r"E:\workout\data\senitment_data"  # raw string avoids accidental escapes
    self.wordtypeDict, self.wordfreqDict = self.UserDefineLibrary()
    for word in self.wordfreqDict.keys():
        jieba.add_word(str(word))
    self.initialize()
def main(self, datadict):
    if self.nochannel:
        return
    # index image uploads posted in the midnight channel
    if datadict['type'] == 'message' and \
            datadict.get('subtype') == "file_share" and \
            datadict.get('channel') == self.channel_id:
        self.imageAdd(datadict['file'])
    if datadict['type'] != 'message' or 'subtype' in datadict:
        return
    if datadict['text'].startswith("food "):
        text = re.search(
            r"(?<=food ).*", datadict['text'], re.DOTALL).group().strip()
        payload = {
            "username": "?? Midnight",
            "icon_emoji": ":_e9_a3_9f:",
            "thread_ts": datadict.get("thread_ts") or '',
            "channel": datadict['channel']}
        try:
            ans = self.wordSearch(text)
            self.slack.api_call("chat.postMessage",
                                attachments=[self.wordParse(ans)],
                                **payload)
        except BaseException:
            self.slack.api_call("chat.postMessage",
                                text="Sorry Not Found",
                                **payload)
    elif datadict['text'].startswith("foodadd "):
        text = re.search(r"(?<=foodadd ).*",
                         datadict['text']).group().strip()
        jieba.add_word(text)  # teach the segmenter the new food name
        self.rundata.append("FOOD_addword", text)
        self.init()
    elif datadict['text'].startswith("fooddel "):
        text = re.search(r"(?<=fooddel ).*",
                         datadict['text']).group().strip()
        jieba.del_word(text)
        self.rundata.append("FOOD_delword", text)
        self.init()