Python tag() usage examples from open-source projects

paraphrase.py (project: textfool, author: bogdan-kulynych)
def _synonym_prefilter_fn(token, synonym):
    '''
    Similarity heuristics go here
    '''
    if  (len(synonym.text.split()) > 2) or \
        (synonym.lemma == token.lemma) or \
        (synonym.tag != token.tag) or \
        (token.text.lower() == 'be'):
        return False
    else:
        return True
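A usage sketch for the filter above, assuming spaCy and an English model are installed (the model name 'en_core_web_sm' is an assumption; textfool supplies the real tokens from its own pipeline):

import spacy

nlp = spacy.load('en_core_web_sm')  # model name is an assumption
token = nlp('quick')[0]
synonym = nlp('fast')[0]
# True only when the candidate is short, has a different lemma,
# and carries the same fine-grained POS tag as the original token
print(_synonym_prefilter_fn(token, synonym))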
relextract.py (NLTK's relextract module, vendored identically in these projects: Price-Comparator by Thejas-1, PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda by SignalMedia, neighborhood_mood_aws by jarrellmark, hate-to-hugs by sdoran35, FancyWord by EastonLee, beepboop by nicolehe, kind2anki by prz3m, but_sentiment by MixedEmotions)
def _join(lst, sep=' ', untag=False):
    """
    Join a list into a string, turning tagged tuples into tag strings or just words.
    :param untag: if ``True``, omit the tag from tagged input strings.
    :type lst: list
    :rtype: str
    """
    try:
        return sep.join(lst)
    except TypeError:
        if untag:
            return sep.join(tup[0] for tup in lst)
        from nltk.tag import tuple2str
        return sep.join(tuple2str(tup) for tup in lst)
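A quick illustration of both branches (hypothetical inputs): plain strings join directly, while (word, tag) tuples raise TypeError and fall through to the tagged handling:

words = ['the', 'cat', 'sat']
tagged = [('the', 'DT'), ('cat', 'NN'), ('sat', 'VBD')]
print(_join(words))               # the cat sat
print(_join(tagged))              # the/DT cat/NN sat/VBD
print(_join(tagged, untag=True))  # the cat sat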
quiz.py (project: polyglot-quiz, author: erkanay)
def map_words(self, _text):
        mapping = defaultdict(list)
        tagged_words = pos_tag(set(self.get_words(_text)))
        for word, tag in tagged_words:
            mapping[tag].append(word)
        return mapping
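The same grouping logic as a standalone sketch (assumes NLTK with the punkt and averaged_perceptron_tagger data packages; like the original, it tags a de-duplicated word set, so sentence order is lost):

from collections import defaultdict
from nltk import pos_tag, word_tokenize

mapping = defaultdict(list)
for word, tag in pos_tag(list(set(word_tokenize('The quick brown fox jumps')))):
    mapping[tag].append(word)
print(dict(mapping))  # e.g. {'DT': ['The'], 'JJ': ['quick', 'brown'], ...}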
glue.py (project: but_sentiment, author: MixedEmotions)
def lookup(self, node, depgraph, counter):
        semtype_names = self.get_semtypes(node)

        semtype = None
        for name in semtype_names:
            if name in self:
                semtype = self[name]
                break
        if semtype is None:
            # raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
            return []

        self.add_missing_dependencies(node, depgraph)

        lookup = self._lookup_semtype_option(semtype, node, depgraph)

        if not len(lookup):
            raise KeyError(
                "There is no GlueDict entry for sem type of '%s' "
                "with tag '%s', and rel '%s'" %
                (node['word'], node['tag'], node['rel'])
                )

        return self.get_glueformulas_from_semtype_entry(lookup, node['word'], node, depgraph, counter)
rer_build_history.py (project: Intelligent-Phone-Salesman, author: ShruthiChari)
def postagger(sent):
    text = nltk.word_tokenize(sent)
    posTagged = pos_tag(text)
    #simplifiedTags = [map_tag('en-ptb', 'universal', tag) for word, tag in posTagged]
    return posTagged
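A minimal call, assuming "import nltk" and "from nltk import pos_tag" at module level plus the punkt and averaged_perceptron_tagger data packages:

print(postagger('Jerusalem bells are ringing'))
# e.g. [('Jerusalem', 'NNP'), ('bells', 'NNS'), ('are', 'VBP'), ('ringing', 'VBG')]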
old.py (project: pythainlp, author: PyThaiNLP)
def tag(text):
    """
    ?????????? ''list'' ?????????? ''list'' ???? [('???????', '??????')]"""
    tagger = nltk.tag.UnigramTagger(model=data())# backoff=default_tagger)
    return tagger.tag(text)
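A hedged usage sketch: data() is the project's loader for the unigram tagging model, and the input must already be word-segmented Thai; NLTK's UnigramTagger returns None for words missing from the model:

result = tag(['ผม', 'รัก', 'คุณ'])  # hypothetical pre-segmented input
# result is a list of (word, tag) pairs; tag is None for unseen words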
phrasemachine.py (project: phrasemachine, author: slanglab)
import sys

def logmsg(s):
    # would be better to use the python logging module
    print("[phrasemachine] %s" % s, file=sys.stderr)

############## SimpleNP
## Uses a five-tag coarse grammar.
## tagset: A D P N O

# Requires conversion from PTB or Petrov/Gimpel tags to our system.
# "Coarse*" indicates petrov/gimpel
# Grammar change from the FST version: can't repeat NUM in both adj and noun.
phrasemachine.py (project: phrasemachine, author: slanglab)
def coarse_tag_str(pos_seq):
    """Convert POS sequence to our coarse system, formatted as a string."""
    tags = [tag2coarse.get(tag, 'O') for tag in pos_seq]
    return ''.join(tags)

# POS extraction assuming list of POS tags as input.
# >>> pyre.extract_finditer(["VB","JJ","NN","NN","QQ","QQ",])
# [(1, 4)]
# >>> pyre.extract_ngram_filter(["VB","JJ","NN","NN","QQ","QQ",])
# [(1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]
phrasemachine.py (project: phrasemachine, author: slanglab)
def tag_text(self, text):
        '''take input text and return tokens w/ part of speech tags using NLTK'''
        # import here instead of at the top of the file because not everyone will have nltk installed

        sents = self.sent_detector.tokenize(text)    # TODO: this may fail on some unicode chars; it seems to assume ASCII
        word_pos_pairs = []

        all_tokens = []
        for sent in sents:
            tokens = self.tokenize(sent)
            all_tokens = all_tokens + tokens
            word_pos_pairs = word_pos_pairs + self.tagger.tag(tokens)
        return {'tokens': all_tokens, 'pos': [tag for (w,tag) in word_pos_pairs]}
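A standalone sketch of the same flow with plain NLTK (assumes the punkt and averaged_perceptron_tagger data packages; phrasemachine's own sent_detector, tokenize, and tagger attributes are set up elsewhere):

import nltk

sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
tokens, pos = [], []
for sent in sent_detector.tokenize('The pump failed. We replaced the seal.'):
    words = nltk.word_tokenize(sent)
    tokens += words
    pos += [t for _, t in nltk.pos_tag(words)]
print({'tokens': tokens, 'pos': pos})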
feature_construction.py (project: Automatic-Question-Generation, author: bwanglzu)
def _ner_features(self, row):
        """Name entity recognition features
        - Args:
            row(pandas.dataframe): dataframe of current row
        - Returns:
            row(pandas.dataframe): result a pandas dataframe with new feature
        """
        answer = row.Answer
        question = row.Question
        if answer is not None and question is not None:
            sentence_len = len(row.Sentence.split())
            ners_answer = self.st.tag(answer.split())
            ners_question = self.st.tag(question.split())
            ner_values_answer = [v for k, v in ners_answer if v in [
                'PERSON', 'ORGANIZATION', 'LOCATION']]
            ner_values_question = [v for k, v in ners_question if v in [
                'PERSON', 'ORGANIZATION', 'LOCATION']]
        else:
            return None
        # Binary NER presence flags (1 if the entity type occurs)
        row['NAMED_ENTITY_IN_ANSWER_COUNT_PERS'] = int('PERSON' in ner_values_answer)
        row['NAMED_ENTITY_IN_ANSWER_COUNT_ORG'] = int('ORGANIZATION' in ner_values_answer)
        row['NAMED_ENTITY_IN_ANSWER_COUNT_LOC'] = int('LOCATION' in ner_values_answer)
        row['NAMED_ENTITY_OUT_ANSWER_COUNT_PERS'] = int('PERSON' in ner_values_question)
        row['NAMED_ENTITY_OUT_ANSWER_COUNT_ORG'] = int('ORGANIZATION' in ner_values_question)
        row['NAMED_ENTITY_OUT_ANSWER_COUNT_LOC'] = int('LOCATION' in ner_values_question)
        row['NUM_NAMED_ENTITIES_IN_ANSWER'] = len(ner_values_answer)
        row['NUM_NAMED_ENTITIES_OUT_ANSWER'] = len(ner_values_question)
        row['ANSWER_NAMED_ENTITY_DENSITY'] = float(
            len(ner_values_answer)) / sentence_len
        row['QUESTION_NAMED_ENTITY_DENSITY'] = float(
            len(ner_values_question)) / sentence_len
        return row
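A hedged sketch of the tagger presumably behind self.st (NLTK's StanfordNERTagger; the jar and model paths are placeholders to point at a local Stanford NER download):

from nltk.tag import StanfordNERTagger

st = StanfordNERTagger(
    'english.all.3class.distsim.crf.ser.gz',  # model path: placeholder
    'stanford-ner.jar')                       # jar path: placeholder
print(st.tag('Barack Obama visited Paris'.split()))
# e.g. [('Barack', 'PERSON'), ('Obama', 'PERSON'), ('visited', 'O'), ('Paris', 'LOCATION')]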
keyphrase_test_dataset.py (project: seq2seq-keyphrase, author: memray)
def check_postag(config):
    train_set, validation_set, test_set, idx2word, word2idx = deserialize_from_file(config['dataset'])

    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    for dataset_name in config['testing_datasets']:
        # override the original test_set
        # test_set = load_testing_data(dataset_name, kwargs=dict(basedir=config['path']))(idx2word, word2idx, config['preprocess_type'])

        test_sets = load_additional_testing_data(config['testing_datasets'], idx2word, word2idx, config)
        test_set = test_sets[dataset_name]

        # print(dataset_name)
        # print('Avg length=%d, Max length=%d' % (np.average([len(s) for s in test_set['source']]), np.max([len(s) for s in test_set['source']])))
        test_data_plain = list(zip(test_set['source'], test_set['target']))

        test_size = len(test_data_plain)

        # Alternatively to setting the CLASSPATH add the jar and model via their path:
        jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
        # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
        model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'
        pos_tagger = StanfordPOSTagger(model, jar)

        for idx in range(len(test_data_plain)):
            test_s_o, test_t_o = test_data_plain[idx]

            source = keyphrase_utils.cut_zero(test_s_o, idx2word)

            print(source)

            # Add other jars from Stanford directory
            stanford_dir = jar.rpartition('/')[0]
            stanford_jars = find_jars_within_path(stanford_dir)
            pos_tagger._stanford_jar = ':'.join(stanford_jars)

            text = pos_tagger.tag(source)
            print(text)
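Stripped of the dataset plumbing, the tagging call above reduces to this sketch (paths are machine-specific placeholders):

from nltk.tag import StanfordPOSTagger

pos_tagger = StanfordPOSTagger(
    'models/english-bidirectional-distsim.tagger',  # model path: placeholder
    'stanford-postagger.jar')                       # jar path: placeholder
print(pos_tagger.tag('What is the airspeed of an unladen swallow ?'.split()))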
rer_build_history.py (project: Intelligent-Phone-Salesman, author: ShruthiChari)
def build_history(data_list, supported_tags_phones,supported_tags):
    history_list = [] # list of all histories
    sents = []
    count = 0
    expected = []

    for data in data_list: # data is the inputs entered by a given student
        data1 = data['data']

        #data1 is for every sentence entered by user
        for rec in data1:
            updates = rec['updates']
            sent = rec['sentence']  
            relatedTags=[]
            relations=[]
            if "rels" in rec.keys():
                relatedEntities = rec['rels']   
                expected.append(relatedEntities)            
                for i in relatedEntities:
                    relations.append(list(i.keys()))
                    for j in i[next(iter(i))]:
                        relatedTags.append(j)
            words = []
            posTaggedSent = postagger(sent)
            #chunkPhrases = chunker(sent)


            if len(updates) == len(posTaggedSent):
                for i in range(len(updates)):               
                    words.append({"word":updates[i]['word'],"pos":posTaggedSent[i],"tag":updates[i]['tag']})
                    #------------------------------------------------------------------------------------------------
                    # NOTE: below code is a temporary hack to build the MaxEnt model for just 2 tags - we will change this later
                    if (updates[i]['tag'] not in supported_tags_phones):
                        if updates[i]['tag'] == "Model":
                            updates[i]['tag'] = "Version"
                        else:
                            updates[i]['tag'] = "Other"                
                    #------------------------------------------------------------------------------------------------

            sents.append(words)
            history={}
            history['sentence'] = words
            history['i'] = count+1
            #history['phrases'] = chunkPhrases
            history['relatedTags'] = relatedTags
            if len(relations) > 0:
                history_list.append((history,relations[0][0],))
            else:
                history_list.append((history,"None",))
            count += 1


    return (history_list,sents,expected)
rer_build_history.py (project: Intelligent-Phone-Salesman, author: ShruthiChari)
def chunker(sent):

#a = [("I","PRP"),("hear","VBP"),("Jerusalem","NNP"),("bells","NNS"),("ringing","VBG")]
#input_sent = " Rockwell said the agreement calls for it to supply 200 addititonal so-called shipsets for the planes."
    input_sent = sent 
    text = nltk.word_tokenize(input_sent)
    a = nltk.pos_tag(text)
    phrases = []

    '''test_sents = conll2000.chunked_sents('test.txt', chunk_types=['VP'])
    train_sents = conll2000.chunked_sents('train.txt', chunk_types=['VP'])
    test_sents = conll2000.chunked_sents('test.txt', chunk_types=['NP'])'''
    NP_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
    VP_sents = conll2000.chunked_sents('train.txt', chunk_types=['VP'])
    class ChunkParser(nltk.ChunkParserI):
        def __init__(self, train_sents):
            train_data = [[(t,c) for w,t,c in nltk.chunk.tree2conlltags(sent)] for sent in train_sents]
            self.tagger = nltk.TrigramTagger(train_data)
        def parse(self, sentence):
            pos_tags = [pos for (word,pos) in sentence]
            tagged_pos_tags = self.tagger.tag(pos_tags)
            chunktags = [chunktag for (pos, chunktag) in tagged_pos_tags]
            conlltags = [(word, pos, chunktag) for ((word,pos),chunktag) in zip(sentence, chunktags)]
            return nltk.chunk.util.conlltags2tree(conlltags)

    NPChunker = ChunkParser(NP_sents)
    VPChunker = ChunkParser(VP_sents)
    #print (NPChunker.parse("I hear Jerusalem bells ringing"))
    parsed_sent = NPChunker.parse(a)
    for i in parsed_sent:
        if isinstance(i, nltk.Tree):  # subtrees are the NP chunks
            phrases.append({"NP": " ".join(t[0] for t in i)})
    parsed_sent = VPChunker.parse(a)
    for i in parsed_sent:
        if isinstance(i, nltk.Tree):  # subtrees are the VP chunks
            phrases.append({"VP": " ".join(t[0] for t in i)})
    return phrases
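Hypothetical usage (requires the conll2000 corpus, e.g. via nltk.download('conll2000')); note that both chunkers are retrained on every call, so in practice the training would be hoisted out of the function:

print(chunker('I hear Jerusalem bells ringing'))
# e.g. [{'NP': 'I'}, {'NP': 'Jerusalem bells'}, {'VP': 'hear'}, {'VP': 'ringing'}]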

