python类trigrams()的实例源码-面圈网

SVM.py 文件源码项目：codenn 作者: sriniiyer 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def tokenize(text):
    """Normalise ``text`` and split it into a list of stemmed tokens.

    The text is converted to ASCII (characters that cannot be encoded are
    replaced), stripped of surrounding whitespace and lower-cased.  It is
    then split with a regex that keeps runs of word characters, apostrophes
    and hyphens together (so words like "don't" stay whole) and emits every
    other non-space character as a token of its own.  Each token is passed
    through ``porter.stem``.

    Only the unigram tokens are returned.  Callers that need n-gram
    features can apply ``nltk.bigrams`` / ``nltk.trigrams`` to the result.
    """
    try:
        text = text.decode('utf-8').encode('ascii', 'replace').strip().lower()
    except (UnicodeError, AttributeError):
        # The input could not be decoded as UTF-8 (or has no ``decode``
        # method), so fall back to encoding it to ASCII directly.
        text = text.encode('ascii', 'replace').strip().lower()

    # Split punctuation into separate tokens, but do not split on single
    # quotes or hyphens inside a word.
    return [porter.stem(w) for w in re.findall(r"[\w'-]+|[^\s\w]", text)]

utilities.py 文件源码项目：aueb.twitter.sentiment 作者: nlpaueb 项目源码文件源码阅读 39 收藏 0 点赞 0 评论 0

def posTrigramsScore(trigrams,category,pos_tags_trigrams,labels):
    """Return a dict mapping every item of ``trigrams`` to its score.

    ``subList(pos_tags_trigrams, labels, category)`` is evaluated once up
    front (presumably it keeps the POS-tag trigrams of the given category;
    confirm against ``subList``).  Its result is passed, together with the
    full ``pos_tags_trigrams``, to ``score`` for each trigram.
    """
    # POS-tag trigrams restricted to the requested category
    category_trigrams = subList(pos_tags_trigrams,labels,category)

    # one score per trigram; a repeated trigram simply overwrites its entry
    return {
        tri: score(tri,category,category_trigrams,pos_tags_trigrams)
        for tri in trigrams
    }

#calculate bigram's f1 score

utilities.py 文件源码项目：aueb.twitter.sentiment 作者: nlpaueb 项目源码文件源码阅读 47 收藏 0 点赞 0 评论 0

def getBigrams(l):
    """Return a list holding, for each item of ``l``, the list of its bigrams.

    Every element of ``l`` is passed to ``bigrams`` and the result is
    materialised as a list; the output follows the order of ``l``.
    """
    return [list(bigrams(item)) for item in l]

#calculate trigrams of every item of the list l

utilities.py 文件源码项目：aueb.twitter.sentiment 作者: nlpaueb 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def getTrigrams(l):
    """Return a list holding, for each item of ``l``, the list of its trigrams.

    Every element of ``l`` is passed to ``trigrams`` and the result is
    materialised as a list; the output follows the order of ``l``.
    """
    return [list(trigrams(item)) for item in l]

#calculate pos tag score