translate.py 文件源码-python代码片段

translate.py 文件源码
python
阅读 33 收藏 0 点赞 0 评论 0
def _get_base_doge_words(self, eng_text):
        """
        Get all base words from text to make doge phrases from.

        Punctuation characters are removed, the remaining purely alphabetic
        tokens are lower-cased and part-of-speech tagged with
        ``nltk.pos_tag``, and only tokens whose tag starts with 'N', 'V' or
        'J' are kept. Tokens tagged 'N...' or 'V...' are passed through
        ``self._lemmatizer.lemmatize``; tokens tagged 'J...' are kept as-is.

        eg. 'Hello there, I am happy' could give something like
        ['hello', 'be', 'happy'] (the exact result depends on the tagger
        and lemmatizer in use).

        Args:
            eng_text (str): Text to get words from.

        Returns:
            list[str]: Unique lower case ASCII words to use from text, in
                order of first appearance. Empty if no usable word is found.
        """
        punctuation = set(string.punctuation)
        phrase_no_punct = "".join(ch for ch in eng_text if ch not in punctuation)

        # split() with no argument splits on any run of whitespace, so words
        # separated by tabs or newlines are not glued together and then
        # rejected by isalpha().
        tokens = [w.lower() for w in phrase_no_punct.split() if w.isalpha()]
        if not tokens:
            return []

        chosen_words = []
        seen = set()
        for word, tag in nltk.pos_tag(tokens):
            tag_class = tag[:1]
            if tag_class == 'N':
                # make noun singular
                word = self._lemmatizer.lemmatize(word, pos='n')
            elif tag_class == 'V':
                # make verb infinitive
                word = self._lemmatizer.lemmatize(word, pos='v')
            elif tag_class != 'J':
                # not a noun, verb or adjective: not a base word
                continue

            # Drop non-ASCII characters, then normalise back to the native
            # ``str`` type: on Python 3 ``encode`` yields ``bytes``, which
            # would break the documented ``list[str]`` return type.
            word = word.encode('ascii', 'ignore')
            if not isinstance(word, str):
                word = word.decode('ascii')

            # Skip words that were entirely non-ASCII (now empty) and
            # de-duplicate while keeping first-seen order so the result is
            # deterministic.
            if word and word not in seen:
                seen.add(word)
                chosen_words.append(word)
        return chosen_words