Example source code for Python's download() method

utils.py (project: empythy, author: ClimbsRocks)
def load_movie_reviews():

    # movie_reviews is a sizeable corpus to import, so only load it if we have to
    from nltk.corpus import movie_reviews
    try:
        movie_reviews.categories()
    except LookupError:
        import nltk
        print('This appears to be your first time using the NLTK Movie Reviews corpus. We will first download the necessary corpus (this is a one-time download that might take a little while).')
        nltk.download('movie_reviews')
        from nltk.corpus import movie_reviews

    raw_data = []

    # NLTK's corpus is structured in an interesting way
    # first iterate through the two categories (pos and neg)
    for category in movie_reviews.categories():

        if category == 'pos':
            pretty_category_name = 'positive'
        elif category == 'neg':
            pretty_category_name = 'negative'

        # each of these categories is just fileids, so grab those
        for fileid in movie_reviews.fileids(category):

            # then each review is a NLTK class where each item in that class instance is a word
            review_words = movie_reviews.words(fileid)
            review_text = ''

            for word in review_words:
                review_text += ' ' + word

            review_dictionary = {
                'text': review_text,
                'sentiment': pretty_category_name
            }

            raw_data.append(review_dictionary)

    return raw_data
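A minimal usage sketch (hypothetical; it assumes load_movie_reviews() above is importable and that the corpus follows NLTK's standard positive/negative split):

from collections import Counter

reviews = load_movie_reviews()
print(len(reviews))                              # total number of reviews loaded
print(Counter(r['sentiment'] for r in reviews))  # counts per sentiment label
print(reviews[0]['text'][:80])                   # first 80 characters of one review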
nltk_download.py (project: jenova, author: dungba88)
def download():
    """skip unverified certificate and show download dialog"""
    try:
        create_unverified_https_context = ssl._create_unverified_context
    except AttributeError:
        pass
    else:
        ssl._create_default_https_context = create_unverified_https_context

    nltk.download()
nltk.py (project: master-thesis, author: AndreasMadsen)
def download(self, name: str) -> None:
        if not self.exists(name):
            nltk.download(name, download_dir=self.nltk_dir)
setup.py (project: memex-dossier-open, author: dossier)
def run(self):
        import nltk
        from memex_dossier.models.tests.test_features import nltk_data_packages
        for data_name in nltk_data_packages:
            print('nltk.download(%r)' % data_name)
            nltk.download(data_name)
__init__.py (project: memex-dossier-open, author: dossier)
def nltk_data():
    for data_name in nltk_data_packages:
        print('nltk.download(%r)' % data_name)
        nltk.download(data_name)
cli.py (project: blabbr, author: bfontaine)
def setup_nltk(self, **kw):
        import nltk
        from nltk.data import find

        tagger = "averaged_perceptron_tagger"

        try:
            find("taggers/%s" % tagger)
        except LookupError:
            click.echo("Downloading NTLK data (~2MB)...")
            nltk.download(tagger)
            return True

        return False
Text.py (project: smmry-alternate, author: andersonpaac)
def initstopwords(self):
        try:
            s = set(stopwords.words('english'))
        except LookupError:
            import nltk
            nltk.download()
            s = set(stopwords.words('english'))
        st = LancasterStemmer()
        for each in s:
            self.stopwords.append(st.stem(each))

    #Given a dictionary of key: frequency, value: array of words
    #build the opposite
download_corpora.py (project: beepboop, author: nicolehe)
def download_lite():
    for each in MIN_CORPORA:
        nltk.download(each)
download_corpora.py (project: beepboop, author: nicolehe)
def download_all():
    for each in ALL_CORPORA:
        nltk.download(each)
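Both download_lite() and download_all() iterate over module-level lists that the excerpt omits. A plausible definition, shown here purely as an assumption about what MIN_CORPORA and ALL_CORPORA might contain, is:

import nltk

# Assumed package lists; the actual project may use different names or contents.
MIN_CORPORA = ['brown', 'punkt', 'wordnet', 'averaged_perceptron_tagger']
ALL_CORPORA = MIN_CORPORA + ['conll2000', 'movie_reviews']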
setup.py (project: LinguisticAnalysis, author: DucAnhPhi)
def install():
    for d in dependencies:
        pip.main(['install', d])

    # after nltk module was installed
    import nltk
    for data in nltk_data:
        nltk.download(data)
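Note that pip.main() was removed from pip's public API in pip 10, so the loop above only works with old pip releases. A rough, hedged alternative (install_via_subprocess is a hypothetical name, not part of the project) is:

import subprocess
import sys

def install_via_subprocess(dependencies, nltk_data):
    # Install each Python dependency through the pip CLI rather than pip's internals.
    for d in dependencies:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', d])
    # Then fetch the required NLTK data packages.
    import nltk
    for data in nltk_data:
        nltk.download(data)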
nltk_normalization.py (project: Quadflor, author: quadflor)
def install_nltk_corpora(*packages):
        nltk_packages = list(packages)
        try:
            installed = (set(os.listdir(nltk.data.find("corpora"))) |
                         (set(os.listdir(nltk.data.find("taggers"))))) | \
                        (set(os.listdir(nltk.data.find("tokenizers"))))
        except LookupError:
            installed = set()
        if not set(nltk_packages) <= set(installed):
            nltk.download(nltk_packages)
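A hypothetical call, assuming the helper is reachable as a plain function; the package names are only examples:

install_nltk_corpora('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger')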
extras.py (project: teem-tag, author: P2Pvalue)
def build_dict_from_nltk(output_file, corpus=None, stopwords=None,
                         stemmer=Stemmer(), measure='IDF', verbose=False):
    '''
    @param output_file: the name of the file where the dictionary should be
                        saved
    @param corpus:      the NLTK corpus to use (defaults to nltk.corpus.reuters)
    @param stopwords:   a list of (not stemmed) stopwords (defaults to
                        nltk.corpus.reuters.words('stopwords'))
    @param stemmer:     the L{Stemmer} object to be used
    @param measure:     the measure used to compute the weights ('IDF'
                        i.e. 'inverse document frequency' or 'ICF' i.e.
                        'inverse collection frequency'; defaults to 'IDF')
    @param verbose:     whether information on the progress should be printed
                        on screen
    '''

    from build_dict import build_dict
    import nltk
    import pickle

    if not (corpus and stopwords):
        nltk.download('reuters')

    corpus = corpus or nltk.corpus.reuters
    stopwords = stopwords or nltk.corpus.reuters.words('stopwords')

    corpus_list = []

    if verbose: print('Processing corpus...')
    for file in corpus.fileids():
        doc = [stemmer(Tag(w.lower())).stem for w in corpus.words(file)
               if w[0].isalpha()]
        corpus_list.append(doc)

    if verbose: print('Processing stopwords...')
    stopwords = [stemmer(Tag(w.lower())).stem for w in stopwords]

    if verbose: print('Building dictionary...')
    dictionary = build_dict(corpus_list, stopwords, measure)
    with open(output_file, 'wb') as out:
        pickle.dump(dictionary, out, -1)
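A hedged usage example; the output filename is illustrative, and the defaults (Reuters corpus, IDF weighting) come from the signature above:

build_dict_from_nltk('reuters_idf.pkl', verbose=True)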
download_corpora.py (project: kind2anki, author: prz3m)
def download_lite():
    for each in MIN_CORPORA:
        nltk.download(each)
download_corpora.py (project: kind2anki, author: prz3m)
def download_all():
    for each in ALL_CORPORA:
        nltk.download(each)
nltkmgr.py (project: sia-cog, author: deepakkumar1984)
def download():
    nltk.download()
nltk_backend.py (project: luvina, author: oarriaga)
def download_nltk_data(package_name='all'):
    """ download necessary data from NLTK
    args:
        package_name: string containing the package name to install
    returns:
        None
    """
    if package_name == 'all':
        data = ['punkt', 'wordnet', 'stopwords', 'averaged_perceptron_tagger']
        for package in data:
            download(package)
    else:
        download(package_name)
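A usage sketch, assuming download here refers to nltk.download imported at module level:

download_nltk_data()         # fetch the default bundle listed above
download_nltk_data('punkt')  # fetch a single named package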
download_datasets.py (project: text-to-image, author: paarthneekhara)
def create_data_paths():
    if not os.path.isdir(DATA_DIR):
        raise EnvironmentError('Needs to be run from project directory containing ' + DATA_DIR)
    needed_paths = [
        os.path.join(DATA_DIR, 'samples'),
        os.path.join(DATA_DIR, 'val_samples'),
        os.path.join(DATA_DIR, 'Models'),
    ]
    for p in needed_paths:
        make_sure_path_exists(p)


# adapted from http://stackoverflow.com/questions/51212/how-to-write-a-download-progress-indicator-in-python
tokenizers.py (project: acl2017-interactive_summarizer, author: UKPLab)
def _sentence_tokenizer(self, language):
        try:
            path = to_string("tokenizers/punkt/%s.pickle") % to_string(language)
            return nltk.data.load(path)
        except (LookupError, zipfile.BadZipfile):
            raise LookupError(
                "NLTK tokenizers are missing. Download them by following command: "
                '''python -c "import nltk; nltk.download('punkt')"'''
            )
Tokenizer.py (project: nlp_sum, author: Zhujunnan)
def english_sentence_segment(text):
    """segment text into sentence"""
    try:
        sent_detector = nltk.data.load(
            'tokenizers/punkt/english.pickle'
        )

        extra_abbrev = ["e.g", "al", "i.e"]
        sent_detector._params.abbrev_types.update(extra_abbrev)
        return sent_detector.tokenize(text)
    except LookupError:
        raise LookupError(
            "NLTK tokenizers are missing. Download them by running the following command: "
            '''python -c "import nltk; nltk.download('punkt')"'''
        )
__NLPMODIFIED.py (project: PYSHA, author: shafaypro)
def download_preferences(self):
        import nltk  # import the Natural Language Toolkit
        nltk.download()  # open the GUI-based NLTK download manager

