model.py 文件源码-python代码片段

def fit(self, author1, author2, wts1=None, wts2=None,
                              bootstrap=False, verbose=False):
        """
        :type author1: str
        :type author2: str
        :type wts1: str/List[str]
        :type wts2: str/List[str]
        :type bootstrap: bool
        :type verbose: bool
        :rtype: bool
        :
        : Records the control (author1, label 1) and the case (author2,
        : label -1), derives the names of the tokens collection and the
        : stats table from the authors, their document types and the
        : bootstrap flag, then delegates to __prep_sents__ and returns
        : its result.
        :
        : When wts1/wts2 is empty or None, every distinct 'type' value
        : stored for that author in the docs collection is used instead.
        :
        """

        self.__bootstrap__ = bootstrap

        cases = []
        # use 1, -1 to match output from sklearn's OneClassSVM
        for author, wts, label in ((author1, wts1, 1), (author2, wts2, -1)):
            if not wts:
                # No types given: fall back to every distinct 'type' stored
                # for this author (presumably unicode values under Python 2,
                # hence the ASCII encode -- TODO confirm).
                wts = [wt.encode('ascii')
                       for wt in self.__io__.mg_distinct(self.__MG_DOCS_COLL__,
                                                         'type',
                                                         {'author': author})]

            if isinstance(wts, tuple):
                # A tuple of types is a sequence, not one type; wrapping it
                # whole would break the name building below.
                wts = list(wts)
            elif not isinstance(wts, list):
                wts = [wts]

            cases.append((author, wts, label))

        ctrl, case = cases
        self.__ctrl__ = ctrl    # assigned label 1 in y vector
        self.__case__ = case    # assigned label -1 in y vector

        # Shared name components: both authors, the first three characters
        # of each of their types, and the bootstrap marker.  Any truthy
        # bootstrap value selects 'bs'.
        name_parts = [ctrl[0],
                      case[0],
                      ''.join(wt[:3] for wt in ctrl[1]),
                      ''.join(wt[:3] for wt in case[1]),
                      'bs' if bootstrap else 'nobs']

        # NOTE(review): both names are extended in place, so calling fit()
        # again on the same instance stacks a second suffix on top of the
        # first -- confirm callers use a fresh instance per fit.
        self.__MG_TOKENS_COLL__ += '-' + '-'.join(name_parts)
        self.__PG_STATS_TBL__ += '_' + '_'.join(name_parts)

        if verbose:
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            for caption, value in (('Control:', self.__ctrl__),
                                   ('Case:   ', self.__case__),
                                   ('Saving tokens to', self.__MG_TOKENS_COLL__),
                                   ('Saving stats to', self.__PG_STATS_TBL__)):
                print('%s %s' % (caption, value))

        return self.__prep_sents__(verbose=verbose) # err in preparing sentences