def fit(self, author1, author2, wts1=None, wts2=None,
        bootstrap=False, verbose=False):
    """
    Register a control/case author pair, derive the per-run storage names
    and hand over to sentence preparation.

    :param author1: control author; stored in ``self.__ctrl__`` with label 1
    :type author1: str
    :param author2: case author; stored in ``self.__case__`` with label -1
    :type author2: str
    :param wts1: value(s) of the documents' 'type' field to use for author1;
                 when empty/None, every distinct 'type' found for that author
                 in ``self.__MG_DOCS_COLL__`` is used
    :type wts1: str/List[str]
    :param wts2: same as ``wts1``, for author2
    :type wts2: str/List[str]
    :param bootstrap: stored as ``self.__bootstrap__``; a truthy value selects
                      the 'bs' name suffix, a falsy one 'nobs'
    :type bootstrap: bool
    :param verbose: print the chosen configuration; also forwarded to
                    ``self.__prep_sents__``
    :type verbose: bool
    :return: the result of ``self.__prep_sents__(verbose=verbose)``
    :rtype: bool

    Side effects: sets ``__bootstrap__``, ``__ctrl__`` and ``__case__`` and
    appends a run-specific suffix to ``__MG_TOKENS_COLL__`` (joined with '-')
    and ``__PG_STATS_TBL__`` (joined with '_').

    NOTE(review): the suffix is appended to whatever the two names currently
    hold, so calling ``fit`` twice on one instance compounds the suffixes --
    confirm callers create a fresh instance (or reset the names) per run.
    """
    self.__bootstrap__ = bootstrap

    cases = []
    # Labels 1 / -1 match the output convention of sklearn's OneClassSVM.
    for label, (author, wts) in zip((1, -1),
                                    ((author1, wts1), (author2, wts2))):
        if not wts:
            # Nothing requested: fall back to every distinct 'type' value
            # recorded for this author, encoded to ASCII strings.
            wts = [wt.encode('ascii')
                   for wt in self.__io__.mg_distinct(self.__MG_DOCS_COLL__,
                                                     'type',
                                                     {'author': author})]
        if not isinstance(wts, list):
            wts = [wts]  # a single type was passed as a bare string
        cases.append((author, wts, label))

    self.__ctrl__, self.__case__ = cases  # (author, types, label) triples

    # Both storage names share the same five components and differ only in
    # the separator, so build the components once.
    name_parts = [
        self.__ctrl__[0],
        self.__case__[0],
        ''.join(wt[:3] for wt in self.__ctrl__[1]),  # 3-char type prefixes
        ''.join(wt[:3] for wt in self.__case__[1]),
        # Truthiness test instead of tuple indexing, so values such as
        # None or 2 no longer raise.
        'bs' if bootstrap else 'nobs',
    ]
    self.__MG_TOKENS_COLL__ += '-' + '-'.join(name_parts)
    self.__PG_STATS_TBL__ += '_' + '_'.join(name_parts)

    if verbose:
        # Single pre-formatted argument: prints the same text whether or not
        # print is a statement or a function.
        print('Control: %s' % (self.__ctrl__,))
        print('Case:  %s' % (self.__case__,))
        print('Saving tokens to %s' % (self.__MG_TOKENS_COLL__,))
        print('Saving stats to %s' % (self.__PG_STATS_TBL__,))

    return self.__prep_sents__(verbose=verbose)  # err in preparing sentences
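# (web-page residue, not part of the code) Comment list
# (web-page residue, not part of the code) Article table of contents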