def __init__(self, num_class=2):
    """
    Initialise the classifier pipeline and the database handles.

    :type num_class: int
    :param num_class: number of target classes; 1 selects a one-class SVM
        (novelty detection), anything else selects a polynomial-kernel SVC
    :rtype: None
    """
    self.__ctrl__ = None
    self.__case__ = None
    # The same database name is used for both the Mongo and Postgres back-ends.
    with open('../../.dbname', 'r') as f:
        self.__DB_NAME__ = json.load(f)['dbname']
    self.__MG_DOCS_COLL__ = 'raw-docs'         # raw docs
    self.__MG_SENTS_COLL__ = 'bag-of-sents'    # raw sentences
    self.__MG_TOKENS_COLL__ = 'sample-tokens'  # clean tokens (words)
    self.__PG_STATS_TBL__ = 'stats'            # stylometric features
    # cross val results table, e.g. 'results_2class'
    self.__PG_RESULTS_TBL__ = 'results_' + str(num_class) + 'class'
    self.__PG_PROBAS_TBL__ = 'probabilities'   # cross val probabilities
    # num_class == 1 -> one-class SVM for novelty detection; otherwise a
    # polynomial-kernel SVC with balanced class weights (same truthiness as
    # the original `num_class - 1` conditional expression).
    if num_class != 1:
        classifier = SVC(probability=True,
                         kernel='poly',
                         degree=2,
                         class_weight='balanced')
    else:
        classifier = OneClassSVM(kernel='rbf',
                                 nu=0.7,
                                 gamma=1. / 250)
    self.__model__ = Pipeline([
        # ('scaler2', StandardScaler()),
        # ('scaler', MinMaxScaler()),
        # ('scaler3', Normalizer()),
        ('classifier', classifier)
    ])
    # Parenthesized single-argument print works identically in Python 2 and 3.
    print('Instantiated classifier %s.' %
          self.__model__.named_steps['classifier'].__class__.__name__)
    self.__io__ = DBIO(MG_DB_NAME=self.__DB_NAME__,
                       PG_DB_NAME=self.__DB_NAME__)
    self.__tagger__ = None     # initialise if re-creating samples
    self.__bootstrap__ = None  # initialise in fit