/**
 * Trains a corpus on {@code documents}, formats {@code testDoc} with a
 * formatter built from that corpus, and reports how the result compares
 * to the original document.
 *
 * @param language            descriptor of the language being formatted; its
 *                            {@code indentSize} is passed to the formatter and
 *                            its {@code name} is used as the output
 *                            sub-directory name
 * @param documents           documents used to build and train the corpus
 * @param testDoc             document to format and analyze
 * @param saveOutput          when true, the formatted text is written to
 *                            {@code outputDir/<language.name>/<base name of testDoc.fileName>}
 * @param computeEditDistance when true, the normalized Levenshtein distance
 *                            between {@code testDoc.content} and the formatted
 *                            output is computed; otherwise 0 is reported
 * @return a triple of (formatter used, edit distance, classification error rate)
 * @throws Exception if training, formatting, analysis, or writing the output fails
 */
public static Triple<Formatter,Float,Float> validate(LangDescriptor language,
                                                     List<InputDocument> documents,
                                                     InputDocument testDoc,
                                                     boolean saveOutput,
                                                     boolean computeEditDistance)
    throws Exception
{
    Corpus corpus = new Corpus(documents, language);
    corpus.train();

    Formatter formatter = new Formatter(corpus, language.indentSize);
    String output = formatter.format(testDoc, false);

    // The distance is only computed on request; 0 is reported otherwise.
    float editDistance = 0;
    if ( computeEditDistance ) {
        editDistance = normalizedLevenshteinDistance(testDoc.content, output);
    }

    ClassificationAnalysis analysis =
        new ClassificationAnalysis(testDoc, formatter.getAnalysisPerToken());

    if ( saveOutput ) {
        File dir = new File(outputDir+"/"+language.name);
        // mkdirs() rather than mkdir(): the per-language directory must be
        // creatable even when outputDir itself does not exist yet. The result
        // is not checked here, matching the original behavior.
        dir.mkdirs();
        // Only the base name of the test document is kept, so the output
        // lands directly inside the per-language directory.
        String baseName = new File(testDoc.fileName).getName();
        Utils.writeFile(dir.getPath()+"/"+baseName, output);
    }

    return new Triple<>(formatter, editDistance, analysis.getErrorRate());
}
SubsetValidator.java 文件源码
java
阅读 72
收藏 0
点赞 0
评论 0
项目:codebuff
作者:
评论列表
文章目录