LeaveOneOutValidator.java 文件源码

java
阅读 19 收藏 0 点赞 0 评论 0

项目:codebuff 作者:
/**
 * Validates every parsable document found under {@code rootDir} and gathers
 * the per-document results.
 *
 * <p>All files under {@code rootDir} matching {@code language.fileRegex} are
 * loaded; documents whose parse tree is {@code null} are dropped. For each
 * remaining document one job calls
 * {@code validate(language, parsableDocuments, fileName, ...)}; the jobs run
 * on a fixed-size thread pool. (NOTE(review): judging by the class name this
 * is presumably leave-one-out validation, i.e. {@code validate} trains on the
 * other documents -- confirm against {@code validate}.)
 *
 * <p>A job that fails prints its stack trace to {@code System.err} and
 * contributes nothing to the result; it does not abort the other jobs.
 * Timing statistics are printed to {@code System.out} whether or not the
 * run succeeds.
 *
 * @param injectWSFeatures    feature set passed through to {@code validate}
 * @param alignmentFeatures   feature set passed through to {@code validate}
 * @param computeEditDistance passed through to {@code validate}
 * @param outputDir           passed through to {@code validate}
 * @return three lists (formatters, edit distances, error rates); entries at
 *         the same index belong to the same document, but the order of
 *         documents depends on job completion order
 * @throws Exception if loading the documents fails or the calling thread is
 *                   interrupted while waiting for the jobs
 */
public Triple<List<Formatter>,List<Float>,List<Float>> validateDocuments(FeatureMetaData[] injectWSFeatures,
                                                                         FeatureMetaData[] alignmentFeatures,
                                                                         boolean computeEditDistance,
                                                                         String outputDir)
    throws Exception
{
    List<Formatter> formatters = Collections.synchronizedList(new ArrayList<>());
    List<Float> distances = Collections.synchronizedList(new ArrayList<>());
    List<Float> errors = Collections.synchronizedList(new ArrayList<>());
    // Guards the three lists as a unit so that one document's results always
    // land at the same index in all of them.
    final Object resultsLock = new Object();
    long start = System.nanoTime();
    try {
        List<String> allFiles = getFilenames(new File(rootDir), language.fileRegex);
        final List<InputDocument> documents = Tool.load(allFiles, language);
        final List<InputDocument> parsableDocuments = filter(documents, d -> d.tree!=null);
        long stop = System.nanoTime();
        System.out.printf("Load/parse all docs from %s time %d ms\n",
                          rootDir,
                          (stop-start)/1_000_000);

        List<Callable<Void>> jobs = new ArrayList<>();
        for (int i = 0; i<parsableDocuments.size(); i++) {
            final String fileName = parsableDocuments.get(i).fileName;
            Callable<Void> job = () -> {
                try {
                    Triple<Formatter, Float, Float> results =
                        validate(language, parsableDocuments, fileName,
                                 Formatter.DEFAULT_K, injectWSFeatures, alignmentFeatures,
                                 outputDir, computeEditDistance, false);
                    // Read (and unbox) everything before touching the shared
                    // lists: a failure here must not leave a partial entry.
                    float editDistance = results.b;
                    Float errorRate = results.c;
                    synchronized (resultsLock) {
                        formatters.add(results.a);
                        distances.add(editDistance);
                        errors.add(errorRate);
                    }
                }
                catch (Throwable t) {
                    // Best effort: report the failing document and carry on.
                    t.printStackTrace(System.err);
                }
                return null;
            };
            jobs.add(job);
        }

        int ncpu = Runtime.getRuntime().availableProcessors();
        if ( FORCE_SINGLE_THREADED ) {
            ncpu = 2; // yields a pool of exactly one worker below
        }
        // Leave one CPU free, but never ask for an empty pool:
        // newFixedThreadPool(0) throws IllegalArgumentException on 1-CPU hosts.
        ExecutorService pool = Executors.newFixedThreadPool(Math.max(1, ncpu-1));
        try {
            pool.invokeAll(jobs);
        }
        finally {
            // Always release the worker threads, even if invokeAll was interrupted.
            pool.shutdown();
        }
        pool.awaitTermination(60, TimeUnit.MINUTES);
    }
    finally {
        long final_stop = System.nanoTime();
        Double medianTrainingTime = median(trainingTimes);
        double medianFormattingPerMS = median(formattingTokensPerMS);
        System.out.printf("Total time %dms\n", (final_stop-start)/1_000_000);
        System.out.printf("Median training time %dms\n",
                          medianTrainingTime.intValue());
        System.out.printf("Median formatting time tokens per ms %5.4fms, min %5.4f max %5.4f\n",
                          medianFormattingPerMS,
                          BuffUtils.min(formattingTokensPerMS),
                          BuffUtils.max(formattingTokensPerMS));
    }
    return new Triple<>(formatters,distances,errors);
}
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号