SubsetValidator.java 文件源码

java
阅读 21 收藏 0 点赞 0 评论 0

项目:codebuff 作者:
/**
 * Computes, for every corpus subset size from 1 up to the smaller of the
 * number of loaded documents and {@code maxNumFiles}, the median of the
 * error rates observed over {@code trials} random samples of that size.
 *
 * <p>One job is created per subset size and all jobs run on a fixed-size
 * thread pool. Each trial draws a sample with
 * {@code validator.selectSample(documents, size)}, passes it to
 * {@code validate}, and records the third component of the returned triple.
 * The recorded values are sorted and the element at index {@code n/2} is
 * taken as the median (the upper middle element when {@code n} is even).
 *
 * <p>A job that fails does not abort the others: its throwable is printed
 * to {@code System.err} and its slot in the result keeps the default
 * {@code 0.0f}. This includes the case {@code trials <= 0}, where no error
 * rate is recorded and taking the median fails.
 *
 * @param language    descriptor of the language whose corpus is evaluated
 * @param maxNumFiles upper bound on the corpus subset size to try
 * @param trials      number of random samples evaluated per subset size
 * @return array indexed by subset size; index 0 is never written
 * @throws Exception if loading the corpus fails or the calling thread is
 *                   interrupted while waiting for the jobs
 */
public static float[] getMedianErrorRates(LangDescriptor language, int maxNumFiles, int trials) throws Exception {
        SubsetValidator validator = new SubsetValidator(language.corpusDir, language);
        List<InputDocument> documents = Tool.load(validator.allFiles, language);
        float[] medians = new float[Math.min(documents.size(),maxNumFiles)+1];
        // Bound the loop by the array as well as by the file list so that no job
        // can index past the end of medians if the two sizes ever disagree.
        final int maxSubsetSize = Math.min(validator.allFiles.size(), medians.length-1);

        int ncpu = Runtime.getRuntime().availableProcessors();
        if ( FORCE_SINGLE_THREADED ) {
            ncpu = 2; // yields a pool of exactly one worker below
        }
        // Leave one processor free, but never ask for zero threads:
        // newFixedThreadPool rejects a non-positive size on single-core hosts.
        ExecutorService pool = Executors.newFixedThreadPool(Math.max(1, ncpu-1));
        List<Callable<Void>> jobs = new ArrayList<>();

        for (int i = 1; i<=maxSubsetSize; i++) { // i is corpus subset size
            final int corpusSubsetSize = i;
            Callable<Void> job = () -> {
                try {
                    List<Float> errorRates = new ArrayList<>();
                    for (int trial = 1; trial<=trials; trial++) { // multiple trials per subset size
                        Pair<InputDocument, List<InputDocument>> sample = validator.selectSample(documents, corpusSubsetSize);
                        Triple<Formatter, Float, Float> results = validate(language, sample.b, sample.a, true, false);
                        errorRates.add(results.c);
                    }
                    Collections.sort(errorRates);
                    int n = errorRates.size();
                    float median = errorRates.get(n/2);
                    System.out.println("median "+language.name+" error rate for n="+corpusSubsetSize+" is "+median);
                    // Each job writes only its own slot, so jobs never touch the same element.
                    medians[corpusSubsetSize] = median;
                }
                catch (Throwable t) {
                    // Best effort: report the failure and let the remaining subset sizes finish.
                    t.printStackTrace(System.err);
                }
                return null;
            };
            jobs.add(job);
        }

        try {
            pool.invokeAll(jobs); // blocks until every job has completed
        }
        finally {
            // Always release the worker threads, even if invokeAll was interrupted.
            pool.shutdown();
        }
        pool.awaitTermination(60, TimeUnit.MINUTES);
        return medians;
    }
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号