/**
 * Iterates k-means over {@code input}, starting from previously generated seed clusters.
 *
 * <p>The seed clusters are loaded from {@code clustersIn}, paired with a
 * {@code KMeansClusteringPolicy} built from the parsed {@code delta}, written out under
 * {@code output} in the {@code Cluster.INITIAL_CLUSTERS_DIR} directory, and then handed to
 * {@code ClusterIterator}, which runs either sequentially or as Map/Reduce.
 *
 * @param conf          configuration passed to the cluster loader and to the iterator
 * @param input         path of the data to be clustered
 * @param clustersIn    path the initial (seed) clusters are read from
 * @param output        base output path; the initial clusters directory is created beneath it
 * @param measure       distance measure; NOTE(review): not referenced anywhere in this method body
 * @param maxIterations iteration limit forwarded to {@code ClusterIterator}
 * @param delta         convergence threshold as text; parsed with {@link Double#parseDouble(String)}
 * @param runSequential {@code true} to call {@code iterateSeq}, {@code false} to call {@code iterateMR}
 * @return the same {@code output} path that was passed in
 * @throws NumberFormatException  if {@code delta} is not a parsable double
 * @throws IllegalStateException  if no clusters could be loaded from {@code clustersIn}
 * @throws IOException            if thrown by the cluster loading, writing, or iteration steps
 * @throws InterruptedException   if thrown by the iteration step
 * @throws ClassNotFoundException if thrown by the iteration step
 */
public static Path buildClusters(Configuration conf, Path input, Path clustersIn, Path output,
    DistanceMeasure measure, int maxIterations, String delta, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Parse the threshold up front so a malformed delta fails before any cluster data is touched.
  final double threshold = Double.parseDouble(delta);

  // Read the seed clusters from clustersIn into the list
  // (per the original author's note, e.g. output/clusters-0/part-randomSeed).
  final List<Cluster> seedClusters = Lists.newArrayList();
  KMeansUtil.configureWithClusterInfo(conf, clustersIn, seedClusters);
  if (seedClusters.isEmpty()) {
    throw new IllegalStateException("No input clusters found in " + clustersIn + ". Check your -c argument.");
  }

  // Persist the clustering policy (which carries the convergence threshold) together with the
  // seed clusters under the initial clusters directory. Per the original author's note this
  // yields a _policy file plus one part-000xx file per cluster — TODO confirm against
  // ClusterClassifier.writeToSeqFiles.
  final Path initialClustersDir = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
  final ClusteringPolicy convergencePolicy = new KMeansClusteringPolicy(threshold);
  new ClusterClassifier(seedClusters, convergencePolicy).writeToSeqFiles(initialClustersDir);

  // Run the iterations (bounded by maxIterations), either as Map/Reduce jobs or in-process.
  if (!runSequential) {
    ClusterIterator.iterateMR(conf, input, initialClustersDir, output, maxIterations);
  } else {
    ClusterIterator.iterateSeq(conf, input, initialClustersDir, output, maxIterations);
  }
  return output;
}
从上面的代码可以看出:程序首先会从 clustersIn 指定的路径(例如 output/clusters-0/part-randomSeed)读出 Cluster 数据,放入 clusters 变量中;如果一个 Cluster 都没有读到,就会抛出 "No input clusters found" 异常。
因此,output/clusters-0/part-randomSeed 这个路径下可能没有文件,请楼主检查一下。
|