I have 1 billion rows of test data stored in both HDFS and HBase, and I want to use Spark to compare analysis performance across the two storage options. Reading from HDFS works fine, but reading from HBase fails with the error below.
[mw_shl_code=scala,true]import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat

// Scan the table as an RDD of (rowkey, Result) pairs
conf.set(TableInputFormat.INPUT_TABLE, tableName)
conf.set(TableInputFormat.SCAN, convertScanToString(scan))
val rddScan = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[ImmutableBytesWritable],
  classOf[Result])[/mw_shl_code]
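convertScanToString here is my own helper that Base64-encodes the Scan's protobuf form, which is the format TableInputFormat.SCAN expects. A minimal sketch, assuming the HBase 1.x client/protobuf API (it mirrors TableMapReduceUtil.convertScanToString):
[mw_shl_code=scala,true]import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.Base64

// Serialize the Scan to its protobuf form and Base64-encode it
// so TableInputFormat can deserialize it from the job configuration
def convertScanToString(scan: Scan): String =
  Base64.encodeBytes(ProtobufUtil.toScan(scan).toByteArray)[/mw_shl_code]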
Error log:
[mw_shl_code=shell,true]Job aborted due to stage failure: Task 61 in stage 0.0 failed 4 times, most recent failure: Lost task 61.3 in stage 0.0 (TID 97, 10.168.14.4): java.lang.RuntimeException: java.lang.OutOfMemoryError: unable to create new native thread
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:208)
at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:326)
at org.apache.hadoop.hbase.client.ClientScanner.loadCache(ClientScanner.java:409)
at org.apache.hadoop.hbase.client.ClientScanner.next(ClientScanner.java:370)
at org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.nextKeyValue(TableRecordReaderImpl.java:205)
at org.apache.hadoop.hbase.mapreduce.TableRecordReader.nextKeyValue(TableRecordReader.java:147)
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase$1.nextKeyValue(TableInputFormatBase.java:216)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.hasNext(NewHadoopRDD.scala:182)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:192)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.OutOfMemoryError: unable to create new native thread
at java.lang.Thread.start0(Native Method)
at java.lang.Thread.start(Thread.java:714)
at java.util.concurrent.ThreadPoolExecutor.addWorker(ThreadPoolExecutor.java:950)
at java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1357)
at org.apache.hadoop.hbase.client.ResultBoundedCompletionService.submit(ResultBoundedCompletionService.java:146)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.addCallsForCurrentReplica(ScannerCallableWithReplicas.java:287)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:170)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:60)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
... 18 more
Driver stacktrace:[/mw_shl_code]
Since "unable to create new native thread" usually means the OS refused to spawn a thread (the per-user nproc limit, not JVM heap), I tried modifying /etc/security/limits.d/90-nproc.conf, but it had no effect.
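In case it matters, the edit was along these lines (a sketch assuming the CentOS 6 default layout, where this file caps nproc at 1024; "yarn" stands in for whichever account actually runs the executor JVMs in your setup):
[mw_shl_code=shell,true]# /etc/security/limits.d/90-nproc.conf -- raise the per-user process/thread cap
*       soft    nproc   65535
root    soft    nproc   unlimited

# pam_limits only applies at login, so long-running daemons (NodeManager,
# RegionServer) must be restarted to pick up the new limit. Verify it as
# the user that runs the executor JVMs (assumption: "yarn" under YARN):
su -s /bin/bash -c 'ulimit -u' yarn[/mw_shl_code]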