scala> val sqlContext = new org.apache.spark.sql.SQLContext(sc)
sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@1e2c0ff
scala> import sqlContext._
import sqlContext._
scala> case class People(name:String, age:Int)
defined class People
scala> val people = sc.textFile("hdfs://master:8020/usr/data/people.txt").map(_.split(",")).map(x => People(x(0), x(1).trim.toInt))
16/06/13 22:27:50 WARN util.SizeEstimator: Failed to check whether UseCompressedOops is set; assuming yes
16/06/13 22:27:50 INFO storage.MemoryStore: ensureFreeSpace(85352) called with curMem=0, maxMem=560497950
16/06/13 22:27:50 INFO storage.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 83.4 KB, free 534.5 MB)
16/06/13 22:27:50 INFO storage.MemoryStore: ensureFreeSpace(20071) called with curMem=85352, maxMem=560497950
16/06/13 22:27:50 INFO storage.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 19.6 KB, free 534.4 MB)
16/06/13 22:27:50 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:45336 (size: 19.6 KB, free: 534.5 MB)
16/06/13 22:27:50 INFO spark.SparkContext: Created broadcast 0 from textFile at <console>:28
people: org.apache.spark.rdd.RDD[People] = MapPartitionsRDD[3] at map at <console>:28
scala> people.take(1)
16/06/13 22:28:10 INFO mapred.FileInputFormat: Total input paths to process : 1
16/06/13 22:28:10 INFO spark.SparkContext: Starting job: take at <console>:31
16/06/13 22:28:10 INFO scheduler.DAGScheduler: Got job 0 (take at <console>:31) with 1 output partitions
16/06/13 22:28:10 INFO scheduler.DAGScheduler: Final stage: ResultStage 0(take at <console>:31)
16/06/13 22:28:10 INFO scheduler.DAGScheduler: Parents of final stage: List()
16/06/13 22:28:10 INFO scheduler.DAGScheduler: Missing parents: List()
16/06/13 22:28:10 INFO scheduler.DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[3] at map at <console>:28), which has no missing parents
16/06/13 22:28:10 INFO storage.MemoryStore: ensureFreeSpace(3608) called with curMem=105423, maxMem=560497950
16/06/13 22:28:10 INFO storage.MemoryStore: Block broadcast_1 stored as values in memory (estimated size 3.5 KB, free 534.4 MB)
16/06/13 22:28:10 INFO storage.MemoryStore: ensureFreeSpace(2020) called with curMem=109031, maxMem=560497950
16/06/13 22:28:10 INFO storage.MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 2020.0 B, free 534.4 MB)
16/06/13 22:28:10 INFO storage.BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:45336 (size: 2020.0 B, free: 534.5 MB)
16/06/13 22:28:10 INFO spark.SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861
16/06/13 22:28:10 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[3] at map at <console>:28)
16/06/13 22:28:10 INFO scheduler.TaskSchedulerImpl: Adding task set 0.0 with 1 tasks
16/06/13 22:28:11 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, ANY, 2147 bytes)
16/06/13 22:28:11 INFO executor.Executor: Running task 0.0 in stage 0.0 (TID 0)
16/06/13 22:28:11 INFO rdd.HadoopRDD: Input split: hdfs://master:8020/usr/data/people.txt:0+61
16/06/13 22:28:11 INFO Configuration.deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
16/06/13 22:28:11 INFO Configuration.deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
16/06/13 22:28:11 INFO Configuration.deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
16/06/13 22:28:11 INFO Configuration.deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
16/06/13 22:28:11 INFO Configuration.deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
16/06/13 22:28:11 INFO executor.Executor: Finished task 0.0 in stage 0.0 (TID 0). 2438 bytes result sent to driver
16/06/13 22:28:11 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 491 ms on localhost (1/1)
16/06/13 22:28:11 INFO scheduler.DAGScheduler: ResultStage 0 (take at <console>:31) finished in 0.563 s
16/06/13 22:28:11 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
16/06/13 22:28:11 INFO scheduler.DAGScheduler: Job 0 finished: take at <console>:31, took 0.816382 s
res0: Array[People] = Array(People(liu,25))
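
(The take(1) result shows the read-and-parse pipeline itself works; the exact file contents aren't shown, but judging from that output, people.txt presumably holds comma-separated name,age records, one per line, such as:

liu,25
)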
scala> people.registerAsTable("People")
<console>:31: error: value registerAsTable is not a member of org.apache.spark.rdd.RDD[People]
people.registerAsTable("People")
^
scala> people.registerTempTable("People")
<console>:31: error: value registerTempTable is not a member of org.apache.spark.rdd.RDD[People]
people.registerTempTable("People")
^
scala> val peopleSchema = sqlContext.createSchemaRDD(people)
<console>:30: error: value createSchemaRDD is not a member of org.apache.spark.sql.SQLContext
val peopleSchema = sqlContext.createSchemaRDD(people)
^
scala> import sqlContext.createSchemaRDD
<console>:26: error: value createSchemaRDD is not a member of org.apache.spark.sql.SQLContext
import sqlContext.createSchemaRDD
^
scala> import sqlContext.implicits._
import sqlContext.implicits._
scala> people.registerTempTable("People")
<console>:34: error: value registerTempTable is not a member of org.apache.spark.rdd.RDD[People]
people.registerTempTable("People")
^
scala> people.registerAsTable("People")
<console>:34: error: value registerAsTable is not a member of org.apache.spark.rdd.RDD[People]
people.registerAsTable("People")
^

No matter which imports I try, it keeps telling me that registerAsTable or registerTempTable is not a member of org.apache.spark.rdd.RDD. Why is that? Does my environment need something else installed? I have only installed Spark and Hadoop.
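
(Editor's note: nothing else needs to be installed; the errors come from an API change. registerAsTable and createSchemaRDD belong to the old SchemaRDD API, which was removed in Spark 1.3 when DataFrames replaced SchemaRDD; since then, registerTempTable is a method on DataFrame, not on RDD. A minimal sketch of the usual fix on Spark 1.3–1.6, reusing the sqlContext, People case class, and people RDD from the session above; the table name and the sample query are illustrative:

// Spark 1.3+: an RDD of case class instances must first be converted to a DataFrame.
import sqlContext.implicits._            // provides .toDF() on RDDs of case classes

val peopleDF = people.toDF()             // RDD[People] -> DataFrame
peopleDF.registerTempTable("People")     // registerTempTable is defined on DataFrame

// Query the temp table; column names come from the case class fields.
val adults = sqlContext.sql("SELECT name, age FROM People WHERE age >= 18")
adults.show()
)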