scala> val file = sc.textFile("hdfs://Mhadoop:9000/user/hadoop/README.md")
file: org.apache.spark.rdd.RDD[String] = hdfs://Mhadoop:9000/user/hadoop/README.md MapPartitionsRDD[1] at textFile at <console>:21
file 变量是一个 MapPartitionsRDD;接着用 filter 筛选出包含 "spark" 这个词的行(contains 区分大小写,不会匹配 "Spark"):
scala> val sparks = file.filter(line => line.contains("spark"))
sparks: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[2] at filter at <console>:23