Official manual: Spark Core
Official manuals: Apache Spark, Hadoop, Kafka, HBase
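A Spark Core word-count walkthrough: ship a text file to the executors with sc.addFile/SparkFiles, read the same file as an RDD, count words with flatMap/map/reduceByKey, and save the result back to HDFS.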
// Note: a package segment literally named "scala" shadows the root scala package
// and breaks imports such as scala.io.Source, so the package is plain "main" here
package main
import org.apache.spark.{SparkConf, SparkContext, SparkFiles}
//import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
//import org.apache.spark.ml.linalg.{Vector, Vectors}
//import org.apache.spark.sql._
//import org.apache.spark.sql.SparkSession
//import org.apache.hadoop
import scala.io.Source
// Exception in thread "main" java.lang.NoSuchMethodError: scala.util.matching.Regex
// (this usually means the project's Scala version differs from the one Spark was built against)
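// A minimal build.sbt sketch to keep those versions aligned (the versions below are
// assumptions; pin them to whatever your Spark distribution was built with):
//   scalaVersion := "2.11.12"
//   libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.8"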
object jjj {
  def main(args: Array[String]): Unit = {
    // val spark = SparkSession
    //   .builder()
    //   .appName("WordCount")
    //   .master("local[*]")
    //   .enableHiveSupport()
    //   .getOrCreate()

    // Run locally, using as many worker threads as there are logical cores
    val conf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
    val sc = new SparkContext(conf)
    // val jjfile = sc.textFile("/jjProj/words.txt") //hdfs://192.168.43.99:9000

    // Distribute the file to every node, then read the driver's local copy
    val files = "hdfs://jidafus-MBP:9000/jjProj/words.txt"
    sc.addFile(files)
    val path = SparkFiles.get("words.txt")
    println(path)
    val source = Source.fromFile(path)
    val lines = source.getLines.toArray
    source.close() // release the file handle once the lines are in memory
println("\n\n============= 1 =============")
println(lines.mkString(","))
println("============= 2 =============")
val textFileRDD = sc.textFile("hdfs://jidafus-MBP:9000/jjProj/words.txt")
val wordRDD = textFileRDD.flatMap(line => line.split(" "))
println("============= 3 =============")
val pairWordRDD = wordRDD.map(word => (word, 1))
println(pairWordRDD)
println("============= 4 =============")
val wordCountRDD = pairWordRDD.reduceByKey((a, b) => a + b)
println("\n\n")
wordCountRDD.saveAsTextFile("hdfs://jidafus-MBP:9000/jjProj/wordcount3")
}
}
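// Submitting the job (a sketch; the jar path is an assumption for your build, and
// note that the hard-coded setMaster("local[*]") above takes precedence over any --master flag):
//   spark-submit --class main.jjj target/scala-2.11/jjproj_2.11-0.1.jar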
