-
前提:已在 Windows 下安裝 PySpark
設定連線配置
-
用 Jupyter Notebook 編寫 PySpark 代碼
from pyspark.sql import SparkSession
# Environment setup: configure a builder for master "local" and app name
# "test", then enable Hive support and get (or create) the SparkSession.
builder = SparkSession.builder.master("local").appName("test")
spark = builder.enableHiveSupport().getOrCreate()

# SparkContext of the session, used below for the RDD API.
sc = spark.sparkContext

# Smoke test: parallelize a one-element list and collect it back.
# The bare expression on the last line lets a notebook cell display the result.
rdd = sc.parallelize([("hello", 1)])
rdd.collect()