天天看點

spark sql 統計pv uv

話不多說,在開始之前先介紹下pv uv

uv:unique visitors(獨立訪客數),count(distinct guid)

pv:page views,count(url)

直接上代碼

import com.alibaba.fastjson.JSON

import org.apache.spark.sql.SQLContext

import org.apache.spark.{SparkContext, SparkConf}

/** One parsed log record: the app version and the user's unique identifier. */
case class Log(appv: String, userid: String)

object DailyUV {

  /**
   * Computes PV (page views) and UV (unique visitors) per app version with
   * Spark SQL: parses a JSON log file, registers it as a temp table, and
   * groups by `appv` counting total and distinct user ids.
   *
   * Fixes vs. the published listing: restored straight quotes (the blog
   * rendered them as smart quotes, which do not compile), escaped the
   * backslashes in the Windows path (`\w` is an illegal escape and `\r`
   * would become a carriage return), and repaired the garbled triple-quoted
   * SQL string.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("DailyUV").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Keep only records for product id 3 — a cheap substring pre-filter
    // applied before the (relatively expensive) JSON parse.
    val data = sc.textFile("D:\\work\\data\\row.dat")
      .filter(line => line.contains("\"pid\":\"3"))

    val mapData = data.map { line =>
      val json = JSON.parseObject(line)
      // "appv" is the app version; "aid" is the user's unique identifier.
      val appv = json.getString("appv")
      val userid = json.getString("aid")
      (appv, userid)
    }

    import sqlContext.implicits._
    // Convert the RDD of tuples to a DataFrame via the Log case class.
    val df = mapData.map(log => Log(log._1, log._2)).toDF()
    df.registerTempTable("log")

    // pv = total events per version; uv = distinct users per version.
    // Plain triple-quoted literal: nothing is interpolated, so no `s` prefix.
    val sqlStr =
      """
        |select appv,
        |count(userid) as pv,
        |count(distinct userid) as uv
        |from log
        |group by appv
      """.stripMargin

    val result = sqlContext.sql(sqlStr)
    result.show()
    sc.stop()
  }
}

運作結果:

+------+---+---+
|  appv| pv| uv|
+------+---+---+
|4.35.1|  1|  1|
|4.35.2|  1|  1|
|      |  1|  1|
+------+---+---+

繼續閱讀