Spark Programming -- Spark SQL DataFrame
Date: 2021-07-01 10:21:17
package com.zwq

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object DataFrame {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("DataFrameApp")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    import spark.implicits._

    // val df = spark.read.json("resources/people.json")
    // df.show()

    // Load the JSON file into a DataFrame
    val peopleDF = spark.read.format("json").load("resources/people.json")

    // Select the name and age columns and save them in CSV format.
    // Note: save() writes a directory named people.csv containing part files,
    // not a single CSV file.
    peopleDF.select("name", "age").write.format("csv").save("resources/people.csv")
  }
}
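The import spark.implicits._ above is what enables converting local Scala collections into DataFrames. Below is a minimal, self-contained sketch of that idea; the Person case class, the sample rows, and the DataFrameFromSeq object name are illustrative assumptions, not part of the original program.

import org.apache.spark.sql.SparkSession

// Hypothetical case class; field names play the role of column names
case class Person(name: String, age: Long)

object DataFrameFromSeq {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("DataFrameFromSeq").getOrCreate()
    import spark.implicits._

    // toDF() is provided by spark.implicits._ and turns a local Seq into a DataFrame
    val localDF = Seq(Person("Alice", 29), Person("Bob", 35)).toDF()
    localDF.show()

    // Register the DataFrame as a temporary view so the same data can be queried with SQL
    localDF.createOrReplaceTempView("people")
    spark.sql("SELECT name, age FROM people WHERE age > 30").show()

    spark.stop()
  }
}

Registering a DataFrame as a temporary view is what bridges the DataFrame API and plain SQL queries inside the same SparkSession.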
Common DataFrame operations

// print the schema of the DataFrame
df.printSchema()
// project columns; arithmetic on a column returns a new column
df.select(df("name"), df("age") + 1).show()
// keep only rows where age is greater than 20
df.filter(df("age") > 20).show()
// group by age and count the rows in each group
df.groupBy("age").count().show()
// sort by a single column, descending
df.sort(df("age").desc).show()
// sort by multiple columns
df.sort(df("age").desc, df("name").asc).show()
// rename a column in the projection
df.select(df("name").as("username"), df("age")).show()