// Reading data with Spark
Dataset<String> text = spark.read().textFile(currentSrcPath); // textFile returns Dataset<String>, not Dataset<Row>
Dataset<Row> json = spark.read().json(path);
Dataset<Row> orc = spark.read().orc(path);
Dataset<Row> parquet = spark.read().parquet(path);

// Writing data with Spark
df.write().mode("overwrite").text(outputPath); // text() requires a single string column
df.write().mode("overwrite").parquet(outputPath);
df.write().mode("overwrite").orc(outputPath);

// Converting a JavaRDD<Row> to Dataset<Row>
Dataset<Row> df = spark.createDataFrame(rowRDD, AdjustSchema.row);

// Converting a List to a Dataset
Dataset<String> dataset = spark.createDataset(Collections.singletonList(Long.toString(startTime)), Encoders.STRING());
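A minimal runnable sketch tying the readers and writers above together; the paths, app name, and local master are placeholders I added, not from the original post:

import java.util.Collections;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ReadWriteDemo {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("ReadWriteDemo")
                .master("local[*]") // local run; drop this on a cluster
                .getOrCreate();

        // Read JSON and write it back out as Parquet ("in.json"/"out.parquet" are placeholder paths)
        Dataset<Row> df = spark.read().json("in.json");
        df.write().mode("overwrite").parquet("out.parquet");

        // List -> Dataset<String>, then write as text (text() needs exactly one string column)
        Dataset<String> ds = spark.createDataset(
                Collections.singletonList(Long.toString(System.currentTimeMillis())),
                Encoders.STRING());
        ds.write().mode("overwrite").text("out.txt");

        spark.stop();
    }
}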
// Getting the Hadoop FileSystem from Spark
FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
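A common reason to grab the FileSystem handle is to clear a stale output directory before a job writes; a minimal sketch, with HdfsUtil and deleteIfExists as illustrative names of my own:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.SparkSession;

public class HdfsUtil {
    // Deletes outputPath (recursively) if it already exists, so a later write cannot trip over leftovers.
    public static void deleteIfExists(SparkSession spark, String outputPath) throws IOException {
        FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
        Path out = new Path(outputPath);
        if (fs.exists(out)) {
            fs.delete(out, true); // true = recursive
        }
    }
}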
// Building a schema (StringType must be qualified as DataTypes.StringType or statically imported)
public static StructType row = DataTypes.createStructType(Arrays.asList(
        DataTypes.createStructField("phone_name", DataTypes.StringType, true),
        DataTypes.createStructField("app_id", DataTypes.StringType, true)
        ...
));
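A self-contained sketch of such a schema in use via createDataFrame; the sample rows and values are invented for illustration, and the columns elided by "..." above are left out:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class SchemaDemo {
    public static final StructType row = DataTypes.createStructType(Arrays.asList(
            DataTypes.createStructField("phone_name", DataTypes.StringType, true),
            DataTypes.createStructField("app_id", DataTypes.StringType, true)));

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("SchemaDemo")
                .master("local[*]") // placeholder for a local run
                .getOrCreate();

        // Rows must match the schema's column order and types
        List<Row> rows = Arrays.asList(
                RowFactory.create("Pixel 8", "app_001"),
                RowFactory.create("iPhone 15", "app_002"));

        Dataset<Row> df = spark.createDataFrame(rows, row);
        df.show();

        spark.stop();
    }
}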
Source: https://www.cnblogs.com/wangbin2188/p/12851952.html