参考:https://blog.csdn.net/Damionew/article/details/103918688
参考Spark Doc:https://spark.apache.org/docs/latest/sql-getting-started.html
1.首先Maven需要引入spark依赖和mysql驱动:
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>2.4.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>2.4.4</version>
</dependency>
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.16</version>
</dependency>
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; public class JavaSparkSqlDemo { public static void main(String[] args){ SparkSession sparkSession = SparkSession .builder() .appName("JavaSparkSqlDemo") //Sets a name for the application .master("local") //Sets the Spark master URL to connect to .getOrCreate(); //获取或者新建一个 sparkSession //设置sparkSession数据连接 Dataset userDataset = sparkSession.read() .format("jdbc") .option("url","jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8") .option("dbtable","user") .option("driver","com.mysql.cj.jdbc.Driver") .option("user","root") .option("password","root") .load(); Dataset roleDataset = sparkSession.read() .format("jdbc") .option("url","jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8") .option("dbtable","role") .option("driver","com.mysql.cj.jdbc.Driver") .option("user","root") .option("password","root") .load(); //注册临时表后才能进行select等操作,必需,否则not found in database ‘default‘ userDataset.registerTempTable("user"); roleDataset.registerTempTable("role"); //SQL查询操作 //注意:1.所有用到的表需要在option和registerTempTable注册 Dataset<Row> sqlDF = sparkSession.sql("SELECT t1.id,t1.name,t2.role FROM USER t1 LEFT JOIN role t2 ON t1.id = t2.id "); sqlDF.show(); } }
原文:https://www.cnblogs.com/lshan/p/12846675.html