import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates `RDD.join`: an inner join of two pair-RDDs on their key.
 *
 * Only keys present in BOTH RDDs appear in the result; each matching key
 * yields a tuple of (key, (leftValue, rightValue)).
 */
object joinRDD {

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("My scala word count").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // join: combine pair RDDs by key
      val rdd: RDD[(Int, String)] = sc.makeRDD(List((3, "a"), (2, "a"), (1, "c")), 3)
      val rdd2: RDD[(Int, Int)] = sc.makeRDD(Array((1, 3), (2, 6), (3, 9)))

      // Inner join on the Int key. Renamed from `joinRDD` to avoid shadowing
      // the enclosing object's name.
      val joined: RDD[(Int, (String, Int))] = rdd.join(rdd2)
      joined.collect().foreach(println)
      // Expected output (partition order may vary):
      // (3,(a,9))
      // (1,(c,3))
      // (2,(a,6))
    } finally {
      // Always release the SparkContext, even if the job fails — the original
      // never stopped it, leaking the context and its resources.
      sc.stop()
    }
  }
}

// Source: https://www.cnblogs.com/hapyygril/p/13720503.html