首页 > 编程语言 > 详细

Java Spark 转换算子 join、leftOuterJoin、rightOuterJoin、fullOuterJoin

时间:2020-05-14 14:39:31      阅读:224      评论:0      收藏:0      [点我收藏+]
/**
 * # _*_ coding:utf-8 _*_
 * # Author:xiaoshubiao
 * # Time : 2020/5/14 8:33
 **/
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates the four pair-RDD join transformations: {@code join},
 * {@code leftOuterJoin}, {@code rightOuterJoin} and {@code fullOuterJoin}.
 *
 * <p>Two small keyed datasets are built from string lists. The left one maps
 * the keys a, b, c, d, e to the value 1; the right one maps the keys
 * a, b, c, f, h to the value 2. Each join result is collected to the driver
 * and printed one record per line, prefixed with the name of the join.
 *
 * <p>The class name is kept as-is so existing launch commands keep working.
 */
public class union_test {
    /**
     * Runs the join examples on a local Spark master using all available cores.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("union_test");
        // try-with-resources stops the context even if one of the jobs throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> leftKeys = Arrays.asList("a", "b", "c", "d", "e");
            List<String> rightKeys = Arrays.asList("a", "b", "c", "f", "h");

            // Both source RDDs are split into two partitions.
            JavaRDD<String> leftRdd = sc.parallelize(leftKeys, 2);
            JavaRDD<String> rightRdd = sc.parallelize(rightKeys, 2);

            // Typed pair RDDs: the raw types used previously discarded the
            // key/value types and produced unchecked warnings on every join.
            JavaPairRDD<String, Integer> leftPairs = leftRdd.mapToPair(key -> new Tuple2<>(key, 1));
            JavaPairRDD<String, Integer> rightPairs = rightRdd.mapToPair(key -> new Tuple2<>(key, 2));

            leftPairs.join(rightPairs)
                    .collect()
                    .forEach(row -> System.out.println("join" + row));
            leftPairs.leftOuterJoin(rightPairs)
                    .collect()
                    .forEach(row -> System.out.println("leftOuterJoin" + row));
            leftPairs.rightOuterJoin(rightPairs)
                    .collect()
                    .forEach(row -> System.out.println("rightOuterJoin" + row));
            leftPairs.fullOuterJoin(rightPairs)
                    .collect()
                    .forEach(row -> System.out.println("fullOuterJoin" + row));
        }
    }
}

 

Java Spark 转换算子 join、leftOuterJoin、rightOuterJoin、fullOuterJoin

原文:https://www.cnblogs.com/7749ha/p/12888272.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!