The default Kettle Hadoop file plugin does not support Hadoop clusters with Kerberos authentication enabled. Our company cluster has Kerberos turned on, so the plugin source has to be modified to add Kerberos support.
The company cluster runs version 2.6.5, so the matching Hadoop version here is 2.6, i.e. the hdp26 shim:
data-integration/plugins/pentaho-big-data-plugin/hadoop-configurations/hdp26/pentaho-hadoop-shims-hdp26-8.2.2018.11.00-342.jar
Modify the getFileSystem method in this jar as follows:
    public FileSystem getFileSystem(Configuration conf) throws IOException {
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
        JobConf jobConf = ShimUtils.asConfiguration(conf);
        // Kerberos authentication
        if (jobConf.getBoolean("hadoop.security.authorization", false)) {
            // Path to krb5.conf; copy /etc/krb5.conf from any node of the Kerberos-enabled cluster to the local machine
            String krb5File = jobConf.get("krb5_file_location", "/etc/krb5.conf");
            // Kerberos principal
            String kerUser = jobConf.get("krb5_user_principal");
            // Keytab file for that principal, fetched from the server and stored locally
            String keyPath = jobConf.get("krb5_user_keytab");
            // Point the JVM at the krb5.conf file
            System.setProperty("java.security.krb5.conf", krb5File);
            // Tell UserGroupInformation to authenticate with Kerberos
            UserGroupInformation.setConfiguration(jobConf);
            try {
                UserGroupInformation.loginUserFromKeytab(kerUser, keyPath);
            } catch (IOException e) {
                throw new RuntimeException("Kerberos login failed", e);
            }
        }
        FileSystemProxy fs;
        try {
            fs = new FileSystemProxy(org.apache.hadoop.fs.FileSystem.get(jobConf));
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
        return fs;
    }

Repack the modified class back into pentaho-hadoop-shims-hdp26-8.2.2018.11.00-342.jar.
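The Kerberos block relies on org.apache.hadoop.security.UserGroupInformation; the other types it touches (JobConf, ShimUtils, FileSystemProxy) are already referenced by the unmodified method. If the shim class does not already import it, add:

import org.apache.hadoop.security.UserGroupInformation;

The properties that the new code checks and reads (hadoop.security.authorization and the krb5_* keys) then have to be supplied through the shim's Hadoop configuration, typically core-site.xml in the hadoop-configurations/hdp26 folder. First, enable Kerberos security: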
<configuration>
    <property>
        <name>hadoop.security.authorization</name>
        <value>true</value>
    </property>
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
</configuration>

The custom krb5_* keys read by the modified getFileSystem are supplied the same way:

<configuration>
    <property>
        <name>krb5_file_location</name>
        <value>C:/Windows/krb5.ini</value>
    </property>
    <property>
        <name>krb5_user_principal</name>
        <value>hadoop/standalone@XTEEN.COM</value>
    </property>
    <property>
        <name>krb5_user_keytab</name>
        <value>D:/Coder/Software/pdi-ce-8.2.0/data-integration/plugins/pentaho-big-data-plugin/hadoop-configurations/hdp26/hadoop.keytab</value>
    </property>
</configuration>
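Before wiring everything through Kettle, it can help to verify the principal, keytab, and krb5 file with a small standalone program that runs the same login sequence as the modified getFileSystem. This is a minimal sketch, not part of the original post: the fs.defaultFS value is a placeholder that must point at your own NameNode, and the class name KerberosLoginCheck is only illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosLoginCheck {
    public static void main(String[] args) throws IOException {
        // Same three values as in the configuration above
        String krb5File = "C:/Windows/krb5.ini";
        String principal = "hadoop/standalone@XTEEN.COM";
        String keytab = "D:/Coder/Software/pdi-ce-8.2.0/data-integration/plugins/pentaho-big-data-plugin/hadoop-configurations/hdp26/hadoop.keytab";

        // Point the JVM at the krb5 configuration, as the modified shim does
        System.setProperty("java.security.krb5.conf", krb5File);

        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        // Placeholder NameNode address -- replace with your cluster's fs.defaultFS
        conf.set("fs.defaultFS", "hdfs://namenode:8020");

        // Same login sequence as the modified getFileSystem
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(principal, keytab);

        // If the login worked, listing the HDFS root should succeed
        FileSystem fs = FileSystem.get(conf);
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath());
        }
    }
}

If this program lists the root directory without a GSS/Kerberos exception, the same keytab and principal should also work inside Kettle.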


Kettle Hadoop file plugin with Kerberos support
Original post: https://www.cnblogs.com/xteen/p/12392443.html