1. Creating a directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // fs.create(path) would create an empty file; mkdirs() creates the directory.
        fs.mkdirs(path);
        fs.close();
    }
}
2. Deleting a directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // true = delete recursively, removing anything inside the directory as well.
        fs.delete(path, true);
        fs.close();
    }
}
3. Writing a file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FSDataOutputStream out = fs.create(path);
        out.writeUTF("da jia hao,cai shi zhen de hao!");
        // Close the output stream before closing the FileSystem so the data is flushed.
        out.close();
        fs.close();
    }
}
4. Reading a file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        if (fs.exists(path)) {
            FSDataInputStream is = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            byte[] buffer = new byte[(int) status.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            // buffer.toString() would only print the array reference; decode the bytes instead.
            System.out.println(new String(buffer, "UTF-8"));
        }
    }
}
5. Uploading a local file to HDFS
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/xxxx.txt");
        Path dst = new Path("/user/hadoop/hdfs/");
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}
6. Deleting a file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        // false = non-recursive; the target here is a single file.
        fs.delete(path, false);
        fs.close();
    }
}
7. Listing all subdirectories and files under a given directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {

    static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop");
        getFile(path, fs);
        // fs.close();
    }

    public static void getFile(Path path, FileSystem fs) throws IOException {
        FileStatus[] fileStatus = fs.listStatus(path);
        for (int i = 0; i < fileStatus.length; i++) {
            if (fileStatus[i].isDir()) {
                getFile(fileStatus[i].getPath(), fs);
            } else {
                System.out.println(fileStatus[i].getPath().toString());
            }
        }
    }
}
8. Finding where a file is stored on the HDFS cluster
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getFileLocal();
    }

    /**
     * Find where a file's blocks are located on the HDFS cluster.
     */
    public static void getFileLocal() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        for (int i = 0; i < locations.length; i++) {
            // Each block may be replicated on several hosts; print them all
            // (indexing hosts by the block index would go out of bounds).
            String[] hosts = locations[i].getHosts();
            for (String host : hosts) {
                System.out.println("block_" + i + "_location:" + host);
            }
        }
    }
}
9. Listing the names of all nodes in the HDFS cluster
package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getHDFSNode();
    }

    /**
     * List the names of all DataNodes in the HDFS cluster.
     */
    public static void getHDFSNode() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Node:" + dataNodeStats[i].getHostName());
        }
    }
}
When operating on the FileSystem in a pseudo-distributed environment, an exception may be thrown.
The Java code is as follows:
FileSystem fs = FileSystem.get(conf);
The exception thrown is:
Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:///
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
    at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125)
    at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356)
    at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23)
Solution:
Put Hadoop's core-site.xml and hdfs-site.xml into the current project so they are on the classpath (under the bin folder of the Eclipse workspace).
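Alternatively, the two configuration files can be loaded into the Configuration object explicitly. This is a minimal sketch, not from the original article; the class name and the file paths below are placeholders for wherever your Hadoop conf directory actually lives.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LoadHdfsConf {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder paths: point these at your actual Hadoop conf directory.
        conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
        conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.getUri()); // should now print an hdfs:// URI, not file:///
        fs.close();
    }
}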

Summary:
Because we are accessing a remote HDFS, the FileSystem should be obtained via its URI (see the sketch below).
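A minimal sketch of obtaining the FileSystem through an explicit URI. The address hdfs://localhost:9000 is assumed from the pseudo-distributed setup above and the class name is only illustrative; substitute your own NameNode address.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetFileSystemByUri {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // hdfs://localhost:9000 is the assumed NameNode address; change it to match your cluster.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        System.out.println(fs.getUri());
        fs.close();
    }
}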
Original article: http://www.cnblogs.com/wuzhenquan/p/3617751.html