Professional Documents
Culture Documents
2013-08-11
HDFS HDFS
HDFS
// FileSystem
public class FileCopyWithProgress {
public static void main(String[] args) throws Exception {
String localSrc = args[0];
String dst = args[1];
InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
Configuration conf = new Configuration();
// FileSystem HDFS
FileSystem fs = FileSystem.get(URI.create(dst), conf);
OutputStream out = fs.create(new Path(dst), new Progressable() {
//MapReduce
public void progress() {
System.out.print(".");
}
});
IOUtils.copyBytes(in, out, 4096, true);
}
}
hadoop FileCopyWithProgress input/1.txt hdfs://localhost/user/hadoop/1.txt
// FileSystem API
public class FileSystemCat {
public static void main(String[] args) throws Exception {
String uri = args[0];
Configuration conf = new Configuration();
// FileSystem HDFS
FileSystem fs = FileSystem.get(URI.create(uri), conf);
InputStream in = null;
try {
in = fs.open(new Path(uri));
IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
IOUtils.closeStream(in);
1
}
}
}
hadoop FileSystemCat hdfs://localhost/user/tom/quangle.txt
HDFS FileSystem
FileSystem
FileSystem
FileSystem
CACHE: cache
cache
statisticsTable:
key: CACHE
statistics:
deleteOnExit: Java
clientFinalizer:
FileSystem
FileSystem
getFileBlockLocations:
exists:
isFile:
getContentSummary:
listStatus:
globStatus: Linux
getHomeDirectory:
get/setWorkingDirectory:
3
copyFromLocalFile:
copyToLocalFile:
moveFromLocalFile:
moveToLocalFile:
getFileStatus:
setPermission:
setOwner:
setTimes:
getAllStatistics:
getStatistics:
uri
FileSystem.Cache
HashMapMap
Key FileSystem
Key
4
org.apache.hadoop.security.UserGroupInformation
scheme, authority, ugi
FileSystem.Statistics
bytesWritten: AtomicLong
Path
/
URI
BlockLocation
5
HDFS Block
FileStatus
/ Unix/Linux
private Path path;
private long length;
private boolean isdir;
private short block_replication;
private long blocksize;
private long modification_time;
private long access_time;
private FsPermission permission;
private String owner;
private String group;
/
/
/
/
/
/
/
FsPermission
/ POSIX
FSDataOutputStream
DataOutputStream Syncable
sync
FSDataOutputStream FileSystem
FSDataInputStream
DataInputStream Seekable
6
PositionReadable seek
FSDataInputStream FileSystem
FileSystem FileSystem
FileSystem.get
/**
 * Returns the FileSystem selected by the scheme and authority of {@code uri}.
 *
 * <ul>
 *   <li>A URI without a scheme yields the result of {@code get(conf)}.</li>
 *   <li>A URI without an authority yields the FileSystem of the configured
 *       default URI, provided that URI has the same scheme and carries an
 *       authority of its own.</li>
 *   <li>If the boolean property <tt>fs.<i>scheme</i>.impl.disable.cache</tt>
 *       is set, the result comes from {@code createFileSystem} and the cache
 *       is not consulted.</li>
 *   <li>Otherwise the instance is obtained from {@code CACHE}.</li>
 * </ul>
 *
 * @param uri URI whose scheme and authority select the file system
 * @param conf configuration to read the default URI and cache switch from
 * @return the FileSystem for {@code uri}
 * @throws IOException if the FileSystem cannot be obtained
 */
public static FileSystem get(URI uri, Configuration conf) throws IOException {
  final String fsScheme = uri.getScheme();
  if (fsScheme == null) {
    return get(conf);
  }

  if (uri.getAuthority() == null) {
    // No authority given: fall back to the default file system when it
    // shares the scheme and has an authority of its own.
    final URI fallback = getDefaultUri(conf);
    final boolean sameScheme = fsScheme.equals(fallback.getScheme());
    if (sameScheme && fallback.getAuthority() != null) {
      return get(fallback, conf);
    }
  }

  final String cacheSwitch = String.format("fs.%s.impl.disable.cache", fsScheme);
  final boolean cacheDisabled = conf.getBoolean(cacheSwitch, false);
  return cacheDisabled ? createFileSystem(uri, conf) : CACHE.get(uri, conf);
}
FileSystem
private static FileSystem createFileSystem(URI uri, Configuration conf
) throws IOException {
Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
7
FileSystem
Scheme hdfs FileSystem fs.hdfs.impl
org.apache.hadoop.hdfs.DistributedFileSystem
JAVA
DistributedFileSystem DFS
FileSystem HDFS
DistributedFileSystem
DFSClient DFSClient Hadoop FileSystem
Hadoop
DistributedFileSystem
HDFS
org.apache.hadoop.fs DistributedFileSystem
DFSClient DFSClient
hdfs-default.xml
hdfs-site.xml namenode HDFS
uri namenode
makeQualified, getPathName
DFSClient Hadoop
ClientProtocol NameNode
Socket DataNode /Hadoop
DistributedFileSystem DFSClient
DFSClient
DFSClient
DFSClient
MAX_BLOCK_ACQUIRE_FAILURES 3
10
TCP_WINDOW_SIZE TCP
dfs.socket.timeout
dfs.datanode.socket.write.timeout
dfs.write.packet.size packet
dfs.client.max.block.acquire.failures
mapred.task.id map reduce ID clientName
DFSClient_ clientName DFSClient_
dfs.block.size
11
dfs.replication
DFSClient RPC
namenode
checkOpen
clientRunning ;
getBlockLocations namenode LocatedBlocks
LocatedBlocks datanode
BlockLocation BlockLocation ;
getFileChecksum checksum
datanode checksum
checksum MD5 datanode
datanode MD5 MD5
bestNode deadNodes
12