You are on page 1 of 12

Hadoop 10HDFS

2013-08-11

HDFS HDFS
HDFS

// FileSystem
public class FileCopyWithProgress {
public static void main(String[] args) throws Exception {
String localSrc = args[0];
String dst = args[1];
InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
Configuration conf = new Configuration();
// FileSystem HDFS
FileSystem fs = FileSystem.get(URI.create(dst), conf);
OutputStream out = fs.create(new Path(dst), new Progressable() {
//MapReduce
public void progress() {
System.out.print(".");
}
});
IOUtils.copyBytes(in, out, 4096, true);
}
}
hadoop FileCopyWithProgress input/1.txt hdfs://localhost/user/hadoop/1.txt
// FileSystem API
public class FileSystemCat {
public static void main(String[] args) throws Exception {
String uri = args[0];
Configuration conf = new Configuration();
// FileSystem HDFS
FileSystem fs = FileSystem.get(URI.create(uri), conf);
InputStream in = null;
try {
in = fs.open(new Path(uri));
IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
IOUtils.closeStream(in);
1

}
}
}
hadoop FileSystemCat hdfs://localhost/user/tom/quangle.txt

HDFS FileSystem

FileSystem

FileSystem
FileSystem


CACHE: cache
cache
statisticsTable:
key: CACHE
statistics:
deleteOnExit: Java

clientFinalizer:
FileSystem
FileSystem

getFileBlockLocations:

exists:
isFile:
getContentSummary:

listStatus:
globStatus: Linux

getHomeDirectory:
getWorkingDirectory/setWorkingDirectory:
3

copyFromLocalFile:
copyToLocalFile:
moveFromLocalFile:
moveToLocalFile:
getFileStatus:
setPermission:
setOwner:
setTimes:
getAllStatistics:
getStatistics:

get: URI FileSystem


createFileSystem
createFileSystem: URI scheme scheme
FileSystem
Hadoop HDFS

FileSystem fs = FileSystem.get(URI.create(dst), conf);

uri

FileSystem.Cache
HashMapMap
Key FileSystem
Key
4

scheme: URI URIhttp://server/index.html


scheme http
authority: URI authority server
authority
ugi:

org.apache.hadoop.security.UserGroupInformation
schemeauthority ugi

FileSystem.Statistics

scheme: HDFS hdfs


bytesRead: AtomicLong

bytesWritten: AtomicLong

// Fields listed under the FileSystem.Statistics heading in the surrounding notes.
// NOTE(review): the enclosing class declaration is not visible in this excerpt.
// URI scheme these statistics belong to (the notes give "hdfs" as the HDFS example).
private final String scheme;


// Atomic counters; judging by the names they track bytes read/written and the
// number of read, large-read and write operations — confirm against the full class.
private AtomicLong bytesRead = new AtomicLong();
private AtomicLong bytesWritten = new AtomicLong();
private AtomicInteger readOps = new AtomicInteger();
private AtomicInteger largeReadOps = new AtomicInteger();
private AtomicInteger writeOps = new AtomicInteger();

Path
/
URI

BlockLocation
5

HDFS Block

private String[] hosts; // hostnames of datanodes


private String[] names; // hostname:portNumber of datanodes
private String[] topologyPaths; // full path name in network topology
private long offset; // offset of the block in the file
private long length; // NOTE(review): presumably the length of the block — confirm units

FileStatus
/ UnixLinux
// Fields listed under the FileStatus heading in the surrounding notes.
// NOTE(review): meanings below are inferred from the field names; confirm against the full class.
private Path path; // path this status describes
private long length; // presumably the file length — confirm units
private boolean isdir; // presumably true when the path is a directory
private short block_replication; // presumably the replication factor
private long blocksize; // presumably the block size — confirm units
private long modification_time; // presumably the last-modified timestamp — confirm epoch/units
private long access_time; // presumably the last-access timestamp — confirm epoch/units
private FsPermission permission; // permission value for the path
private String owner; // owner name
private String group; // group name

/
/

/
/
/
/
/

FsPermission
/ POSIX

FSDataOutputStream
DataOutputStream Syncable
sync

FSDataOutputStream FileSystem

FSDataInputStream
DataInputStream Seekable
6

PositionReadable seek

FSDataInputStream FileSystem

FileSystem FileSystem
FileSystem.get
/**
 * Resolves the FileSystem that serves the given URI.
 *
 * <ul>
 *   <li>A URI without a scheme is delegated to {@code get(conf)}.</li>
 *   <li>A URI with a scheme but no authority is replaced by the default URI
 *       when the schemes match and the default URI carries an authority.</li>
 *   <li>Otherwise the instance comes from {@code CACHE}, unless the boolean
 *       property <tt>fs.<i>scheme</i>.impl.disable.cache</tt> is set, in which
 *       case {@code createFileSystem} is called directly.</li>
 * </ul>
 *
 * @param uri  location whose scheme and authority select the file system
 * @param conf configuration consulted for the default URI and the cache switch
 * @return the FileSystem for {@code uri}
 * @throws IOException propagated from the delegated lookups
 */
public static FileSystem get(URI uri, Configuration conf) throws IOException {
  final String uriScheme = uri.getScheme();
  final String uriAuthority = uri.getAuthority();

  // No scheme at all: fall back to the default file system.
  if (uriScheme == null) {
    return get(conf);
  }

  // Scheme but no authority: borrow the default URI if it is the same scheme
  // and actually names an authority.
  if (uriAuthority == null) {
    final URI fallback = getDefaultUri(conf);
    if (uriScheme.equals(fallback.getScheme()) && fallback.getAuthority() != null) {
      return get(fallback, conf);
    }
  }

  final boolean cacheDisabled =
      conf.getBoolean(String.format("fs.%s.impl.disable.cache", uriScheme), false);
  return cacheDisabled ? createFileSystem(uri, conf) : CACHE.get(uri, conf);
}

FileSystem
private static FileSystem createFileSystem(URI uri, Configuration conf
) throws IOException {
Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
7

LOG.debug("Creating filesystem for " + uri);


if (clazz == null) {
throw new IOException("No FileSystem for scheme: " + uri.getScheme());
}
FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
fs.initialize(uri, conf);
return fs;
}

FileSystem
Scheme hdfs FileSystem fs.hdfs.impl
org.apache.hadoop.hdfs.DistributedFileSystem
JAVA
DistributedFileSystem DFS
FileSystem HDFS
DistributedFileSystem
DFSClient DFSClient Hadoop FileSystem
Hadoop
DistributedFileSystem

HDFS
org.apache.hadoop.fs DistributedFileSystem
DFSClient DFSClient
hdfs-default.xml
hdfs-site.xml namenode HDFS
uri namenode

checkPath schemeport authority


9

makeQualifiedgetPathName

DFSClient Hadoop
ClientProtocol NameNode
Socket DataNode /Hadoop
DistributedFileSystem DFSClient
DFSClient
DFSClient

DFSClient
MAX_BLOCK_ACQUIRE_FAILURES 3
10

TCP_WINDOW_SIZETCP 128KB seek

TCP_WINDOW_SIZE TCP

rpcNamenode RPC namenode


namenode rpcNamenode Retry
RetryPolicy
leasechecker
defaultBlockSize 64MB
defaultReplication 3
socketTimeoutsocket 60
datanodeWriteTimeoutdatanode 480
writePacketSize packet 64KB
maxBlockAcquireFailures 3
datanode
DFSClient

dfs.socket.timeout
dfs.datanode.socket.write.timeout
dfs.write.packet.size packet
dfs.client.max.block.acquire.failures
mapred.task.id map reduce ID clientName
DFSClient_ clientName DFSClient_
dfs.block.size
11

dfs.replication
DFSClient RPC
namenode

checkOpen
clientRunning ;
getBlockLocations namenode LocatedBlocks
LocatedBlocks datanode
BlockLocation BlockLocation ;
getFileChecksum checksum
datanode checksum
checksum MD5 datanode
datanode MD5 MD5
bestNode deadNodes

12