You are on page 1 of 14

Hadoop 7NameNode

2013-08-03

HDFS NameNode DataNode


NameNodeinode
UNIX SecondaryNameNode
NameNode HDFS

=>
=>DataNode

=> NameNode
=>DataNode DataNode
NameNode DataNode DataNode
InterDatanodeProtocol ClientDatanodeProtocol NameNode

ClientProtocol NameNode
HDFS GFS HDFS POSIX
org.apache.hadoop.fs.FileSystem HDFS

DatanodeProtocol DataNode NameNode DataNode


2

register DataNode
sendHeartbeat/blockReport/blockReceived DataNode offerService
errorReport NameNode Block
BlockReceiver

DataBlockScanner

nextGenerationStamp

commitBlockSynchronization lease lease

NamenodeProtocol NameNode NameNode

namenode :bin/hadoop namenode bin/hadoop

java

org.apache.hadoop.hdfs.server.namenode.NameNode main --> createNameNode -->


NameNode --> initialize
NameNode :
public class NameNode implements ClientProtocol, DatanodeProtocol,
NamenodeProtocol, FSConstants,
RefreshAuthorizationPolicyProtocol,
4

RefreshUserMappingsProtocol {
//
static{
Configuration.addDefaultResource("hdfs-default.xml");
Configuration.addDefaultResource("hdfs-site.xml");
}

public static final int DEFAULT_PORT = 8020; //


public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
public
static
final
Log
stateChangeLog
=
LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
public FSNamesystem namesystem; // TODO: This should private. Use getNamesystem()
instead. // Datanode
/** RPC server */
private Server server;
/** RPC server for HDFS Services communication.
BackupNode, Datanodes and all other services
should be connecting to this server if it is
configured. Clients should only go to NameNode#server
*/
private Server serviceRpcServer;
/** RPC server address */
private InetSocketAddress serverAddress = null;
/** RPC server for DN address */
protected InetSocketAddress serviceRPCAddress = null;
/** httpServer */
private HttpServer httpServer;
/** HTTP server address */
private InetSocketAddress httpAddress = null;
private Thread emptier;
/** only used for testing purposes */
private boolean stopRequested = false;
/** Is service level authorization enabled? */
private boolean serviceAuthEnabled = false;

static NameNodeInstrumentation myMetrics;


//

FSNamesystem org.apache.hadoop.hdfs.server.namenode
Namenode
5

HttpServer org.apache.hadoop.http Jetty


Namenode HTTP Namenode

//
/**
 * Creates a NameNode from command-line arguments (excerpt; "..." marks code
 * elided by the article).
 *
 * The arguments are parsed into a StartupOption. FORMAT and FINALIZE are
 * one-shot operations: each calls the matching helper and then terminates the
 * JVM with status 1 if the helper reported "aborted", otherwise 0. Any other
 * option falls through to constructing and returning a NameNode.
 *
 * @param argv command-line arguments
 * @param conf configuration handed to format/finalize and to the NameNode constructor
 * @return the newly constructed NameNode
 * @throws IOException declared by the method; the throwing call sites are not all visible in this excerpt
 */
public static NameNode createNameNode(String argv[],
Configuration conf) throws IOException {
...
StartupOption startOpt = parseArguments(argv);
...
switch (startOpt) {
case FORMAT: // format the namenode, then exit (System.exit keeps this case from falling through)
boolean aborted = format(conf, true);
System.exit(aborted ? 1 : 0);
case FINALIZE: // finalize (presumably a completed Hadoop upgrade; confirm), then exit
aborted = finalize(conf, true);
System.exit(aborted ? 1 : 0);
default:
}
...
// Regular start-up: construct the NameNode (per the article, the constructor goes on to call initialize)
NameNode namenode = new NameNode(conf);
return namenode;
}
/**
 * Brings up the NameNode services (excerpt; "..." and "...." mark code elided
 * by the article).
 *
 * Visible steps, in order: build the FSNamesystem, create the RPC server,
 * start the HTTP server, start the RPC server, start the trash emptier.
 *
 * @param conf configuration passed to the namesystem, RPC server, HTTP server and trash emptier
 * @throws IOException declared by the method; the throwing call sites are not all visible in this excerpt
 */
private void initialize(Configuration conf) throws IOException {
  ...
  // Build the namesystem (per the article, this is where the fsimage and edits log are loaded).
  this.namesystem = new FSNamesystem(this, conf);
  ....
  // Create the RPC server on the configured address with handlerCount handler threads
  // (the article cites 10 handlers and port 8020 here; both values are set in elided code).
  this.server = RPC.getServer(this, socAddr.getHostName(),
      socAddr.getPort(), handlerCount, false, conf,
      namesystem.getDelegationTokenSecretManager());
  // Start the HTTP server (the article cites http://namenode:50070 as the HDFS status page).
  startHttpServer(conf);

  ....
  this.server.start(); // start the RPC server
  ....
  // Start the trash emptier; the article ties it to fs.trash.interval and cites 60
  // (presumably minutes; confirm against the configuration reference).
  startTrashEmptier(conf);
}
6

/**
 * Process entry point (excerpt; "..." marks code elided by the article,
 * including whatever follows the try block).
 *
 * Delegates to createNameNode with a null configuration and, if a NameNode
 * instance comes back, calls join() on it.
 *
 * @param argv command-line arguments, forwarded unchanged to createNameNode
 * @throws Exception declared by the method; handling after the try block is elided
 */
public static void main(String argv[]) throws Exception {


try {
...
// conf is passed as null here; how createNameNode treats a null conf is in elided code
NameNode namenode = createNameNode(argv, null);
// guard against a null result before joining
if (namenode != null)
namenode.join();
}
...
}
}

org.apache.hadoop.hdfs.server.namenode.FSNamesystem Namenode
NameNode
FSNamesystem NameNode FSNamesystem

=> FSImage

=>DataNode DataNode

DataNode
DataNodeLRU
FSNamesystem
public class FSNamesystem implements FSConstants, FSNamesystemMBean,
NameNodeMXBean, MetricsSource {
//
public FSDirectory dir;
//BlocksMap Block inode
Datanode
final
BlocksMap
blocksMap
=
new
BlocksMap(DEFAULT_INITIAL_MAP_CAPACITY,DEFAULT_MAP_LOAD_FACTOR);
//
7

public CorruptReplicasMap corruptReplicas = new CorruptReplicasMap();


//datanode
NavigableMap<String, DatanodeDescriptor> datanodeMap = new TreeMap<String,
DatanodeDescriptor>();
//datanodeMap DatanodeDescriptorHeartbeatMonitor

ArrayList<DatanodeDescriptor> heartbeats = new ArrayList<DatanodeDescriptor>();


//

private UnderReplicatedBlocks neededReplications = new UnderReplicatedBlocks();


//
private PendingReplicationBlocks pendingReplications;
//
public LeaseManager leaseManager = new LeaseManager(this);
Daemon hbthread = null; // FSNamesystem heartbeatCheck
Datanode
public Daemon lmthread = null; // LeaseMonitor thread
Daemon smmthread = null; //
threshold

public Daemon replthread = null; // :


Datanode ;
private ReplicationMonitor replmon = null; // Replication metrics
// Datanode -> DatanodeDescriptor
private Host2NodesMap host2DataNodeMap = new Host2NodesMap();

//
Data CenterRack

NetworkTopology clusterMap = new NetworkTopology();


// DNS-name/IP-address -> RackID

private DNSToSwitchMapping dnsToSwitchMapping;


//
ReplicationTargetChooser replicator;
// Datanode Datanode Namenode
8

Namenode
private HostsFileReader hostsReader;
}

FSNamesystem
private void initialize(NameNode nn, Configuration conf) throws IOException {
this.systemStart = now();
setConfigurationParameters(conf);
dtSecretManager = createDelegationTokenSecretManager(conf);
this.nameNodeAddress = nn.getNameNodeAddress();
this.registerMBean(conf); // register the MBean for the FSNamesystemStutus
this.dir = new FSDirectory(this, conf);
StartupOption startOpt = NameNode.getStartupOption(conf);
// fsimage edits
this.dir.loadFSImage(getNamespaceDirs(conf),
getNamespaceEditsDirs(conf), startOpt);
long timeTakenToLoadFSImage = now() - systemStart;
LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
NameNode.getNameNodeMetrics().setFsImageLoadTime(timeTakenToLoadFSImage);
this.safeMode = new SafeModeInfo(conf);
setBlockTotal();
pendingReplications = new PendingReplicationBlocks(
conf.getInt("dfs.replication.pending.timeout.sec",
-1) * 1000L);
if (isAccessTokenEnabled) {
accessTokenHandler = new BlockTokenSecretManager(true,
accessKeyUpdateInterval, accessTokenLifetime);
}
this.hbthread = new Daemon(new HeartbeatMonitor());// Datanode

this.lmthread = new Daemon(leaseManager.new Monitor());//

this.replmon = new ReplicationMonitor();


this.replthread = new Daemon(replmon); //
hbthread.start();
lmthread.start();
replthread.start();
// datanode
this.hostsReader = new HostsFileReader(conf.get("dfs.hosts",""),
conf.get("dfs.hosts.exclude",""));
//,
this.dnthread = new Daemon(new DecommissionManager(this).new Monitor(
conf.getInt("dfs.namenode.decommission.interval", 30),
9

conf.getInt("dfs.namenode.decommission.nodes.per.interval", 5)));
dnthread.start();
this.dnsToSwitchMapping = ReflectionUtils.newInstance(
conf.getClass("topology.node.switch.mapping.impl", ScriptBasedMapping.class,
DNSToSwitchMapping.class), conf);
/* If the dns to swith mapping supports cache, resolve network
* locations of those hosts in the include list,
* and store the mapping in the cache; so future calls to resolve
* will be fast.
*/
if (dnsToSwitchMapping instanceof CachedDNSToSwitchMapping) {
dnsToSwitchMapping.resolve(new ArrayList<String>(hostsReader.getHosts()));
}
InetSocketAddress socAddr = NameNode.getAddress(conf);
this.nameNodeHostName = socAddr.getHostName();
registerWith(DefaultMetricsSystem.INSTANCE);
}

FSDirectory FSNamesystem

FSDirectory hdfs
INode file/block
INode inode Field

INodeDirectory INodeDirectory INode
INode
INodeFile INodeFile INode INodeDirectory
INodeFile
Datanode
INodeFileUnderConstruction HDFS
Namenode
10

INodeFile
INodeFile Hadoop
INodeFileUnderConstruction INodeFile INodeFile
INodeFileUnderConstruction
INodeFileUnderConstruction
HDFS Datanode

FSDirectory FSDirectory

filename->blockset
FSImage fsImage

class FSDirectory implements FSConstants, Closeable {


final INodeDirectoryWithQuota rootDir;// INodeDirectory
hdfs ,
FSImage fsImage; // FSImage ,
}

/**
 * Convenience constructor: creates a fresh FSImage and delegates to the
 * three-argument constructor (excerpt; "..." marks code elided by the article).
 *
 * @param ns   the namesystem this directory belongs to
 * @param conf configuration, forwarded to the delegate constructor
 */
FSDirectory(FSNamesystem ns, Configuration conf) {


this(new FSImage(), ns, conf);
...
}
/**
 * Main constructor (excerpt; "...." marks code elided by the article).
 * Creates the root directory node and stores the supplied image and namesystem.
 *
 * @param fsImage image object to keep in this.fsImage
 * @param ns      namesystem; also supplies the owner permissions for the root node
 * @param conf    configuration (its use is in the elided code)
 */
FSDirectory(FSImage fsImage, FSNamesystem ns, Configuration conf) {
// Root node: named ROOT_NAME, owner permissions built from mode 0755; the last two
// arguments (Integer.MAX_VALUE, -1) are presumably the quotas. TODO confirm.
rootDir = new INodeDirectoryWithQuota(INodeDirectory.ROOT_NAME,
ns.createFsOwnerPermissions(new FsPermission((short)0755)),
Integer.MAX_VALUE, -1);
this.fsImage = fsImage;
....
namesystem = ns;
....
}
//FSNamesystem FSDirectory dir loadFSImage fsimage
edits
11

void loadFSImage(Collection<File> dataDirs,Collection<File> editsDirs,StartupOption startOpt)


throws IOException {
// format before starting up if requested
if (startOpt == StartupOption.FORMAT) {// FORMAT

fsImage.setStorageDirectories(dataDirs, editsDirs);// FSImage


${dfs.name.dir},/tmp/hadoop/dfs/name,
fsImage.format();// FSImage
startOpt = StartupOption.REGULAR;
}
try {
if (fsImage.recoverTransitionRead(dataDirs, editsDirs, startOpt)) { //
(${dfs.name.dir})
fsImage.saveNamespace(true);
}
FSEditLog editLog = fsImage.getEditLog();
assert editLog != null : "editLog must be initialized";
if (!editLog.isOpen())
editLog.open();
fsImage.setCheckpointDirectories(null, null);
}
...
}

loadFSImage FSImage
FSImage EditLog
FSImage
EditLog
EditLog FSImage FSImage EditLog
FSImage
namenode
namenode
hdfs
rpc namenode
namenode FSNamesystem namesystem
namesystem

12

namesystem FSDirectory dir dir

dir FSImage fsImage fsImage hdfs


EditLog
Secondary NameNode () namenode EditLog
fsimage fsimage EditLog

INode*
NameNode
inode inode INode*

INode*

INode INodeDirectory
13

INodeFileINodeDirectoryWithQuota
INodeFileUnderConstruction
HDFS
INode name/modificationTime accessTime
parent permission
HDFS UNIX/Linux UNIX
groupuser IDpermission
INode long
INode get set
collectSubtreeBlocksAndClear INode
BlockcomputeContentSummary INode

INodeDirectory HDFS
private List<INode> children;
/INodeDirectory get
set INodeDirectoryWithQuota INodeDirectory
INodeDirectory NameSpace
INodeFile HDFS
protected BlockInfo blocks[] = null;
Block BlockInfo Block
INodeFileUnderConstruction
clientName
clientMachine
DataNode clientNode targets

14