You are on page 1of 7

Hadoop 9Heartbeat

2013-08-11

Hadoop RPC 1. hadoop master/slave master Namenode Jobtracker slave Datanode Tasktracker 2. master ipc server slave 3. slave master 3 master heartbeat.recheck.interval master master slave 4. namenode datanode jobtracker tasktracker Datanode Namenode Datanode offerService
/** * Main loop for the DataNode. Runs until shutdown, * forever calling remote NameNode functions. */ public void offerService() throws Exception { LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" + " Initial delay: " + initialBlockReportDelay + "msec"); // // Now loop for a long time.... // while (shouldRun) { try { long startTime = now();
1

// // Every so often, send heartbeat or block-report // if (startTime - lastHeartbeat > heartBeatInterval) { // // All heartbeat messages include following info: // -- Datanode name // -- data transfer port // -- Total capacity // -- Bytes remaining // lastHeartbeat = startTime; DatanodeCommand[] cmds = namenode.sendHeartbeat(dnRegistration, data.getCapacity(), data.getDfsUsed(), data.getRemaining(), xmitsInProgress.get(), getXceiverCount()); myMetrics.addHeartBeat(now() - startTime); //LOG.info("Just sent heartbeat, with name " + localName); if (!processCommand(cmds)) continue; } } // while (shouldRun) } // offerService

Hadoop Datanode Namenode 2 2 JVM Datanode namenode namenode public DatanodeProtocol namenode = null; NameNode
public class NameNode implements ClientProtocol, DatanodeProtocol, NamenodeProtocol, FSConstants, RefreshAuthorizationPolicyProtocol, RefreshUserMappingsProtocol

namenode DatanodeProtocol Hadoop RPC


2

Datanode Namenode sendHeartbeat() DataNode NameNode DataNode startDataNode


// connect to name node this.namenode = (DatanodeProtocol) RPC.waitForProxy(DatanodeProtocol.class, DatanodeProtocol.versionID, nameNodeAddr, conf);

namenode Namenode Datanode Namenode RPC Datanode Namenode heartbeat 1) datanode namenode proxy 2) datanode namenode proxy sendHeartbeat 3) datanode namenode ( ) Invocation client.call 4) client call Invocation Call 5) client call namenode 6) namenode namenode Call process DatanodeCommand[] sendHeartbeat
/** * Data node notify the name node that it is alive * Return an array of block-oriented commands for the datanode to execute. * This will be either a transfer or a delete operation. */ public DatanodeCommand[] sendHeartbeat(DatanodeRegistration nodeReg,
3

long capacity, long dfsUsed, long remaining, int xmitsInProgress, int xceiverCount) throws IOException { verifyRequest(nodeReg); return namesystem.handleHeartbeat(nodeReg, capacity, dfsUsed, remaining, xceiverCount, xmitsInProgress); }

DataNode NameNode DatanodeRegistration DatanodeCommand DatanodeCommand

DatanodeProtocol DatanodeCommand
/** * Determines actions that data node should perform * when receiving a datanode command. */ final static int DNA_UNKNOWN = 0; // unknown action final static int DNA_TRANSFER = 1; // transfer blocks to another datanode final static int DNA_INVALIDATE = 2; // invalidate blocks final static int DNA_SHUTDOWN = 3; // shutdown node final static int DNA_REGISTER = 4; // re-register final static int DNA_FINALIZE = 5; // finalize previous upgrade
4

final static int DNA_RECOVERBLOCK = 6; // request a block recovery final static int DNA_ACCESSKEYUPDATE = 7; // update access key final static int DNA_BALANCERBANDWIDTHUPDATE = 8; // update balancer bandwidth

FSNamesystem.handleHeartbeat 1 getDatanode DatanodeDescriptor nodeinfo null NameNode StorageID DatanodeCommand.REGISTER DataNode 2 isDecommissioned DisallowedDatanodeException 3 nodeinfo DatanodeCommand.REGISTER DataNode 4 capacityTotalcapacityUsedcapacityRemaining totalLoad 5 DatanodeCommand
/** * The given node has reported in. This method should: * 1) Record the heartbeat, so the datanode isn't timed out * 2) Adjust usage stats for future block allocation * * If a substantial amount of time passed since the last datanode * heartbeat then request an immediate block report. * * @return an array of datanode commands * @throws IOException */ DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg, long capacity, long dfsUsed, long remaining, int xceiverCount, int xmitsInProgress) throws IOException { DatanodeCommand cmd = null; synchronized (heartbeats) { synchronized (datanodeMap) { DatanodeDescriptor nodeinfo = null; try { nodeinfo = getDatanode(nodeReg);
5

} catch(UnregisteredDatanodeException e) { return new DatanodeCommand[]{DatanodeCommand.REGISTER}; } // Check if this datanode should actually be shutdown instead. if (nodeinfo != null && shouldNodeShutdown(nodeinfo)) { setDatanodeDead(nodeinfo); throw new DisallowedDatanodeException(nodeinfo); } if (nodeinfo == null || !nodeinfo.isAlive) { return new DatanodeCommand[]{DatanodeCommand.REGISTER}; } updateStats(nodeinfo, false); nodeinfo.updateHeartbeat(capacity, dfsUsed, remaining, xceiverCount); updateStats(nodeinfo, true); //check lease recovery cmd = nodeinfo.getLeaseRecoveryCommand(Integer.MAX_VALUE); if (cmd != null) { return new DatanodeCommand[] {cmd}; } ArrayList<DatanodeCommand> cmds = new ArrayList<DatanodeCommand>(); //check pending replication cmd = nodeinfo.getReplicationCommand( maxReplicationStreams - xmitsInProgress); if (cmd != null) { cmds.add(cmd); } //check block invalidation cmd = nodeinfo.getInvalidateBlocks(blockInvalidateLimit); if (cmd != null) { cmds.add(cmd); } // check access key update if (isAccessTokenEnabled && nodeinfo.needKeyUpdate) { cmds.add(new KeyUpdateCommand(accessTokenHandler.exportKeys())); nodeinfo.needKeyUpdate = false; } // check for balancer bandwidth update if (nodeinfo.getBalancerBandwidth() > 0) { cmds.add(new BalancerBandwidthCommand(nodeinfo.getBalancerBandwidth()));
6

// set back to 0 to indicate that datanode has been sent the new value nodeinfo.setBalancerBandwidth(0); } if (!cmds.isEmpty()) { return cmds.toArray(new DatanodeCommand[cmds.size()]); } } } //check distributed upgrade cmd = getDistributedUpgradeCommand(); if (cmd != null) { return new DatanodeCommand[] {cmd}; } return null; }