You are on page 1 of 11

Hadoop 8Lease

2013-08-06

(Lease)Lease LeaseNameNode LeaseLease 3 NameNode code

LeaseManager Lease LeaseManager Monitor Lease holder lastUpdate paths LeaseManager Lease LeaseManager addLease Lease renewLease remove add LeaseManager Monitor Lease Lease FSNamesystem internalReleaseLease LeaseManager

Hadoop UNIX FsAction org.apache.hadoop.fs.permission FsAction FsPermission / applyUMask FsPermission PermissionStatus FsPermission INode PermissionStatus long SerialNumberManager PermissionStatus SerialNumberManager FSImage SerialNumberManager
2

SerialNumberManager INode long FsPermissionMODE USERGROUP PermissionChecker Lease Management hadoop lease GFS hadoop lease client GFS lease client datanode hadoop -- append client write client append lease write lease management 1 create, write, complete lease lease 2 lease lease

lease management
create ClientProtocol create INode client ClientProtocol INode completed
3

client lease client lease lease writer client lease lease client create Namenode create
/**
 * Handles a client request to create the file {@code src}.
 *
 * <p>Logs the request at debug level, rejects paths for which
 * {@code checkPathLength} fails, and otherwise hands the request to
 * {@code namesystem.startFile}. The permission status passed on uses the
 * short name of the current user as owner, a {@code null} group, and the
 * supplied {@code masked} permission. Both file-creation metrics are
 * incremented once {@code startFile} returns normally.
 *
 * @param src          path of the file to create
 * @param masked       permission to record for the new file
 * @param clientName   name of the requesting client, passed through to {@code startFile}
 * @param overwrite    passed through to {@code startFile}
 * @param createParent passed through to {@code startFile}
 * @param replication  requested replication factor
 * @param blockSize    requested block size
 * @throws IOException if the path fails the length check, or if
 *                     {@code startFile} fails
 */
public void create(String src,
                   FsPermission masked,
                   String clientName,
                   boolean overwrite,
                   boolean createParent,
                   short replication,
                   long blockSize) throws IOException {
  final String clientMachine = getClientMachine();
  if (stateChangeLog.isDebugEnabled()) {
    stateChangeLog.debug("*DIR* NameNode.create: file "
        + src + " for " + clientName + " at " + clientMachine);
  }

  final boolean pathFits = checkPathLength(src);
  if (!pathFits) {
    throw new IOException("create: Pathname too long. Limit "
        + MAX_PATH_LENGTH + " characters, " + MAX_PATH_DEPTH + " levels.");
  }

  // Owner is the calling user; the group is deliberately left null here.
  final String owner = UserGroupInformation.getCurrentUser().getShortUserName();
  final PermissionStatus requested = new PermissionStatus(owner, null, masked);
  namesystem.startFile(src, requested, clientName, clientMachine,
      overwrite, createParent, replication, blockSize);

  myMetrics.incrNumFilesCreated();
  myMetrics.incrNumCreateFileOps();
}

FsNamesystem startFile startFileInternal append create


private synchronized void startFileInternal(String src, PermissionStatus permissions, String holder, String clientMachine, boolean overwrite,
4

boolean append, boolean createParent, short replication, long blockSize ) throws IOException { if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src + ", holder=" + holder + ", clientMachine=" + clientMachine + ", createParent=" + createParent + ", replication=" + replication + ", overwrite=" + overwrite + ", append=" + append); } if (isInSafeMode()) throw new SafeModeException("Cannot create file" + src, safeMode); if (!DFSUtil.isValidName(src)) { throw new IOException("Invalid file name: " + src); } // Verify that the destination does not exist as a directory already. boolean pathExists = dir.exists(src); if (pathExists && dir.isDir(src)) { throw new IOException("Cannot create file "+ src + "; already exists as a directory."); } if (isPermissionEnabled) { if (append || (overwrite && pathExists)) { checkPathAccess(src, FsAction.WRITE); } else { checkAncestorAccess(src, FsAction.WRITE); } } if (!createParent) { verifyParentDir(src); } try { INode myFile = dir.getFileINode(src); recoverLeaseInternal(myFile, src, holder, clientMachine, false);

try { verifyReplication(src, replication, clientMachine); } catch(IOException e) { throw new IOException("failed to create "+e.getMessage()); } if (append) { if (myFile == null) { throw new FileNotFoundException("failed to append to non-existent file " + src + " on client " + clientMachine); } else if (myFile.isDirectory()) { throw new IOException("failed to append to directory " + src +" on client " + clientMachine); } } else if (!dir.isValidToCreate(src)) { if (overwrite) { delete(src, true); } else { throw new IOException("failed to create file " + src +" on client " + clientMachine +" either because the filename is invalid or the file exists"); } } DatanodeDescriptor clientNode = host2DataNodeMap.getDatanodeByHost(clientMachine); if (append) { // // Replace current node with a INodeUnderConstruction. // Recreate in-memory lease record. // INodeFile node = (INodeFile) myFile; INodeFileUnderConstruction cons = new INodeFileUnderConstruction( node.getLocalNameBytes(), node.getReplication(), node.getModificationTime(), node.getPreferredBlockSize(), node.getBlocks(), node.getPermissionStatus(), holder, clientMachine, clientNode); dir.replaceNode(src, node, cons);
6

leaseManager.addLease(cons.clientName, src); } else { // Now we can add the name to the filesystem. This file has no // blocks associated with it. // checkFsObjectLimit(); // increment global generation stamp long genstamp = nextGenerationStamp(); INodeFileUnderConstruction newNode = dir.addFile(src, permissions, replication, blockSize, holder, clientMachine, clientNode, genstamp); if (newNode == null) { throw new IOException("DIR* NameSystem.startFile: " + "Unable to add file to namespace."); } leaseManager.addLease(newNode.clientName, src); if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: " +"add "+src+" to namespace for "+holder); } } } catch (IOException ie) { NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " +ie.getMessage()); throw ie; } }

newNode leaseManager.addLease (newNode.clientName, src);


/** * Adds (or re-adds) the lease for the specified file. */ synchronized Lease addLease(String holder, String src) { Lease lease = getLease(holder); if (lease == null) { lease = new Lease(holder); leases.put(holder, lease); sortedLeases.add(lease); } else { renewLease(lease); }
7

sortedLeasesByPath.put(src, lease); lease.paths.add(src); return lease; }

lease lease lease lease lease management write client lease completed create
private void finalizeINodeFileUnderConstruction(String src, INodeFileUnderConstruction pendingFile) throws IOException { NameNode.stateChangeLog.info("Removing lease on file " + src + " from client " + pendingFile.clientName); leaseManager.removeLease(pendingFile.clientName, src); // The file is no longer pending. // Create permanent INode, update blockmap INodeFile newFile = pendingFile.convertToInodeFile(); dir.replaceNode(src, pendingFile, newFile); // close file and persist block allocations for this file dir.closeFile(src, newFile); checkReplicationFactor(newFile); }

client lease pendingFile INodeFile create lease client complete INodeFile

lease management
FsNamesystem initialize this.lmthread = new Daemon(leaseManager.new Monitor());
8

lmthread.start();
/******************************************************
 * Monitor checks for leases that have expired,
 * and disposes of them.
 ******************************************************/
class Monitor implements Runnable {
  final String name = getClass().getSimpleName();

  /**
   * Check leases periodically: while the namesystem reports that it is
   * running, call {@code checkLeases()} under the namesystem lock and then
   * pause for two seconds.
   */
  public void run() {
    while (fsnamesystem.isRunning()) {
      synchronized (fsnamesystem) {
        checkLeases();
      }
      pauseBetweenChecks();
    }
  }

  /**
   * Sleeps for two seconds. An interrupt only ends the sleep early and is
   * logged at debug level; the loop in {@link #run()} decides whether to go on.
   */
  private void pauseBetweenChecks() {
    try {
      Thread.sleep(2000);
    } catch (InterruptedException ie) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(name + " is interrupted", ie);
      }
    }
  }
}

/** Check the leases beginning from the oldest. */ synchronized void checkLeases() { for(; sortedLeases.size() > 0; ) { final Lease oldest = sortedLeases.first(); if (!oldest.expiredHardLimit()) { return; // } LOG.info("Lease " + oldest + " has expired hard limit"); final List<String> removing = new ArrayList<String>(); // need to create a copy of the oldest lease paths, becuase // internalReleaseLease() removes paths corresponding to empty files, // i.e. it needs to modify the collection being iterated over
9

// causing ConcurrentModificationException String[] leasePaths = new String[oldest.getPaths().size()]; oldest.getPaths().toArray(leasePaths); for(String p : leasePaths) { try { fsnamesystem.internalReleaseLeaseOne(oldest, p); } catch (IOException e) { LOG.error("Cannot release the path "+p+" in the lease "+oldest, e); removing.add(p); } } for(String p : removing) { removeLease(oldest, p); } } }

fsnamesystem.internalReleaseLease(oldest, p); lease lease removeLease(oldest, p); lease lease LeaseManager private SortedMap<String, Lease> leases = new TreeMap<String, Lease>(); holder->lease map private SortedSet<Lease> sortedLeases = new TreeSet<Lease>(); lease private SortedMap<String, Lease> sortedLeasesByPath = new TreeMap<String, Lease>(); paths->leases map addLease lease leases sortedLeases lease client lease lease sortedLeasesByPath --lease recovery * Lease Recovery Algorithm * 1) Namenode retrieves lease information
10

* 2) For each file f in the lease, consider the last block b of f * 2.1) Get the datanodes which contain b * 2.2) Assign one of the datanodes as the primary datanode p

* 2.3) p obtains a new generation stamp from the namenode * 2.4) p gets the block info from each datanode * 2.5) p computes the minimum block length * 2.6) p updates the datanodes, which have a valid generation stamp, * with the new generation stamp and the minimum block length * 2.7) p acknowledges the namenode the update results

* 2.8) Namenode updates the BlockInfo * 2.9) Namenode removes f from the lease * and removes the lease once all files have been removed * 2.10) Namenode commits changes to edit log

11