Professional Documents
Culture Documents
2013-08-06
LeaseManager Lease LeaseManager Monitor Lease holder lastUpdate paths LeaseManager Lease LeaseManager addLease Lease renewLease remove add LeaseManager Monitor Lease Lease FSNamesystem internalReleaseLease LeaseManager
Hadoop UNIX FsAction org.apache.hadoop.fs.permission FsAction FsPermission / applyUMask FsPermission PermissionStatus FsPermission INode PermissionStatus long SerialNumberManager PermissionStatus SerialNumberManager FSImage SerialNumberManager
2
SerialNumberManager INode long FsPermissionMODE USERGROUP PermissionChecker Lease Management hadoop lease GFS hadoop lease client GFS lease client datanode hadoop -- append client write client append lease wirte lease management 1 createwritecomplete lease lease 2 lease lease
lease management
create ClientProtocol create INode client ClientProtocol INode completed
3
client lease client lease lease writer client lease lease client create Namenode create
public void create(String src, FsPermission masked, String clientName, boolean overwrite, boolean createParent, short replication, long blockSize ) throws IOException { String clientMachine = getClientMachine(); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.create: file " +src+" for "+clientName+" at "+clientMachine); } if (!checkPathLength(src)) { throw new IOException("create: Pathname too long. Limit " + MAX_PATH_LENGTH + " characters, " + MAX_PATH_DEPTH + " levels."); } namesystem.startFile(src, new PermissionStatus(UserGroupInformation.getCurrentUser().getShortUserName(), null, masked), clientName, clientMachine, overwrite, createParent, replication, blockSize); myMetrics.incrNumFilesCreated(); myMetrics.incrNumCreateFileOps(); }
boolean append, boolean createParent, short replication, long blockSize ) throws IOException { if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src + ", holder=" + holder + ", clientMachine=" + clientMachine + ", createParent=" + createParent + ", replication=" + replication + ", overwrite=" + overwrite + ", append=" + append); } if (isInSafeMode()) throw new SafeModeException("Cannot create file" + src, safeMode); if (!DFSUtil.isValidName(src)) { throw new IOException("Invalid file name: " + src); } // Verify that the destination does not exist as a directory already. boolean pathExists = dir.exists(src); if (pathExists && dir.isDir(src)) { throw new IOException("Cannot create file "+ src + "; already exists as a directory."); } if (isPermissionEnabled) { if (append || (overwrite && pathExists)) { checkPathAccess(src, FsAction.WRITE); } else { checkAncestorAccess(src, FsAction.WRITE); } } if (!createParent) { verifyParentDir(src); } try { INode myFile = dir.getFileINode(src); recoverLeaseInternal(myFile, src, holder, clientMachine, false);
try { verifyReplication(src, replication, clientMachine); } catch(IOException e) { throw new IOException("failed to create "+e.getMessage()); } if (append) { if (myFile == null) { throw new FileNotFoundException("failed to append to non-existent file " + src + " on client " + clientMachine); } else if (myFile.isDirectory()) { throw new IOException("failed to append to directory " + src +" on client " + clientMachine); } } else if (!dir.isValidToCreate(src)) { if (overwrite) { delete(src, true); } else { throw new IOException("failed to create file " + src +" on client " + clientMachine +" either because the filename is invalid or the file exists"); } } DatanodeDescriptor clientNode = host2DataNodeMap.getDatanodeByHost(clientMachine); if (append) { // // Replace current node with a INodeUnderConstruction. // Recreate in-memory lease record. // INodeFile node = (INodeFile) myFile; INodeFileUnderConstruction cons = new INodeFileUnderConstruction( node.getLocalNameBytes(), node.getReplication(), node.getModificationTime(), node.getPreferredBlockSize(), node.getBlocks(), node.getPermissionStatus(), holder, clientMachine, clientNode); dir.replaceNode(src, node, cons);
6
leaseManager.addLease(cons.clientName, src); } else { // Now we can add the name to the filesystem. This file has no // blocks associated with it. // checkFsObjectLimit(); // increment global generation stamp long genstamp = nextGenerationStamp(); INodeFileUnderConstruction newNode = dir.addFile(src, permissions, replication, blockSize, holder, clientMachine, clientNode, genstamp); if (newNode == null) { throw new IOException("DIR* NameSystem.startFile: " + "Unable to add file to namespace."); } leaseManager.addLease(newNode.clientName, src); if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: " +"add "+src+" to namespace for "+holder); } } } catch (IOException ie) { NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " +ie.getMessage()); throw ie; } }
lease lease lease lease lease management write client lease completed create
private void finalizeINodeFileUnderConstruction(String src, INodeFileUnderConstruction pendingFile) throws IOException { NameNode.stateChangeLog.info("Removing lease on file " + src + " from client " + pendingFile.clientName); leaseManager.removeLease(pendingFile.clientName, src); // The file is no longer pending. // Create permanent INode, update blockmap INodeFile newFile = pendingFile.convertToInodeFile(); dir.replaceNode(src, pendingFile, newFile); // close file and persist block allocations for this file dir.closeFile(src, newFile); checkReplicationFactor(newFile); }
lease management
FsNamesystem initialize this.lmthread = new Daemon(leaseManager.new Monitor());
8
lmthread.start();
/****************************************************** * Monitor checks for leases that have expired, * and disposes of them. ******************************************************/ class Monitor implements Runnable { final String name = getClass().getSimpleName(); /** Check leases periodically. */ public void run() { for(; fsnamesystem.isRunning(); ) { synchronized(fsnamesystem) { checkLeases(); } try { Thread.sleep(2000); } catch(InterruptedException ie) { if (LOG.isDebugEnabled()) { LOG.debug(name + " is interrupted", ie); } } } } }
/** Check the leases beginning from the oldest. */ synchronized void checkLeases() { for(; sortedLeases.size() > 0; ) { final Lease oldest = sortedLeases.first(); if (!oldest.expiredHardLimit()) { return; // } LOG.info("Lease " + oldest + " has expired hard limit"); final List<String> removing = new ArrayList<String>(); // need to create a copy of the oldest lease paths, becuase // internalReleaseLease() removes paths corresponding to empty files, // i.e. it needs to modify the collection being iterated over
9
// causing ConcurrentModificationException String[] leasePaths = new String[oldest.getPaths().size()]; oldest.getPaths().toArray(leasePaths); for(String p : leasePaths) { try { fsnamesystem.internalReleaseLeaseOne(oldest, p); } catch (IOException e) { LOG.error("Cannot release the path "+p+" in the lease "+oldest, e); removing.add(p); } } for(String p : removing) { removeLease(oldest, p); } } }
fsnamesystem.internalReleaseLease(oldest, p); lease lease removeLease(oldest, p); lease lease LeaseManager private SortedMap leases = new TreeMap();holder->lease map private SortedSet sortedLeases = new TreeSet(); lease private SortedMap sortedLeasesByPath = new TreeMap();paths->leases map addLease lease leases sortedLeases lease client lease lease sortedLeasesByPath --lease recovery * Lease Recovery Algorithm * 1) Namenode retrieves lease information
10
* 2) For each file f in the lease, consider the last block b of f * 2.1) Get the datanodes which contains b * 2.2) Assign one of the datanodes as the primary datanode p
* 2.3) p obtains a new generation stamp form the namenode * 2.4) p get the block info from each datanode * 2.5) p computes the minimum block length * 2.6) p updates the datanodes, which have a valid generation stamp, * with the new generation stamp and the minimum block length * 2.7) p acknowledges the namenode the update results
* 2.8) Namenode updates the BlockInfo * 2.9) Namenode removes f from the lease * and removes the lease once all files have been removed * 2.10) Namenode commit changes to edit log
11