From 00c63e9645021923dbbef1d868873dbf5a9fc2f2 Mon Sep 17 00:00:00 2001 From: Sercan Tekin Date: Mon, 5 Jun 2023 10:13:21 -0400 Subject: [PATCH] HIVE-27317: Temporary (local) session files cleanup improvements --- .../session/TestClearDanglingScratchDir.java | 62 +++++++++++++++++ .../ql/session/ClearDanglingScratchDir.java | 67 ++++++++++++++----- 2 files changed, 113 insertions(+), 16 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java index 82d3db5910b9..0286b6c96c0f 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java @@ -18,12 +18,14 @@ package org.apache.hadoop.hive.ql.session; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.PrintStream; import java.util.UUID; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -129,4 +131,64 @@ public void testClearDanglingScratchDir() throws Exception { Assert.assertEquals(StringUtils.countMatches(stderr.toString(), "removed"), 1); ss.close(); } + + /** + * Testing behaviour of ClearDanglingScratchDir service over local tmp files/dirs + * @throws Exception + */ + @Test + public void testLocalDanglingFilesCleaning() throws Exception { + HiveConf conf = new HiveConf(); + conf.set("fs.default.name", "file:///"); + FileSystem fs = FileSystem.get(conf); + + // constants + String appId = "appId_" + System.currentTimeMillis(); + String userName = System.getProperty("user.name"); + String hdfs = "hdfs"; + String inuse = "inuse.lck"; + String l = File.separator; + + // simulating hdfs dangling dir and its inuse.lck file + Path hdfsRootDir = new Path( HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + l + userName + l + hdfs); + Path hdfsSessionDir = new Path(hdfsRootDir + l + userName + l + appId); + Path hdfsSessionLock = new Path(hdfsSessionDir + l + inuse); + fs.create(hdfsSessionLock); + + // simulating local dangling files + String localTmpDir = HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR); + Path localSessionDir = new Path(localTmpDir + l + appId); + Path localPipeOutFileRemove = new Path(localTmpDir + l + + appId + "-started-with-session-name.pipeout"); + Path localPipeOutFileNotRemove = new Path(localTmpDir + l + + "not-started-with-session-name-" + appId + ".pipeout"); + Path localPipeOutFileFailRemove = new Path(localTmpDir + l + + appId + "-started-with-session-name-but-fail-delete.pipeout"); + + // Create dirs/files + fs.mkdirs(localSessionDir); + fs.create(localPipeOutFileRemove); + fs.create(localPipeOutFileNotRemove); + fs.create(localPipeOutFileFailRemove); + + // Set permission for localPipeOutFileFailRemove file as not writable + // This will avoid file to be deleted as we check whether it is writable or not first + fs.setPermission(localPipeOutFileFailRemove, FsPermission.valueOf("-r--r--r--")); + + // the main service will be identifying which session files/dirs are dangling + ClearDanglingScratchDir clearDanglingScratchDirMain = new ClearDanglingScratchDir(false, + false, true, hdfsRootDir.toString(), conf); + clearDanglingScratchDirMain.run(); + + // localSessionDir and localPipeOutFileRemove should be removed + // localPipeOutFileNotRemove and localPipeOutFileFailRemove should not be removed + Assert.assertFalse("Local session dir '" + localSessionDir + + "' still exists, should have been removed!", fs.exists(localSessionDir)); + Assert.assertFalse("Local .pipeout file '" + localPipeOutFileRemove + + "' still exists, should have been removed!", fs.exists(localPipeOutFileRemove)); + Assert.assertTrue("Local .pipeout file '" + localPipeOutFileNotRemove + + "' does not exist, should have not been removed!", fs.exists(localPipeOutFileNotRemove)); + Assert.assertTrue("Local .pipeout file '" + localPipeOutFileFailRemove + + "' does not exist, should have not been removed!", fs.exists(localPipeOutFileFailRemove)); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java b/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java index 8d83a00e476f..62105dcec5a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java @@ -17,10 +17,13 @@ */ package org.apache.hadoop.hive.ql.session; +import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.List; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -53,6 +56,9 @@ * lease after 10 min, ie, the HDFS file hold by the dead HiveCli/HiveServer2 is writable * again after 10 min. Once it become writable, cleardanglingscratchDir will be able to * remove it + * 4. Additional functionality; once it is decided which session scratch dirs are residual, + * while removing them from hdfs, we will remove them from local tmp location as well. + * Please see {@link ClearDanglingScratchDir#removeLocalTmpFiles(String, String)}. */ public class ClearDanglingScratchDir implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(ClearDanglingScratchDir.class); @@ -141,25 +147,26 @@ public void run() { // if the file is currently held by a writer if(AlreadyBeingCreatedException.class.getName().equals(eAppend.getClassName())){ inuse = true; - } else if (UnsupportedOperationException.class.getName().equals(eAppend.getClassName())) { - // Append is not supported in the cluster, try to use create - try { - IOUtils.closeStream(fs.create(lockFilePath, false)); - } catch (RemoteException eCreate) { - if (AlreadyBeingCreatedException.class.getName().equals(eCreate.getClassName())){ - // If the file is held by a writer, will throw AlreadyBeingCreatedException - inuse = true; - } else { - consoleMessage("Unexpected error:" + eCreate.getMessage()); - } - } catch (FileAlreadyExistsException eCreateNormal) { - // Otherwise, throw FileAlreadyExistsException, which means the file owner is - // dead - removable = true; - } } else { consoleMessage("Unexpected error:" + eAppend.getMessage()); } + } catch (UnsupportedOperationException eUnsupported) { + // In Hadoop-3, append method is not supported. + // This is an alternative check to make sure whether a file is in use or not. + // Trying to open the file. If it is in use, it will throw IOException. + try { + IOUtils.closeStream(fs.create(lockFilePath, false)); + } catch (RemoteException eCreate) { + if (AlreadyBeingCreatedException.class.getName().equals(eCreate.getClassName())){ + // If the file is held by a writer, will throw AlreadyBeingCreatedException + inuse = true; + } else { + consoleMessage("Unexpected error:" + eCreate.getMessage()); + } + } catch (FileAlreadyExistsException eCreateNormal) { + // Otherwise, throw FileAlreadyExistsException, which means the file owner is dead + removable = true; + } } if (inuse) { // Cannot open the lock file for writing, must be held by a live process @@ -179,6 +186,7 @@ public void run() { return; } consoleMessage("Removing " + scratchDirToRemove.size() + " scratch directories"); + String localTmpDir = HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR); for (Path scratchDir : scratchDirToRemove) { if (dryRun) { System.out.println(scratchDir); @@ -192,6 +200,8 @@ public void run() { consoleMessage(message); } } + // cleaning up on local file system as well + removeLocalTmpFiles(scratchDir.getName(), localTmpDir); } } } catch (IOException e) { @@ -236,4 +246,29 @@ static Options createOptions() { return result; } + + /** + * While deleting dangling scratch dirs from hdfs, we can clean corresponding local files as well + * @param sessionName prefix to determine removable tmp files + * @param localTmpdir local tmp file location + */ + private void removeLocalTmpFiles(String sessionName, String localTmpdir) { + File[] files = new File(localTmpdir).listFiles(fn -> fn.getName().startsWith(sessionName)); + boolean success; + if (files != null) { + for (File file : files) { + success = false; + if (file.canWrite()) { + success = file.delete(); + } + if (success) { + consoleMessage("While removing '" + sessionName + "' dangling scratch dir from HDFS, " + + "local tmp session file '" + file.getPath() + "' has been cleaned as well."); + } else if (file.getName().startsWith(sessionName)) { + consoleMessage("Even though '" + sessionName + "' is marked as dangling session dir, " + + "local tmp session file '" + file.getPath() + "' could not be removed."); + } + } + } + } } \ No newline at end of file