From 0121838c9223829b86014d651412e8240748f804 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Thu, 13 Feb 2025 23:41:05 -0800 Subject: [PATCH 01/19] Refactoring ListPath Call --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 165 +----------------- .../contracts/services/ListResponseData.java | 66 +++++++ .../fs/azurebfs/services/AbfsBlobClient.java | 114 ++++++------ .../fs/azurebfs/services/AbfsClient.java | 82 ++++++++- .../fs/azurebfs/services/AbfsDfsClient.java | 46 ++++- .../azurebfs/services/AbfsHttpOperation.java | 7 +- .../azurebfs/services/AbfsRestOperation.java | 8 + .../services/VersionedFileStatus.java | 127 ++++++++++++++ 8 files changed, 399 insertions(+), 216 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index e42343a6e8380f..c0ce94d6763212 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -77,8 +77,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; @@ -114,6 +113,7 @@ import org.apache.hadoop.fs.azurebfs.services.ListingSupport; import org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials; import org.apache.hadoop.fs.azurebfs.services.StaticRetryPolicy; +import org.apache.hadoop.fs.azurebfs.services.VersionedFileStatus; import org.apache.hadoop.fs.azurebfs.utils.Base64; import org.apache.hadoop.fs.azurebfs.utils.CRC64; import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; @@ -1277,57 +1277,14 @@ public String listStatus(final Path path, final String startFrom, do { try (AbfsPerfInfo perfInfo = startTracking("listStatus", "listPath")) { - AbfsRestOperation op = listingClient.listPath(relativePath, false, - abfsConfiguration.getListMaxResults(), continuation, - tracingContext); + ListResponseData listResponseData = listingClient.listPath(relativePath, + false, abfsConfiguration.getListMaxResults(), continuation, + tracingContext, identityTransformer, this.uri); + AbfsRestOperation op = listResponseData.getOp(); perfInfo.registerResult(op.getResult()); - continuation = listingClient.getContinuationFromResponse(op.getResult()); - ListResultSchema retrievedSchema = op.getResult().getListResultSchema(); - if (retrievedSchema == null) { - throw new AbfsRestOperationException( - AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), - AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), - "listStatusAsync path not found", - null, op.getResult()); - } - - long blockSize = abfsConfiguration.getAzureBlockSize(); - - for (ListResultEntrySchema entry : retrievedSchema.paths()) { - final String owner = identityTransformer.transformIdentityForGetRequest(entry.owner(), true, userName); - final String group = identityTransformer.transformIdentityForGetRequest(entry.group(), false, primaryUserGroup); - final String encryptionContext = entry.getXMsEncryptionContext(); - final FsPermission fsPermission = entry.permissions() == null - ? new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL) - : AbfsPermission.valueOf(entry.permissions()); - final boolean hasAcl = AbfsPermission.isExtendedAcl(entry.permissions()); - - long lastModifiedMillis = 0; - long contentLength = entry.contentLength() == null ? 0 : entry.contentLength(); - boolean isDirectory = entry.isDirectory() == null ? false : entry.isDirectory(); - if (entry.lastModified() != null && !entry.lastModified().isEmpty()) { - lastModifiedMillis = DateTimeUtils.parseLastModifiedTime( - entry.lastModified()); - } - - Path entryPath = new Path(File.separator + entry.name()); - entryPath = entryPath.makeQualified(this.uri, entryPath); - - fileStatuses.add( - new VersionedFileStatus( - owner, - group, - fsPermission, - hasAcl, - contentLength, - isDirectory, - 1, - blockSize, - lastModifiedMillis, - entryPath, - entry.eTag(), - encryptionContext)); - } + continuation = listResponseData.getContinuationToken(); + List fileStatusList = listResponseData.getFileStatusList(); + fileStatuses.addAll(fileStatusList); perfInfo.registerSuccess(true); countAggregate++; @@ -1931,110 +1888,6 @@ private AbfsPerfInfo startTracking(String callerName, String calleeName) { return new AbfsPerfInfo(abfsPerfTracker, callerName, calleeName); } - /** - * A File status with version info extracted from the etag value returned - * in a LIST or HEAD request. - * The etag is included in the java serialization. - */ - static final class VersionedFileStatus extends FileStatus - implements EtagSource { - - /** - * The superclass is declared serializable; this subclass can also - * be serialized. - */ - private static final long serialVersionUID = -2009013240419749458L; - - /** - * The etag of an object. - * Not-final so that serialization via reflection will preserve the value. - */ - private String version; - - private String encryptionContext; - - private VersionedFileStatus( - final String owner, final String group, final FsPermission fsPermission, final boolean hasAcl, - final long length, final boolean isdir, final int blockReplication, - final long blocksize, final long modificationTime, final Path path, - final String version, final String encryptionContext) { - super(length, isdir, blockReplication, blocksize, modificationTime, 0, - fsPermission, - owner, - group, - null, - path, - hasAcl, false, false); - - this.version = version; - this.encryptionContext = encryptionContext; - } - - /** Compare if this object is equal to another object. - * @param obj the object to be compared. - * @return true if two file status has the same path name; false if not. - */ - @Override - public boolean equals(Object obj) { - if (!(obj instanceof FileStatus)) { - return false; - } - - FileStatus other = (FileStatus) obj; - - if (!this.getPath().equals(other.getPath())) {// compare the path - return false; - } - - if (other instanceof VersionedFileStatus) { - return this.version.equals(((VersionedFileStatus) other).version); - } - - return true; - } - - /** - * Returns a hash code value for the object, which is defined as - * the hash code of the path name. - * - * @return a hash code value for the path name and version - */ - @Override - public int hashCode() { - int hash = getPath().hashCode(); - hash = 89 * hash + (this.version != null ? this.version.hashCode() : 0); - return hash; - } - - /** - * Returns the version of this FileStatus - * - * @return a string value for the FileStatus version - */ - public String getVersion() { - return this.version; - } - - @Override - public String getEtag() { - return getVersion(); - } - - public String getEncryptionContext() { - return encryptionContext; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder( - "VersionedFileStatus{"); - sb.append(super.toString()); - sb.append("; version='").append(version).append('\''); - sb.append('}'); - return sb.toString(); - } - } - /** * Permissions class contain provided permission and umask in octalNotation. * If the object is created for namespace-disabled account, the permission and diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java new file mode 100644 index 00000000000000..7196fc30bcac53 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contracts.services; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; + +public class ListResponseData { + + private List fileStatusList; + private Map renamePendingJsonPaths; + private AbfsRestOperation executedRestOperation; + private String continuationToken; + + public List getFileStatusList() { + return fileStatusList; + } + + public void setFileStatusList(final List fileStatusList) { + this.fileStatusList = fileStatusList; + } + + public Map getRenamePendingJsonPaths() { + return renamePendingJsonPaths; + } + + public void setRenamePendingJsonPaths(final Map renamePendingJsonPaths) { + this.renamePendingJsonPaths = renamePendingJsonPaths; + } + + public AbfsRestOperation getOp() { + return executedRestOperation; + } + + public void setOp(final AbfsRestOperation executedRestOperation) { + this.executedRestOperation = executedRestOperation; + } + + public String getContinuationToken() { + return continuationToken; + } + + public void setContinuationToken(final String continuationToken) { + this.continuationToken = continuationToken; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index b54ce1a4dac7ef..8413d72047d97b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; +import java.net.URI; import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; @@ -36,6 +37,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.List; @@ -50,6 +52,7 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.Path; @@ -60,6 +63,7 @@ import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; @@ -70,11 +74,12 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListXmlParser; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -343,15 +348,16 @@ public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) * @return executed rest operation containing response from server. * @throws AzureBlobFileSystemException if rest operation or response parsing fails. */ - public AbfsRestOperation listPath(final String relativePath, final boolean recursive, - final int listMaxResults, final String continuation, TracingContext tracingContext) - throws IOException { - return listPath(relativePath, recursive, listMaxResults, continuation, tracingContext, true); + public ListResponseData listPath(final String relativePath, final boolean recursive, + final int listMaxResults, final String continuation, TracingContext tracingContext, + IdentityTransformerInterface identityTransformer, URI uri) throws IOException { + return listPath(relativePath, recursive, listMaxResults, continuation, tracingContext, identityTransformer, uri, true); } - public AbfsRestOperation listPath(final String relativePath, final boolean recursive, + public ListResponseData listPath(final String relativePath, final boolean recursive, final int listMaxResults, final String continuation, TracingContext tracingContext, - boolean is404CheckRequired) throws AzureBlobFileSystemException { + IdentityTransformerInterface identityTransformer, URI uri, boolean is404CheckRequired) + throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); @@ -373,16 +379,21 @@ public AbfsRestOperation listPath(final String relativePath, final boolean recur url, requestHeaders); - op.execute(tracingContext); + InputStream listResultInputStream = op.executeAndGetContentInputStream(tracingContext); + ListResponseData listResponseData = parseListPathResults(listResultInputStream, identityTransformer, uri); + listResponseData.setContinuationToken(getContinuationFromResponse(op.getResult())); + // Filter the paths for which no rename redo operation is performed. - fixAtomicEntriesInListResults(op, tracingContext); + retryRenameOnAtomicEntriesInListResults(op, tracingContext, listResponseData.getRenamePendingJsonPaths()); + if (isEmptyListResults(op.getResult()) && is404CheckRequired) { // If the list operation returns no paths, we need to check if the path is a file. // If it is a file, we need to return the file in the list. // If it is a non-existing path, we need to throw a FileNotFoundException. if (relativePath.equals(ROOT_PATH)) { // Root Always exists as directory. It can be a empty listing. - return op; + listResponseData.setOp(op); + return listResponseData; } AbfsRestOperation pathStatus = this.getPathStatus(relativePath, tracingContext, null, false); BlobListResultSchema listResultSchema = getListResultSchemaFromPathStatus(relativePath, pathStatus); @@ -392,9 +403,10 @@ public AbfsRestOperation listPath(final String relativePath, final boolean recur url, requestHeaders); listOp.hardSetGetListStatusResult(HTTP_OK, listResultSchema); - return listOp; + listResponseData.setOp(listOp); + return listResponseData; } - return op; + return listResponseData; } /** @@ -405,8 +417,9 @@ public AbfsRestOperation listPath(final String relativePath, final boolean recur * @param tracingContext tracing context * @throws AzureBlobFileSystemException if rest operation or response parsing fails. */ - private void fixAtomicEntriesInListResults(final AbfsRestOperation op, - final TracingContext tracingContext) throws AzureBlobFileSystemException { + private void retryRenameOnAtomicEntriesInListResults(final AbfsRestOperation op, + final TracingContext tracingContext, + Map renamePendingJsonPaths) throws AzureBlobFileSystemException { /* * Crashed HBase log rename recovery is done by Filesystem.getFileStatus and * Filesystem.listStatus. @@ -417,20 +430,13 @@ private void fixAtomicEntriesInListResults(final AbfsRestOperation op, || op.getResult().getStatusCode() != HTTP_OK) { return; } - BlobListResultSchema listResultSchema - = (BlobListResultSchema) op.getResult().getListResultSchema(); - if (listResultSchema == null) { + if (renamePendingJsonPaths == null || renamePendingJsonPaths.isEmpty()) { return; } - List filteredEntries = new ArrayList<>(); - for (BlobListResultEntrySchema entry : listResultSchema.paths()) { - if (!takeListPathAtomicRenameKeyAction(entry.path(), - entry.contentLength().intValue(), tracingContext)) { - filteredEntries.add(entry); - } - } - listResultSchema.withPaths(filteredEntries); + for (Map.Entry entry : renamePendingJsonPaths.entrySet()) { + retryRenameOnAtomicKeyPath(entry.getKey(), entry.getValue(), tracingContext); + } } /**{@inheritDoc}*/ @@ -1589,10 +1595,11 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) * Parse the XML response body returned by ListBlob API on Blob Endpoint. * @param stream InputStream contains the response from server. * @return BlobListResultSchema containing the list of entries. - * @throws IOException if parsing fails. */ @Override - public ListResultSchema parseListPathResults(final InputStream stream) throws IOException { + public ListResponseData parseListPathResults(final InputStream stream, + IdentityTransformerInterface identityTransformer, URI uri) + throws AzureBlobFileSystemException { if (stream == null) { return null; } @@ -1601,12 +1608,18 @@ public ListResultSchema parseListPathResults(final InputStream stream) throws IO final SAXParser saxParser = saxParserThreadLocal.get(); saxParser.reset(); listResultSchema = new BlobListResultSchema(); - saxParser.parse(stream, new BlobListXmlParser(listResultSchema, getBaseUrl().toString())); + saxParser.parse(stream, + new BlobListXmlParser(listResultSchema, getBaseUrl().toString())); } catch (SAXException | IOException e) { throw new RuntimeException(e); } - return removeDuplicateEntries(listResultSchema); + try { + return listResponseDataParsingXmlListResult(listResultSchema, + identityTransformer, uri); + } catch (IOException e) { + throw new AbfsDriverException(e); + } } /** @@ -1801,23 +1814,16 @@ public void takeGetPathStatusAtomicRenameKeyAction(final Path path, * @param renamePendingJsonLen length of the pendingJson file. * @param tracingContext tracing context. * - * @return true if action is taken. * @throws AzureBlobFileSystemException server error */ - private boolean takeListPathAtomicRenameKeyAction(final Path path, + + private void retryRenameOnAtomicKeyPath(final Path path, final int renamePendingJsonLen, final TracingContext tracingContext) throws AzureBlobFileSystemException { - if (path == null || path.isRoot() || !isAtomicRenameKey( - path.toUri().getPath()) || !path.toUri() - .getPath() - .endsWith(RenameAtomicity.SUFFIX)) { - return false; - } try { - RenameAtomicity renameAtomicity - = getRedoRenameAtomicity(path, renamePendingJsonLen, - tracingContext); + RenameAtomicity renameAtomicity = getRedoRenameAtomicity(path, + renamePendingJsonLen, tracingContext); renameAtomicity.redo(); } catch (AbfsRestOperationException ex) { /* @@ -1833,7 +1839,6 @@ private boolean takeListPathAtomicRenameKeyAction(final Path path, throw ex; } } - return true; } @VisibleForTesting @@ -1910,8 +1915,10 @@ private List getMetadataHeadersList(final Hashtable uniqueEntries = new ArrayList<>(); + private ListResponseData listResponseDataParsingXmlListResult( + BlobListResultSchema listResultSchema, IdentityTransformerInterface identityTransformer, URI uri) throws IOException { + List fileStatuses = new ArrayList<>(); + Map renamePendingJsonPaths = new HashMap<>(); TreeMap nameToEntryMap = new TreeMap<>(); for (BlobListResultEntrySchema entry : listResultSchema.paths()) { @@ -1919,18 +1926,27 @@ private BlobListResultSchema removeDuplicateEntries(BlobListResultSchema listRes // This is a blob entry. It is either a file or a marker blob. // In both cases we will add this. nameToEntryMap.put(entry.name(), entry); + fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + + if (entry.path() != null && !entry.path().isRoot() + && isAtomicRenameKey(entry.path().toUri().getPath()) + && entry.path().toUri().getPath().endsWith(RenameAtomicity.SUFFIX)) { + renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); + } } else { // This is a BlobPrefix entry. It is a directory with file inside // This might have already been added as a marker blob. if (!nameToEntryMap.containsKey(entry.name())) { nameToEntryMap.put(entry.name(), entry); + fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); } } } - uniqueEntries.addAll(nameToEntryMap.values()); - listResultSchema.withPaths(uniqueEntries); - return listResultSchema; + ListResponseData listResponseData = new ListResponseData(); + listResponseData.setFileStatusList(fileStatuses); + listResponseData.setRenamePendingJsonPaths(renamePendingJsonPaths); + return listResponseData; } /** @@ -1984,9 +2000,9 @@ private static String decodeMetadataAttribute(String encoded) @VisibleForTesting public boolean isNonEmptyDirectory(String path, TracingContext tracingContext) throws AzureBlobFileSystemException { - AbfsRestOperation listOp = listPath(path, false, 1, null, tracingContext, - false); - return !isEmptyListResults(listOp.getResult()); + ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, + null, null, false); + return !isEmptyListResults(listResponseData.getOp().getResult()); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index b6aca80768249b..3f759224e64429 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -19,12 +19,14 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.Closeable; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.InetAddress; import java.net.MalformedURLException; +import java.net.URI; import java.net.URL; import java.net.URLEncoder; import java.net.UnknownHostException; @@ -42,11 +44,13 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import com.jcraft.jsch.IO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -66,18 +70,27 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultEntrySchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformer; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; +import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.FutureCallback; @@ -495,9 +508,9 @@ public abstract AbfsRestOperation setFilesystemProperties(Hashtable getXMSProperties(AbfsHttpOperation res * @throws UnsupportedEncodingException if decoding fails */ public abstract String decodeAttribute(byte[] value) throws UnsupportedEncodingException; + + private String getPrimaryUserGroup() throws IOException { + String primaryUserGroup; + if (!getAbfsConfiguration().getSkipUserGroupMetadataDuringInitialization()) { + try { + primaryUserGroup = UserGroupInformation.getCurrentUser().getPrimaryGroupName(); + } catch (IOException ex) { + LOG.error("Failed to get primary group for {}, using user name as primary group name", + getPrimaryUser()); + primaryUserGroup = getPrimaryUser(); + } + } else { + //Provide a default group name + primaryUserGroup = getPrimaryUser(); + } + return primaryUserGroup; + } + + private String getPrimaryUser() throws IOException { + return UserGroupInformation.getCurrentUser().getShortUserName(); + } + + protected VersionedFileStatus getFileStatusFromEntry( + ListResultEntrySchema entry, + IdentityTransformerInterface identityTransformer, + URI uri) throws IOException { + final String owner = identityTransformer.transformIdentityForGetRequest( + entry.owner(), true, getPrimaryUser()); + final String group = identityTransformer.transformIdentityForGetRequest( + entry.group(), false, getPrimaryUserGroup()); + final String encryptionContext = entry.getXMsEncryptionContext(); + final FsPermission fsPermission = entry.permissions() == null + ? new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL) + : AbfsPermission.valueOf(entry.permissions()); + final boolean hasAcl = AbfsPermission.isExtendedAcl(entry.permissions()); + + long lastModifiedMillis = 0; + long contentLength = entry.contentLength() == null ? 0 : entry.contentLength(); + boolean isDirectory = entry.isDirectory() == null ? false : entry.isDirectory(); + if (entry.lastModified() != null && !entry.lastModified().isEmpty()) { + lastModifiedMillis = DateTimeUtils.parseLastModifiedTime( + entry.lastModified()); + } + + Path entryPath = new Path(File.separator + entry.name()); + entryPath = entryPath.makeQualified(uri, entryPath); + return new VersionedFileStatus( + owner, + group, + fsPermission, + hasAcl, + contentLength, + isDirectory, + 1, + getAbfsConfiguration().getAzureBlockSize(), + lastModifiedMillis, + entryPath, + entry.eTag(), + encryptionContext); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 8505b533ce4c49..b42e00f80cec3a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -18,11 +18,13 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; +import java.net.URI; import java.net.URL; import java.nio.ByteBuffer; import java.nio.CharBuffer; @@ -30,6 +32,7 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; +import java.util.ArrayList; import java.util.Hashtable; import java.util.List; import java.util.Map; @@ -40,6 +43,7 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -55,15 +59,24 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidFileSystemPropertyException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.Base64; +import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import static org.apache.commons.lang3.StringUtils.isEmpty; @@ -304,11 +317,12 @@ public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) * @throws AzureBlobFileSystemException if rest operation or response parsing fails. */ @Override - public AbfsRestOperation listPath(final String relativePath, + public ListResponseData listPath(final String relativePath, final boolean recursive, final int listMaxResults, final String continuation, - TracingContext tracingContext) throws IOException { + TracingContext tracingContext, + IdentityTransformerInterface identityTransformer, URI uri) throws IOException { final List requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); @@ -328,8 +342,11 @@ public AbfsRestOperation listPath(final String relativePath, final AbfsRestOperation op = getAbfsRestOperation( AbfsRestOperationType.ListPaths, HTTP_METHOD_GET, url, requestHeaders); - op.execute(tracingContext); - return op; + InputStream listResultInputStream = op.executeAndGetContentInputStream(tracingContext); + ListResponseData listResponseData = parseListPathResults(listResultInputStream, identityTransformer, uri); + listResponseData.setContinuationToken(getContinuationFromResponse(op.getResult())); + listResponseData.setOp(op); + return listResponseData; } /** @@ -1401,7 +1418,8 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) * @throws IOException if parsing fails. */ @Override - public ListResultSchema parseListPathResults(final InputStream stream) throws IOException { + public ListResponseData parseListPathResults(final InputStream stream, + IdentityTransformerInterface identityTransformer, URI uri) throws IOException { DfsListResultSchema listResultSchema; try { final ObjectMapper objectMapper = new ObjectMapper(); @@ -1410,7 +1428,23 @@ public ListResultSchema parseListPathResults(final InputStream stream) throws IO LOG.error("Unable to deserialize list results", ex); throw ex; } - return listResultSchema; + + if (listResultSchema == null) { + throw new AbfsRestOperationException( + AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), + AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), + "listStatusAsync path not found", + null); + } + + List fileStatuses = new ArrayList<>(); + for (DfsListResultEntrySchema entry : listResultSchema.paths()) { + fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + } + ListResponseData listResponseData = new ListResponseData(); + listResponseData.setFileStatusList(fileStatuses); + listResponseData.setRenamePendingJsonPaths(null); + return listResponseData; } @Override diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 017a4f4180750c..600e75f8017824 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -75,6 +75,7 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable { private String requestId = ""; private String expectedAppendPos = ""; private ListResultSchema listResultSchema = null; + private InputStream listResultStream = null; private List blockIdList = null; // metrics @@ -395,7 +396,7 @@ final void parseResponse(final byte[] buffer, if (url.toString().contains(QUERY_PARAM_COMP + EQUAL + BLOCKLIST)) { parseBlockListResponse(stream); } else { - parseListFilesResponse(stream); + listResultStream = stream; } } else { if (buffer != null) { @@ -580,6 +581,10 @@ public final long getRecvLatency() { return recvResponseTimeMs; } + public final InputStream getListResultStream() { + return listResultStream; + } + /** * Set response status code for the server call. * diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index c019fcbc3d3a76..2ad7290a8dd3dd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; +import java.io.InputStream; import java.io.UncheckedIOException; import java.net.HttpURLConnection; import java.net.URL; @@ -26,6 +27,7 @@ import java.time.Duration; import java.util.List; +import com.jcraft.jsch.IO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -268,6 +270,12 @@ String getSasToken() { this.bufferLength = bufferLength; } + public InputStream executeAndGetContentInputStream(TracingContext tracingContext) + throws AzureBlobFileSystemException { + execute(tracingContext); + return result.getListResultStream(); + } + /** * Execute a AbfsRestOperation. Track the Duration of a request if * abfsCounters isn't null. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java new file mode 100644 index 00000000000000..476ca5da3f7c6b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.fs.EtagSource; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; + +/** + * A File status with version info extracted from the etag value returned + * in a LIST or HEAD request. + * The etag is included in the java serialization. + */ +public class VersionedFileStatus extends FileStatus implements EtagSource { + + /** + * The superclass is declared serializable; this subclass can also + * be serialized. + */ + private static final long serialVersionUID = -2009013240419749458L; + + /** + * The etag of an object. + * Not-final so that serialization via reflection will preserve the value. + */ + private String version; + + private String encryptionContext; + + public VersionedFileStatus( + final String owner, final String group, final FsPermission fsPermission, final boolean hasAcl, + final long length, final boolean isdir, final int blockReplication, + final long blocksize, final long modificationTime, final Path path, + final String version, final String encryptionContext) { + super(length, isdir, blockReplication, blocksize, modificationTime, 0, + fsPermission, + owner, + group, + null, + path, + hasAcl, false, false); + + this.version = version; + this.encryptionContext = encryptionContext; + } + + /** Compare if this object is equal to another object. + * @param obj the object to be compared. + * @return true if two file status has the same path name; false if not. + */ + @Override + public boolean equals(Object obj) { + if (!(obj instanceof FileStatus)) { + return false; + } + + FileStatus other = (FileStatus) obj; + + if (!this.getPath().equals(other.getPath())) {// compare the path + return false; + } + + if (other instanceof VersionedFileStatus) { + return this.version.equals(((VersionedFileStatus) other).version); + } + + return true; + } + + /** + * Returns a hash code value for the object, which is defined as + * the hash code of the path name. + * + * @return a hash code value for the path name and version + */ + @Override + public int hashCode() { + int hash = getPath().hashCode(); + hash = 89 * hash + (this.version != null ? this.version.hashCode() : 0); + return hash; + } + + /** + * Returns the version of this FileStatus + * + * @return a string value for the FileStatus version + */ + public String getVersion() { + return this.version; + } + + @Override + public String getEtag() { + return getVersion(); + } + + public String getEncryptionContext() { + return encryptionContext; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "VersionedFileStatus{"); + sb.append(super.toString()); + sb.append("; version='").append(version).append('\''); + sb.append('}'); + return sb.toString(); + } +} \ No newline at end of file From 4eb7c8ab99d33d5978d9b59d9227a3cefdfc5fe9 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Fri, 14 Feb 2025 01:15:49 -0800 Subject: [PATCH 02/19] Test Changes' --- .../fs/azurebfs/services/AbfsClient.java | 20 +++++++++++++------ .../azurebfs/services/AbfsHttpOperation.java | 12 +++++------ .../fs/azurebfs/services/ListActionTaker.java | 2 +- .../hadoop/fs/azurebfs/ITestAbfsClient.java | 8 ++++---- .../azurebfs/ITestAbfsCustomEncryption.java | 7 ++++--- .../ITestAzureBlobFileSystemCreate.java | 5 +++-- .../ITestAzureBlobFileSystemDelete.java | 15 ++++++++------ .../ITestAzureBlobFileSystemFileStatus.java | 2 +- .../ITestAzureBlobFileSystemListStatus.java | 4 ++-- .../ITestAzureBlobFileSystemRename.java | 16 +++++++++------ .../services/TestListActionTaker.java | 5 ++++- .../azurebfs/utils/DirectoryStateHelper.java | 2 +- 12 files changed, 59 insertions(+), 39 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 3f759224e64429..cd23081a2a5e2a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1773,10 +1773,16 @@ protected VersionedFileStatus getFileStatusFromEntry( ListResultEntrySchema entry, IdentityTransformerInterface identityTransformer, URI uri) throws IOException { - final String owner = identityTransformer.transformIdentityForGetRequest( - entry.owner(), true, getPrimaryUser()); - final String group = identityTransformer.transformIdentityForGetRequest( - entry.group(), false, getPrimaryUserGroup()); + final String owner, group; + if (identityTransformer != null) { + owner = identityTransformer.transformIdentityForGetRequest( + entry.owner(), true, getPrimaryUser()); + group = identityTransformer.transformIdentityForGetRequest( + entry.group(), false, getPrimaryUserGroup()); + } else { + owner = null; + group = null; + } final String encryptionContext = entry.getXMsEncryptionContext(); final FsPermission fsPermission = entry.permissions() == null ? new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL) @@ -1785,14 +1791,16 @@ protected VersionedFileStatus getFileStatusFromEntry( long lastModifiedMillis = 0; long contentLength = entry.contentLength() == null ? 0 : entry.contentLength(); - boolean isDirectory = entry.isDirectory() == null ? false : entry.isDirectory(); + boolean isDirectory = entry.isDirectory() != null && entry.isDirectory(); if (entry.lastModified() != null && !entry.lastModified().isEmpty()) { lastModifiedMillis = DateTimeUtils.parseLastModifiedTime( entry.lastModified()); } Path entryPath = new Path(File.separator + entry.name()); - entryPath = entryPath.makeQualified(uri, entryPath); + if (uri != null) { + entryPath = new Path(uri.getPath(), entryPath); + } return new VersionedFileStatus( owner, group, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 600e75f8017824..44abfcf4345228 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -486,12 +486,12 @@ private void processStorageErrorResponse() { * @param stream InputStream contains the list results. * @throws IOException if the response cannot be deserialized. */ - private void parseListFilesResponse(final InputStream stream) throws IOException { - if (stream == null || listResultSchema != null) { - return; - } - listResultSchema = client.parseListPathResults(stream); - } +// private void parseListFilesResponse(final InputStream stream) throws IOException { +// if (stream == null || listResultSchema != null) { +// return; +// } +// listResultSchema = client.parseListPathResults(stream); +// } private void parseBlockListResponse(final InputStream stream) throws IOException { if (stream == null || blockIdList != null) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java index b87f16ae46107c..91676ab28e99bd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java @@ -231,7 +231,7 @@ protected String listAndEnqueue(final ListBlobQueue listBlobQueue, op = getAbfsClient().listPath(path.toUri().getPath(), true, queueAvailableSizeForProduction, continuationToken, - tracingContext); + tracingContext, null, null).getOp(); } catch (AzureBlobFileSystemException ex) { throw ex; } catch (IOException ex) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java index baa57da2881605..eb6dbc269fc370 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java @@ -65,7 +65,7 @@ public void testContinuationTokenHavingEqualSign() throws Exception { try { AbfsRestOperation op = abfsClient .listPath("/", true, LIST_MAX_RESULTS, "===========", - getTestTracingContext(fs, true)); + getTestTracingContext(fs, true), null, null).getOp(); Assert.assertTrue(false); } catch (AbfsRestOperationException ex) { Assert.assertEquals("InvalidQueryParameterValue", ex.getErrorCode().getErrorCode()); @@ -106,7 +106,7 @@ public void testListPathWithValidListMaxResultsValues() AbfsRestOperation op = getFileSystem().getAbfsClient().listPath( directory.toString(), false, getListMaxResults(), null, - getTestTracingContext(getFileSystem(), true)); + getTestTracingContext(getFileSystem(), true), null, null).getOp(); List list = op.getResult().getListResultSchema().paths(); String continuationToken = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); @@ -141,7 +141,7 @@ public void testListPathWithValueGreaterThanServerMaximum() AbfsRestOperation op = getFileSystem().getAbfsClient().listPath( directory.toString(), false, getListMaxResults(), null, - getTestTracingContext(getFileSystem(), true)); + getTestTracingContext(getFileSystem(), true), null, null).getOp(); List list = op.getResult().getListResultSchema().paths(); String continuationToken = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); @@ -179,7 +179,7 @@ private List listPath(String directory) throws IOException { return getFileSystem().getAbfsClient() .listPath(directory, false, getListMaxResults(), null, - getTestTracingContext(getFileSystem(), true)).getResult() + getTestTracingContext(getFileSystem(), true), null, null).getOp().getResult() .getListResultSchema().paths(); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java index ee88ebccf6d0f8..880413d6cbd14b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java @@ -54,6 +54,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.VersionedFileStatus; import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.impl.OpenFileParameters; @@ -300,10 +301,10 @@ private AbfsRestOperation callOperation(AzureBlobFileSystem fs, */ FileStatus status = fs.listStatus(testPath)[0]; Assertions.assertThat(status) - .isInstanceOf(AzureBlobFileSystemStore.VersionedFileStatus.class); + .isInstanceOf(VersionedFileStatus.class); Assertions.assertThat( - ((AzureBlobFileSystemStore.VersionedFileStatus) status).getEncryptionContext()) + ((VersionedFileStatus) status).getEncryptionContext()) .isNotNull(); try (FSDataInputStream in = fs.openFileWithOptions(testPath, @@ -342,7 +343,7 @@ true, new BlobAppendRequestParameters(BLOCK_ID, null)), getTestTracingContext(fs, false)); case LISTSTATUS: return client.listPath(path, false, 5, null, - getTestTracingContext(fs, true)); + getTestTracingContext(fs, true), null, null).getOp(); case RENAME: TracingContext tc = getTestTracingContext(fs, true); return client.renamePath(path, new Path(path + "_2").toString(), diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index b3314894d3f995..cde55e645a6495 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -61,6 +61,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.services.RenameAtomicity; +import org.apache.hadoop.fs.azurebfs.services.VersionedFileStatus; import org.apache.hadoop.fs.azurebfs.utils.DirectoryStateHelper; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; @@ -239,8 +240,8 @@ private AzureBlobFileSystem createJsonFile(Path path, Path renameJson) throws IO doReturn(client).when(store).getClient(); fs.setWorkingDirectory(new Path(ROOT_PATH)); fs.mkdirs(new Path(path, "test3")); - AzureBlobFileSystemStore.VersionedFileStatus fileStatus - = (AzureBlobFileSystemStore.VersionedFileStatus) fs.getFileStatus(path); + VersionedFileStatus fileStatus + = (VersionedFileStatus) fs.getFileStatus(path); new RenameAtomicity(path, new Path("/hbase/test4"), renameJson, getTestTracingContext(fs, true), fileStatus.getEtag(), diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index 60bc39979d9d64..a1639aa40c0fe5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -20,6 +20,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.net.URI; import java.nio.file.AccessDeniedException; import java.util.ArrayList; import java.util.List; @@ -40,6 +41,7 @@ import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; @@ -293,12 +295,13 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception { doCallRealMethod().when(mockClient) .listPath(Mockito.nullable(String.class), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.nullable(TracingContext.class)); + Mockito.nullable(TracingContext.class), Mockito.nullable( + IdentityTransformerInterface.class), Mockito.nullable(URI.class)); doCallRealMethod().when((AbfsBlobClient) mockClient) .listPath(Mockito.nullable(String.class), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), Mockito.nullable(TracingContext.class), - Mockito.anyBoolean()); + Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class), Mockito.anyBoolean()); doCallRealMethod().when((AbfsBlobClient) mockClient) .getPathStatus(Mockito.nullable(String.class), Mockito.nullable(TracingContext.class), Mockito.nullable(ContextEncryptionAdapter.class), Mockito.anyBoolean()); @@ -391,12 +394,12 @@ public void testDeleteImplicitDirWithSingleListResults() throws Exception { boolean recursive = answer.getArgument(1); String continuation = answer.getArgument(3); TracingContext context = answer.getArgument(4); - return client.listPath(path, recursive, 1, continuation, context); + return client.listPath(path, recursive, 1, continuation, context, null, null); }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.any(TracingContext.class)); + Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); client.deleteBlobPath(new Path("/testDir/dir1"), null, getTestTracingContext(fs, true)); fs.delete(new Path("/testDir/dir1"), true); @@ -543,14 +546,14 @@ public void testProducerStopOnDeleteFailure() throws Exception { }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); intercept(AccessDeniedException.class, () -> { fs.delete(new Path("/src"), true); }); Mockito.verify(spiedClient, Mockito.times(1)) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java index 87b1ea4321d416..190dcea7c8bc3c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java @@ -255,7 +255,7 @@ public void testListStatusIsCalledForImplicitPathOnBlobEndpoint() throws Excepti fs.getFileStatus(implicitPath); Mockito.verify(abfsClient, Mockito.times(1)).getPathStatus(any(), eq(false), any(), any()); - Mockito.verify(abfsClient, Mockito.times(1)).listPath(any(), eq(false), eq(1), any(), any(), eq(false)); + Mockito.verify(abfsClient, Mockito.times(1)).listPath(any(), eq(false), eq(1), any(), any(), any(), any(), eq(false)); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index e563e082b80cce..ddd940fb3eaf62 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -176,12 +176,12 @@ public void testListPathTracingContext() throws Exception { Mockito.verify(spiedClient, times(1)).listPath( "/", false, spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults(), - null, spiedTracingContext); + null, spiedTracingContext, null, null); // 2. With continuation token Mockito.verify(spiedClient, times(1)).listPath( "/", false, spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults(), - TEST_CONTINUATION_TOKEN, spiedTracingContext); + TEST_CONTINUATION_TOKEN, spiedTracingContext, null, null); // Assert that none of the API calls used the same tracing header. Mockito.verify(spiedTracingContext, times(0)).constructHeader(any(), any(), any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index abd45eae0e0873..6e01eb95e6c3bd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; import java.util.ArrayList; @@ -45,6 +46,7 @@ import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; @@ -56,6 +58,7 @@ import org.apache.hadoop.fs.azurebfs.services.BlobRenameHandler; import org.apache.hadoop.fs.azurebfs.services.RenameAtomicity; import org.apache.hadoop.fs.azurebfs.services.RenameAtomicityTestUtils; +import org.apache.hadoop.fs.azurebfs.services.VersionedFileStatus; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.statistics.IOStatisticAssertions; @@ -779,8 +782,8 @@ public void testRenameCompleteBeforeRenameAtomicityRedo() throws Exception { /* * Create renameJson file. */ - AzureBlobFileSystemStore.VersionedFileStatus fileStatus - = (AzureBlobFileSystemStore.VersionedFileStatus) fs.getFileStatus(path); + VersionedFileStatus fileStatus + = (VersionedFileStatus) fs.getFileStatus(path); int jsonLen = new RenameAtomicity(path, new Path("/hbase/test4"), renameJson, getTestTracingContext(fs, true), fileStatus.getEtag(), @@ -1343,12 +1346,13 @@ public void testBlobRenameWithListGivingPaginatedResultWithOneObjectPerList() String continuation = answer.getArgument(3); TracingContext context = answer.getArgument(4); return getFileSystem().getAbfsClient() - .listPath(path, recursive, 1, continuation, context); + .listPath(path, recursive, 1, continuation, context, null, null); }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.any(TracingContext.class)); + Mockito.any(TracingContext.class), Mockito.nullable( + IdentityTransformerInterface.class), Mockito.nullable(URI.class)); fs.rename(new Path("/testDir/dir1"), new Path("/testDir/dir2")); for (int i = 0; i < 10; i++) { Assertions.assertThat(fs.exists(new Path("/testDir/dir2/file" + i))) @@ -1426,13 +1430,13 @@ public void testProducerStopOnRenameFailure() throws Exception { listCallInvocation[0]++; return getFileSystem().getAbfsClient().listPath(answer.getArgument(0), answer.getArgument(1), 1, - answer.getArgument(3), answer.getArgument(4)); + answer.getArgument(3), answer.getArgument(4), answer.getArgument(5), answer.getArgument(6)); } return answer.callRealMethod(); }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); intercept(AccessDeniedException.class, () -> { fs.rename(new Path("/src"), new Path("/dst")); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java index f9c774b7c32b03..4b4a1502328ef5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; +import java.net.URI; import java.util.List; import org.assertj.core.api.Assertions; @@ -32,6 +33,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_LIST_MAX_RESULTS; @@ -135,7 +137,8 @@ protected void addPaths(final List paths, return op; }).when(client) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable( + IdentityTransformerInterface.class), Mockito.nullable(URI.class)); listActionTaker.listRecursiveAndTakeAction(); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java index 3becb274a48ba0..530323b76df60f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java @@ -73,7 +73,7 @@ public static boolean isImplicitDirectory(Path path, AzureBlobFileSystem fs, // 2nd condition: listPaths should return some entries. AbfsRestOperation op = client.listPath( - relativePath, false, 1, null, testTracingContext, false); + relativePath, false, 1, null, testTracingContext, null, null, false).getOp(); if (op != null && op.getResult() != null) { int listSize = op.getResult().getListResultSchema().paths().size(); if (listSize > 0) { From 5a4576ebe50a56869e59648c189b7bc128848699 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Wed, 19 Feb 2025 22:01:52 -0800 Subject: [PATCH 03/19] More refraction --- .../fs/azurebfs/services/AbfsBlobClient.java | 28 ++++++------------- .../fs/azurebfs/services/AbfsClient.java | 13 ++------- .../fs/azurebfs/services/AbfsDfsClient.java | 13 +++++---- .../azurebfs/services/AbfsHttpOperation.java | 9 ++++-- .../fs/azurebfs/services/ListActionTaker.java | 2 ++ .../ITestAzureBlobFileSystemListStatus.java | 4 +-- 6 files changed, 29 insertions(+), 40 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 8413d72047d97b..36de29f8684815 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -378,10 +378,9 @@ public ListResponseData listPath(final String relativePath, final boolean recurs HTTP_METHOD_GET, url, requestHeaders); - - InputStream listResultInputStream = op.executeAndGetContentInputStream(tracingContext); - ListResponseData listResponseData = parseListPathResults(listResultInputStream, identityTransformer, uri); - listResponseData.setContinuationToken(getContinuationFromResponse(op.getResult())); + op.execute(tracingContext); + ListResponseData listResponseData = parseListPathResults(op.getResult(), identityTransformer, uri); + listResponseData.setOp(op); // Filter the paths for which no rename redo operation is performed. retryRenameOnAtomicEntriesInListResults(op, tracingContext, listResponseData.getRenamePendingJsonPaths()); @@ -1551,18 +1550,6 @@ public boolean checkUserError(int responseStatusCode) { && responseStatusCode != HTTP_CONFLICT); } - /** - * Get the continuation token from the response from BLOB Endpoint Listing. - * Continuation Token will be present in XML List response body. - * @param result The response from the server. - * @return The continuation token. - */ - @Override - public String getContinuationFromResponse(AbfsHttpOperation result) { - BlobListResultSchema listResultSchema = (BlobListResultSchema) result.getListResultSchema(); - return listResultSchema.getNextMarker(); - } - /** * Get the User-defined metadata on a path from response headers of * GetBlobProperties API on Blob Endpoint. @@ -1593,13 +1580,14 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) /** * Parse the XML response body returned by ListBlob API on Blob Endpoint. - * @param stream InputStream contains the response from server. + * @param result InputStream contains the response from server. * @return BlobListResultSchema containing the list of entries. */ @Override - public ListResponseData parseListPathResults(final InputStream stream, + public ListResponseData parseListPathResults(AbfsHttpOperation result, IdentityTransformerInterface identityTransformer, URI uri) throws AzureBlobFileSystemException { + InputStream stream = result.getListResultStream(); if (stream == null) { return null; } @@ -1610,6 +1598,7 @@ public ListResponseData parseListPathResults(final InputStream stream, listResultSchema = new BlobListResultSchema(); saxParser.parse(stream, new BlobListXmlParser(listResultSchema, getBaseUrl().toString())); + result.setListResultSchema(listResultSchema); } catch (SAXException | IOException e) { throw new RuntimeException(e); } @@ -1946,6 +1935,7 @@ && isAtomicRenameKey(entry.path().toUri().getPath()) ListResponseData listResponseData = new ListResponseData(); listResponseData.setFileStatusList(fileStatuses); listResponseData.setRenamePendingJsonPaths(renamePendingJsonPaths); + listResponseData.setContinuationToken(listResultSchema.getNextMarker()); return listResponseData; } @@ -2019,8 +2009,6 @@ private boolean isEmptyListResults(AbfsHttpOperation result) { if (isEmptyList) { LOG.debug("List call returned empty results without any continuation token."); return true; - } else if (result != null && !(result.getListResultSchema() instanceof BlobListResultSchema)) { - throw new RuntimeException("List call returned unexpected results over Blob Endpoint."); } return false; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index cd23081a2a5e2a..b3c3d7d83ca8b3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1691,11 +1691,11 @@ protected boolean isRenameResilience() { /** * Parses response of Listing API from server based on Endpoint used. - * @param stream InputStream of the response + * @param result InputStream of the response * @return ListResultSchema * @throws IOException if parsing fails */ - public abstract ListResponseData parseListPathResults(InputStream stream, + public abstract ListResponseData parseListPathResults(AbfsHttpOperation result, IdentityTransformerInterface identityTransformer, URI uri) throws IOException; /** @@ -1714,13 +1714,6 @@ public abstract ListResponseData parseListPathResults(InputStream stream, */ public abstract StorageErrorResponseSchema processStorageErrorResponse(InputStream stream) throws IOException; - /** - * Returns continuation token from server response based on Endpoint used. - * @param result response from server - * @return continuation token - */ - public abstract String getContinuationFromResponse(AbfsHttpOperation result); - /** * Returns user-defined metadata from server response based on Endpoint used. * @param result response from server @@ -1799,7 +1792,7 @@ protected VersionedFileStatus getFileStatusFromEntry( Path entryPath = new Path(File.separator + entry.name()); if (uri != null) { - entryPath = new Path(uri.getPath(), entryPath); + entryPath = entryPath.makeQualified(uri, entryPath); } return new VersionedFileStatus( owner, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index b42e00f80cec3a..cf37ab2380be38 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -342,8 +342,8 @@ public ListResponseData listPath(final String relativePath, final AbfsRestOperation op = getAbfsRestOperation( AbfsRestOperationType.ListPaths, HTTP_METHOD_GET, url, requestHeaders); - InputStream listResultInputStream = op.executeAndGetContentInputStream(tracingContext); - ListResponseData listResponseData = parseListPathResults(listResultInputStream, identityTransformer, uri); + op.execute(tracingContext); + ListResponseData listResponseData = parseListPathResults(op.getResult(), identityTransformer, uri); listResponseData.setContinuationToken(getContinuationFromResponse(op.getResult())); listResponseData.setOp(op); return listResponseData; @@ -1394,7 +1394,6 @@ public boolean checkUserError(int responseStatusCode) { * @param result The response from the server. * @return The continuation token. */ - @Override public String getContinuationFromResponse(AbfsHttpOperation result) { return result.getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); } @@ -1414,16 +1413,18 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) /** * Parse the list file response from DFS ListPath API in Json format - * @param stream InputStream contains the list results. + * @param result InputStream contains the list results. * @throws IOException if parsing fails. */ @Override - public ListResponseData parseListPathResults(final InputStream stream, + public ListResponseData parseListPathResults(AbfsHttpOperation result, IdentityTransformerInterface identityTransformer, URI uri) throws IOException { + InputStream listResultInputStream = result.getListResultStream(); DfsListResultSchema listResultSchema; try { final ObjectMapper objectMapper = new ObjectMapper(); - listResultSchema = objectMapper.readValue(stream, DfsListResultSchema.class); + listResultSchema = objectMapper.readValue(listResultInputStream, DfsListResultSchema.class); + result.setListResultSchema(listResultSchema); } catch (IOException ex) { LOG.error("Unable to deserialize list results", ex); throw ex; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 44abfcf4345228..4d4f3a5c6ca3b5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -384,7 +384,8 @@ final void parseResponse(final byte[] buffer, // consume the input stream to release resources int totalBytesRead = 0; - try (InputStream stream = getContentInputStream()) { + try { + InputStream stream = getContentInputStream(); if (isNullInputStream(stream)) { return; } @@ -396,7 +397,7 @@ final void parseResponse(final byte[] buffer, if (url.toString().contains(QUERY_PARAM_COMP + EQUAL + BLOCKLIST)) { parseBlockListResponse(stream); } else { - listResultStream = stream; + listResultStream = getContentInputStream(); } } else { if (buffer != null) { @@ -673,6 +674,10 @@ protected boolean isConnectionDisconnectedOnError() { return connectionDisconnectedOnError; } + protected void setListResultSchema(final ListResultSchema listResultSchema) { + this.listResultSchema = listResultSchema; + } + public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation { /** * Creates an instance to represent fixed results. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java index 91676ab28e99bd..cfd9d41e821beb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java @@ -31,12 +31,14 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index ddd940fb3eaf62..88d14b24073f58 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -71,7 +71,7 @@ */ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { - private static final int TEST_FILES_NUMBER = 6000; + private static final int TEST_FILES_NUMBER = 6; private static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { @@ -81,7 +81,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); - config.set(AZURE_LIST_MAX_RESULTS, "5000"); + config.set(AZURE_LIST_MAX_RESULTS, "5"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); From a12335c71f9fc4dffe52f3a92b9d583a1e9290c4 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Wed, 19 Feb 2025 22:49:04 -0800 Subject: [PATCH 04/19] Removing Unnecessary code changes --- .../fs/azurebfs/services/AbfsBlobClient.java | 29 +++++++++++++--- .../fs/azurebfs/services/AbfsClient.java | 34 ++++++++++++------- .../azurebfs/services/AbfsHttpOperation.java | 13 ------- .../azurebfs/services/AbfsRestOperation.java | 8 ----- .../fs/azurebfs/services/ListActionTaker.java | 2 -- .../services/VersionedFileStatus.java | 2 +- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 36de29f8684815..246494d3b336ad 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -75,6 +75,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListXmlParser; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; @@ -348,6 +349,7 @@ public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) * @return executed rest operation containing response from server. * @throws AzureBlobFileSystemException if rest operation or response parsing fails. */ + @Override public ListResponseData listPath(final String relativePath, final boolean recursive, final int listMaxResults, final String continuation, TracingContext tracingContext, IdentityTransformerInterface identityTransformer, URI uri) throws IOException { @@ -378,6 +380,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs HTTP_METHOD_GET, url, requestHeaders); + op.execute(tracingContext); ListResponseData listResponseData = parseListPathResults(op.getResult(), identityTransformer, uri); listResponseData.setOp(op); @@ -390,18 +393,28 @@ public ListResponseData listPath(final String relativePath, final boolean recurs // If it is a file, we need to return the file in the list. // If it is a non-existing path, we need to throw a FileNotFoundException. if (relativePath.equals(ROOT_PATH)) { - // Root Always exists as directory. It can be a empty listing. - listResponseData.setOp(op); + // Root Always exists as directory. It can be an empty listing. return listResponseData; } AbfsRestOperation pathStatus = this.getPathStatus(relativePath, tracingContext, null, false); BlobListResultSchema listResultSchema = getListResultSchemaFromPathStatus(relativePath, pathStatus); + List fileStatusList = new ArrayList<>(); + Map renamePendingJsonPaths = new HashMap<>(); + for (BlobListResultEntrySchema entry : listResultSchema.paths()) { + fileStatusList.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + if (isRenamePendingJsonPathEntry(entry)) { + renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); + } + } AbfsRestOperation listOp = getAbfsRestOperation( AbfsRestOperationType.ListBlobs, HTTP_METHOD_GET, url, requestHeaders); listOp.hardSetGetListStatusResult(HTTP_OK, listResultSchema); + listResponseData.setFileStatusList(fileStatusList); + listResponseData.setContinuationToken(null); + listResponseData.setRenamePendingJsonPaths(renamePendingJsonPaths); listResponseData.setOp(listOp); return listResponseData; } @@ -1917,9 +1930,7 @@ private ListResponseData listResponseDataParsingXmlListResult( nameToEntryMap.put(entry.name(), entry); fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); - if (entry.path() != null && !entry.path().isRoot() - && isAtomicRenameKey(entry.path().toUri().getPath()) - && entry.path().toUri().getPath().endsWith(RenameAtomicity.SUFFIX)) { + if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); } } else { @@ -1939,6 +1950,12 @@ && isAtomicRenameKey(entry.path().toUri().getPath()) return listResponseData; } + private boolean isRenamePendingJsonPathEntry(BlobListResultEntrySchema entry) { + return entry.path() != null && !entry.path().isRoot() + && isAtomicRenameKey(entry.path().toUri().getPath()) + && entry.path().toUri().getPath().endsWith(RenameAtomicity.SUFFIX); + } + /** * When listing is done on a file, Blob Endpoint returns the empty listing * but DFS Endpoint returns the file status as one of the entries. @@ -1990,6 +2007,8 @@ private static String decodeMetadataAttribute(String encoded) @VisibleForTesting public boolean isNonEmptyDirectory(String path, TracingContext tracingContext) throws AzureBlobFileSystemException { + // This method is only called internally to determine state of a path + // and hence don't need identity transformation to happen. ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, null, null, false); return !isEmptyListResults(listResponseData.getOp().getResult()); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index b3c3d7d83ca8b3..b4e475c19e3677 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1691,7 +1691,7 @@ protected boolean isRenameResilience() { /** * Parses response of Listing API from server based on Endpoint used. - * @param result InputStream of the response + * @param result AbfsHttpOperation of list Operation * @return ListResultSchema * @throws IOException if parsing fails */ @@ -1741,7 +1741,7 @@ public abstract Hashtable getXMSProperties(AbfsHttpOperation res */ public abstract String decodeAttribute(byte[] value) throws UnsupportedEncodingException; - private String getPrimaryUserGroup() throws IOException { + private String getPrimaryUserGroup() throws AzureBlobFileSystemException { String primaryUserGroup; if (!getAbfsConfiguration().getSkipUserGroupMetadataDuringInitialization()) { try { @@ -1758,23 +1758,31 @@ private String getPrimaryUserGroup() throws IOException { return primaryUserGroup; } - private String getPrimaryUser() throws IOException { - return UserGroupInformation.getCurrentUser().getShortUserName(); + private String getPrimaryUser() throws AzureBlobFileSystemException { + try { + return UserGroupInformation.getCurrentUser().getUserName(); + } catch (IOException ex) { + throw new AbfsDriverException(ex); + } } protected VersionedFileStatus getFileStatusFromEntry( ListResultEntrySchema entry, IdentityTransformerInterface identityTransformer, - URI uri) throws IOException { + URI uri) throws AzureBlobFileSystemException { final String owner, group; - if (identityTransformer != null) { - owner = identityTransformer.transformIdentityForGetRequest( - entry.owner(), true, getPrimaryUser()); - group = identityTransformer.transformIdentityForGetRequest( - entry.group(), false, getPrimaryUserGroup()); - } else { - owner = null; - group = null; + try{ + if (identityTransformer != null) { + owner = identityTransformer.transformIdentityForGetRequest( + entry.owner(), true, getPrimaryUser()); + group = identityTransformer.transformIdentityForGetRequest( + entry.group(), false, getPrimaryUserGroup()); + } else { + owner = null; + group = null; + } + } catch (IOException ex) { + throw new AbfsDriverException(ex); } final String encryptionContext = entry.getXMsEncryptionContext(); final FsPermission fsPermission = entry.permissions() == null diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 4d4f3a5c6ca3b5..e47aec5b93b262 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -481,19 +481,6 @@ private void processStorageErrorResponse() { */ protected abstract InputStream getErrorStream() throws IOException; - /** - * Parse the list file response - * - * @param stream InputStream contains the list results. - * @throws IOException if the response cannot be deserialized. - */ -// private void parseListFilesResponse(final InputStream stream) throws IOException { -// if (stream == null || listResultSchema != null) { -// return; -// } -// listResultSchema = client.parseListPathResults(stream); -// } - private void parseBlockListResponse(final InputStream stream) throws IOException { if (stream == null || blockIdList != null) { return; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 2ad7290a8dd3dd..c019fcbc3d3a76 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; -import java.io.InputStream; import java.io.UncheckedIOException; import java.net.HttpURLConnection; import java.net.URL; @@ -27,7 +26,6 @@ import java.time.Duration; import java.util.List; -import com.jcraft.jsch.IO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -270,12 +268,6 @@ String getSasToken() { this.bufferLength = bufferLength; } - public InputStream executeAndGetContentInputStream(TracingContext tracingContext) - throws AzureBlobFileSystemException { - execute(tracingContext); - return result.getListResultStream(); - } - /** * Execute a AbfsRestOperation. Track the Duration of a request if * abfsCounters isn't null. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java index cfd9d41e821beb..91676ab28e99bd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java @@ -31,14 +31,12 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java index 476ca5da3f7c6b..42a15a99305165 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java @@ -124,4 +124,4 @@ public String toString() { sb.append('}'); return sb.toString(); } -} \ No newline at end of file +} From 604b900ca56355312e953d5f4b87e65d61113c49 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Thu, 20 Feb 2025 20:59:19 -0800 Subject: [PATCH 05/19] Fixing HNS Test --- .../azurebfs/services/AbfsHttpOperation.java | 9 +- .../ITestAzureBlobFileSystemFileStatus.java | 1 + .../ITestAzureBlobFileSystemListStatus.java | 119 +++++++++++++++++- .../azurebfs/services/AbfsClientTestUtil.java | 12 +- .../TestAbfsRestOperationMockFailures.java | 10 +- .../services/TestListActionTaker.java | 5 +- 6 files changed, 142 insertions(+), 14 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index e47aec5b93b262..6e989b6f555a64 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -221,6 +221,10 @@ public ListResultSchema getListResultSchema() { return listResultSchema; } + public final InputStream getListResultStream() { + return listResultStream; + } + /** * Get response header value for the given headerKey. * @@ -568,11 +572,6 @@ public final long getSendLatency() { public final long getRecvLatency() { return recvResponseTimeMs; } - - public final InputStream getListResultStream() { - return listResultStream; - } - /** * Set response status code for the server call. * diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java index 190dcea7c8bc3c..8bbafedf8b0ec2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java @@ -98,6 +98,7 @@ private FileStatus validateStatus(final AzureBlobFileSystem fs, final Path name, assertTrue(errorInStatus + "not a file", fileStatus.isFile()); } } + assertPathDns(fileStatus.getPath()); return fileStatus; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 88d14b24073f58..da128213a427f3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -18,9 +18,13 @@ package org.apache.hadoop.fs.azurebfs; +import javax.annotation.Nullable; +import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.net.SocketTimeoutException; +import java.net.URI; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; @@ -28,6 +32,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.assertj.core.api.Assertions; import org.junit.Test; import org.mockito.Mockito; import org.mockito.stubbing.Stubber; @@ -44,8 +50,10 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; +import org.apache.hadoop.fs.azurebfs.utils.DirectoryStateHelper; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; @@ -53,6 +61,7 @@ import static java.net.HttpURLConnection.HTTP_OK; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; @@ -62,6 +71,7 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.times; import static org.mockito.Mockito.when; @@ -174,9 +184,9 @@ public void testListPathTracingContext() throws Exception { // Assert that there were 2 paginated ListPath calls were made 1 and 2. // 1. Without continuation token Mockito.verify(spiedClient, times(1)).listPath( - "/", false, - spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults(), - null, spiedTracingContext, null, null); + eq("/"), eq(false), + eq(spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults()), + eq(null), eq(spiedTracingContext), any(IdentityTransformerInterface.class), any(URI.class)); // 2. With continuation token Mockito.verify(spiedClient, times(1)).listPath( "/", false, @@ -343,4 +353,107 @@ public void testRenameTrailingPeriodFile() throws IOException { assertTrue("Attempt to create file that ended with a dot should" + " throw IllegalArgumentException", exceptionThrown); } + + + + /** + * Test to verify that listStatus returns the correct file status all types + * of paths viz. implicit, explicit, file. + * @throws Exception if there is an error or test assertions fails. + */ + @Test + public void testListStatusWithImplicitExplicitChildren() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.setWorkingDirectory(new Path(ROOT_PATH)); + Path root = new Path(ROOT_PATH); + + // Create an implicit directory under root + Path dir = new Path("a"); + createAzCopyFolder(dir); + + // Assert that implicit directory is returned + FileStatus[] fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(1); + assertImplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); + + // Create a marker blob for the directory. + fs.mkdirs(dir); + + // Assert that only one entry of explicit directory is returned + fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(1); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); + + // Create a file under root + Path file1 = new Path("b"); + fs.create(file1); + + // Assert that two entries are returned in alphabetic order. + fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(2); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); + assertFileFileStatus(fileStatuses[1], fs.makeQualified(file1)); + + // Create another implicit directory under root. + Path dir2 = new Path("c"); + createAzCopyFolder(dir2); + + // Assert that three entries are returned in alphabetic order. + fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(3); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); + assertFileFileStatus(fileStatuses[1], fs.makeQualified(file1)); + assertImplicitDirectoryFileStatus(fileStatuses[2], fs.makeQualified(dir2)); + } + + /** + * Test to verify that listStatus returns the correct file status when called on an implicit path + * @throws Exception if there is an error or test assertions fails. + */ + @Test + public void testListStatusOnImplicitDirectoryPath() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path implicitPath = new Path("/implicitDir"); + createAzCopyFolder(implicitPath); + + FileStatus[] statuses = fs.listStatus(implicitPath); + Assertions.assertThat(statuses.length).isGreaterThanOrEqualTo(1); + assertImplicitDirectoryFileStatus(statuses[0], fs.makeQualified(statuses[0].getPath())); + + FileStatus[] statuses1 = fs.listStatus(new Path(statuses[0].getPath().toString())); + Assertions.assertThat(statuses1.length).isGreaterThanOrEqualTo(1); + assertFileFileStatus(statuses1[0], fs.makeQualified(statuses1[0].getPath())); + } + + private void assertFileFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) { + Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); + Assertions.assertThat(fileStatus.isFile()).isEqualTo(true); + Assertions.assertThat(fileStatus.isDirectory()).isEqualTo(false); + Assertions.assertThat(fileStatus.getModificationTime()).isNotEqualTo(0); + } + + private void assertImplicitDirectoryFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) throws Exception { + assertDirectoryFileStatus(fileStatus, qualifiedPath); + DirectoryStateHelper.isImplicitDirectory(qualifiedPath, getFileSystem(), + getTestTracingContext(getFileSystem(), true)); + Assertions.assertThat(fileStatus.getModificationTime()).isEqualTo(0); + } + + private void assertExplicitDirectoryFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) throws Exception { + assertDirectoryFileStatus(fileStatus, qualifiedPath); + DirectoryStateHelper.isExplicitDirectory(qualifiedPath, getFileSystem(), + getTestTracingContext(getFileSystem(), true)); + Assertions.assertThat(fileStatus.getModificationTime()).isNotEqualTo(0); + } + + private void assertDirectoryFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) { + Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); + Assertions.assertThat(fileStatus.isDirectory()).isEqualTo(true); + Assertions.assertThat(fileStatus.isFile()).isEqualTo(false); + Assertions.assertThat(fileStatus.getLen()).isEqualTo(0); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index bc6c69ccc8fac6..c8c50bf950912b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -34,10 +34,12 @@ import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; import org.assertj.core.api.Assertions; import org.mockito.AdditionalMatchers; +import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.util.functional.FunctionRaisingIOE; @@ -89,11 +91,15 @@ public static void setMockAbfsRestOperationForListPathOperation( new ArrayList<>(), spiedClient.getAbfsConfiguration() )); + ListResponseData listResponseData = Mockito.spy(new ListResponseData()); + listResponseData.setRenamePendingJsonPaths(null); + listResponseData.setOp(abfsRestOperation); + listResponseData.setFileStatusList(new ArrayList<>()); Mockito.doReturn(abfsRestOperation).when(spiedClient).getAbfsRestOperation( eq(AbfsRestOperationType.ListPaths), any(), any(), any()); - addGeneralMockBehaviourToAbfsClient(spiedClient, exponentialRetryPolicy, staticRetryPolicy, intercept); + addGeneralMockBehaviourToAbfsClient(spiedClient, exponentialRetryPolicy, staticRetryPolicy, intercept, listResponseData); addGeneralMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation); functionRaisingIOE.apply(httpOperation); @@ -202,7 +208,8 @@ public static void addMockBehaviourToRestOpAndHttpOp(final AbfsRestOperation abf public static void addGeneralMockBehaviourToAbfsClient(final AbfsClient abfsClient, final ExponentialRetryPolicy exponentialRetryPolicy, final StaticRetryPolicy staticRetryPolicy, - final AbfsThrottlingIntercept intercept) throws IOException, URISyntaxException { + final AbfsThrottlingIntercept intercept, + final ListResponseData listResponseData) throws IOException, URISyntaxException { Mockito.doReturn(OAuth).when(abfsClient).getAuthType(); Mockito.doReturn("").when(abfsClient).getAccessToken(); AbfsConfiguration abfsConfiguration = Mockito.mock(AbfsConfiguration.class); @@ -215,6 +222,7 @@ public static void addGeneralMockBehaviourToAbfsClient(final AbfsClient abfsClie .when(intercept) .sendingRequest(any(), nullable(AbfsCounters.class)); Mockito.doNothing().when(intercept).updateMetrics(any(), any()); + Mockito.doReturn(listResponseData).when(abfsClient).parseListPathResults(any(), any(), any()); // Returning correct retry policy based on failure reason Mockito.doReturn(exponentialRetryPolicy).when(abfsClient).getExponentialRetryPolicy(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java index 8ee3a71f358cbe..8252143ff978ec 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; @@ -201,7 +202,8 @@ public void testRetryPolicyWithDifferentFailureReasons() throws Exception { StaticRetryPolicy staticRetryPolicy = Mockito.mock(StaticRetryPolicy.class); AbfsThrottlingIntercept intercept = Mockito.mock( AbfsThrottlingIntercept.class); - addGeneralMockBehaviourToAbfsClient(abfsClient, exponentialRetryPolicy, staticRetryPolicy, intercept); + addGeneralMockBehaviourToAbfsClient(abfsClient, exponentialRetryPolicy, staticRetryPolicy, intercept, Mockito.mock( + ListResponseData.class)); AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( AbfsRestOperationType.ReadFile, @@ -289,7 +291,8 @@ private void testClientRequestIdForStatusRetry(int status, StaticRetryPolicy staticRetryPolicy = Mockito.mock(StaticRetryPolicy.class); AbfsThrottlingIntercept intercept = Mockito.mock( AbfsThrottlingIntercept.class); - addGeneralMockBehaviourToAbfsClient(abfsClient, exponentialRetryPolicy, staticRetryPolicy, intercept); + addGeneralMockBehaviourToAbfsClient(abfsClient, exponentialRetryPolicy, staticRetryPolicy, intercept, Mockito.mock( + ListResponseData.class)); // Create a readfile operation that will fail AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( @@ -356,7 +359,8 @@ private void testClientRequestIdForTimeoutRetry(Exception[] exceptions, StaticRetryPolicy staticRetryPolicy = Mockito.mock(StaticRetryPolicy.class); AbfsThrottlingIntercept intercept = Mockito.mock( AbfsThrottlingIntercept.class); - addGeneralMockBehaviourToAbfsClient(abfsClient, exponentialRetryPolicy, staticRetryPolicy, intercept); + addGeneralMockBehaviourToAbfsClient(abfsClient, exponentialRetryPolicy, staticRetryPolicy, intercept, Mockito.mock( + ListResponseData.class)); AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( AbfsRestOperationType.ReadFile, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java index 4b4a1502328ef5..c6c9fbfa49bfb8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -65,9 +66,11 @@ public void testProducerResumeOnlyOnConsumerLagBecomesTolerable() throws Mockito.doReturn(DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE) .when(abfsConfiguration) .getProducerQueueMaxSize(); + ListResponseData listResponseData = Mockito.mock(ListResponseData.class); AbfsRestOperation op = Mockito.mock(AbfsRestOperation.class); AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); Mockito.doReturn(httpOperation).when(op).getResult(); + Mockito.doReturn(op).when(listResponseData).getOp(); BlobListResultSchema listResultSchema = Mockito.mock( BlobListResultSchema.class); Mockito.doReturn(listResultSchema) @@ -134,7 +137,7 @@ protected void addPaths(final List paths, occurrences[0]++; Assertions.assertThat((int) answer.getArgument(2)) .isEqualTo(DEFAULT_AZURE_LIST_MAX_RESULTS); - return op; + return listResponseData; }).when(client) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable( From f3c4fe678773eb785748321d3514558aaefc258a Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Thu, 27 Feb 2025 02:17:33 -0800 Subject: [PATCH 06/19] Test Fixes --- .../services/AbfsAHCHttpOperation.java | 26 +++++-------------- .../fs/azurebfs/services/AbfsBlobClient.java | 2 ++ .../azurebfs/services/AbfsHttpOperation.java | 4 ++- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java index 3ed70965db923a..a68efec6268a77 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java @@ -194,26 +194,14 @@ String getConnResponseMessage() throws IOException { public void processResponse(final byte[] buffer, final int offset, final int length) throws IOException { - try { - if (!isPayloadRequest) { - prepareRequest(); - LOG.debug("Sending request: {}", httpRequestBase); - httpResponse = executeRequest(); - LOG.debug("Request sent: {}; response {}", httpRequestBase, - httpResponse); - } - parseResponseHeaderAndBody(buffer, offset, length); - } finally { - if (httpResponse != null) { - try { - EntityUtils.consume(httpResponse.getEntity()); - } finally { - if (httpResponse instanceof CloseableHttpResponse) { - ((CloseableHttpResponse) httpResponse).close(); - } - } - } + if (!isPayloadRequest) { + prepareRequest(); + LOG.debug("Sending request: {}", httpRequestBase); + httpResponse = executeRequest(); + LOG.debug("Request sent: {}; response {}", httpRequestBase, + httpResponse); } + parseResponseHeaderAndBody(buffer, offset, length); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 7d160803c5d2aa..279cd0b36eb933 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1225,6 +1225,8 @@ public AbfsRestOperation getPathStatus(final String path, if (op.getResult().getStatusCode() == HTTP_NOT_FOUND && isImplicitCheckRequired && isNonEmptyDirectory(path, tracingContext)) { // Implicit path found. + // Create a marker blob at this path. + this.createMarkerAtPath(path, null, contextEncryptionAdapter, tracingContext); AbfsRestOperation successOp = getSuccessOp( AbfsRestOperationType.GetPathStatus, HTTP_METHOD_HEAD, url, requestHeaders); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 6e989b6f555a64..627f7bf443c464 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; @@ -401,7 +403,7 @@ final void parseResponse(final byte[] buffer, if (url.toString().contains(QUERY_PARAM_COMP + EQUAL + BLOCKLIST)) { parseBlockListResponse(stream); } else { - listResultStream = getContentInputStream(); + listResultStream = stream; } } else { if (buffer != null) { From a0bf5f1fde6a682ac262ca673a8fc6c6483ce4b7 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Sun, 2 Mar 2025 21:58:34 -0800 Subject: [PATCH 07/19] Removing Unused Import --- .../hadoop/fs/azurebfs/AzureBlobFileSystemStore.java | 2 -- .../apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java | 1 - .../org/apache/hadoop/fs/azurebfs/services/AbfsClient.java | 5 ----- .../apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java | 7 ------- .../hadoop/fs/azurebfs/services/AbfsHttpOperation.java | 2 -- 5 files changed, 17 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 1a7323b1749674..077e38aaa9412c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.azurebfs; import java.io.Closeable; -import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; @@ -59,7 +58,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.EtagSource; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 279cd0b36eb933..4dda1c49ff19bb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -75,7 +75,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListXmlParser; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 6ce3ad31fefa93..987a86f4afa849 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -44,13 +44,11 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import com.jcraft.jsch.IO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -70,16 +68,13 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; -import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformer; import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index f8c15e6e06b253..8564a1b84b7d95 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.azurebfs.services; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -66,8 +65,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; @@ -75,11 +72,7 @@ import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.Base64; -import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import static org.apache.commons.lang3.StringUtils.isEmpty; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 627f7bf443c464..4ff4bdac4a169f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.azurebfs.services; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; From b37c3d2bdb5f8771b1dbad0a179be307cf3444b1 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 3 Mar 2025 08:10:56 -0800 Subject: [PATCH 08/19] Test fixes --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 9 +++++-- .../fs/azurebfs/services/AbfsBlobClient.java | 27 ++++++++----------- .../fs/azurebfs/services/AbfsClient.java | 24 +++++++++++++---- .../fs/azurebfs/services/AbfsDfsClient.java | 10 +++---- .../fs/azurebfs/services/ListActionTaker.java | 2 +- .../hadoop/fs/azurebfs/ITestAbfsClient.java | 8 +++--- .../azurebfs/ITestAbfsCustomEncryption.java | 2 +- .../ITestAzureBlobFileSystemDelete.java | 14 +++++----- .../ITestAzureBlobFileSystemFileStatus.java | 2 +- .../ITestAzureBlobFileSystemListStatus.java | 8 +++--- .../ITestAzureBlobFileSystemRename.java | 9 +++---- .../azurebfs/services/AbfsClientTestUtil.java | 2 +- .../services/TestListActionTaker.java | 3 +-- .../azurebfs/utils/DirectoryStateHelper.java | 2 +- 14 files changed, 65 insertions(+), 57 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 077e38aaa9412c..fdc4b56593c2b5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1264,7 +1264,7 @@ public String listStatus(final Path path, final String startFrom, if (startFrom != null && !startFrom.isEmpty()) { /* * Blob Endpoint Does not support startFrom yet. Fallback to DFS Client. - * startFrom remains null for all HDFS APIs. This is only for internal use. + * startFrom remains null for all HDFS APIs. This is used only for tests. */ listingClient = getClient(AbfsServiceType.DFS); continuation = getIsNamespaceEnabled(tracingContext) @@ -1277,7 +1277,7 @@ public String listStatus(final Path path, final String startFrom, try (AbfsPerfInfo perfInfo = startTracking("listStatus", "listPath")) { ListResponseData listResponseData = listingClient.listPath(relativePath, false, abfsConfiguration.getListMaxResults(), continuation, - tracingContext, identityTransformer, this.uri); + tracingContext, this.uri); AbfsRestOperation op = listResponseData.getOp(); perfInfo.registerResult(op.getResult()); continuation = listResponseData.getContinuationToken(); @@ -2027,6 +2027,11 @@ void setNamespaceEnabled(Trilean isNamespaceEnabled){ this.isNamespaceEnabled = isNamespaceEnabled; } + @VisibleForTesting + public URI getUri() { + return this.uri; + } + private void updateInfiniteLeaseDirs() { this.azureInfiniteLeaseDirSet = new HashSet<>(Arrays.asList( abfsConfiguration.getAzureInfiniteLeaseDirs().split(AbfsHttpConstants.COMMA))); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 4dda1c49ff19bb..0c48f9371f983b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -350,14 +350,12 @@ public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) */ @Override public ListResponseData listPath(final String relativePath, final boolean recursive, - final int listMaxResults, final String continuation, TracingContext tracingContext, - IdentityTransformerInterface identityTransformer, URI uri) throws IOException { - return listPath(relativePath, recursive, listMaxResults, continuation, tracingContext, identityTransformer, uri, true); + final int listMaxResults, final String continuation, TracingContext tracingContext, URI uri) throws IOException { + return listPath(relativePath, recursive, listMaxResults, continuation, tracingContext, uri, true); } public ListResponseData listPath(final String relativePath, final boolean recursive, - final int listMaxResults, final String continuation, TracingContext tracingContext, - IdentityTransformerInterface identityTransformer, URI uri, boolean is404CheckRequired) + final int listMaxResults, final String continuation, TracingContext tracingContext, URI uri, boolean is404CheckRequired) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); @@ -381,7 +379,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs requestHeaders); op.execute(tracingContext); - ListResponseData listResponseData = parseListPathResults(op.getResult(), identityTransformer, uri); + ListResponseData listResponseData = parseListPathResults(op.getResult(), uri); listResponseData.setOp(op); // Filter the paths for which no rename redo operation is performed. @@ -400,7 +398,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs List fileStatusList = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); for (BlobListResultEntrySchema entry : listResultSchema.paths()) { - fileStatusList.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + fileStatusList.add(getFileStatusFromEntry(entry, uri)); if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); } @@ -1594,8 +1592,7 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) * @return BlobListResultSchema containing the list of entries. */ @Override - public ListResponseData parseListPathResults(AbfsHttpOperation result, - IdentityTransformerInterface identityTransformer, URI uri) + public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { InputStream stream = result.getListResultStream(); if (stream == null) { @@ -1614,8 +1611,7 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, } try { - return listResponseDataParsingXmlListResult(listResultSchema, - identityTransformer, uri); + return listResponseDataParsingXmlListResult(listResultSchema, uri); } catch (IOException e) { throw new AbfsDriverException(e); } @@ -1915,7 +1911,7 @@ private List getMetadataHeadersList(final Hashtable fileStatuses = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); TreeMap nameToEntryMap = new TreeMap<>(); @@ -1925,7 +1921,7 @@ private ListResponseData listResponseDataParsingXmlListResult( // This is a blob entry. It is either a file or a marker blob. // In both cases we will add this. nameToEntryMap.put(entry.name(), entry); - fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + fileStatuses.add(getFileStatusFromEntry(entry, uri)); if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); @@ -1935,7 +1931,7 @@ private ListResponseData listResponseDataParsingXmlListResult( // This might have already been added as a marker blob. if (!nameToEntryMap.containsKey(entry.name())) { nameToEntryMap.put(entry.name(), entry); - fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + fileStatuses.add(getFileStatusFromEntry(entry, uri)); } } } @@ -2006,8 +2002,7 @@ public boolean isNonEmptyDirectory(String path, TracingContext tracingContext) throws AzureBlobFileSystemException { // This method is only called internally to determine state of a path // and hence don't need identity transformation to happen. - ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, - null, null, false); + ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, null, false); return !isEmptyListResults(listResponseData.getOp().getResult()); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 987a86f4afa849..92888ff6bb42a0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.lang.reflect.InvocationTargetException; import java.net.HttpURLConnection; import java.net.InetAddress; import java.net.MalformedURLException; @@ -49,6 +50,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -75,6 +77,7 @@ import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformer; import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; @@ -123,6 +126,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SEMICOLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.UTF_8; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_IDENTITY_TRANSFORM_CLASS; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.SERVER_SIDE_ENCRYPTION_ALGORITHM; @@ -159,6 +163,7 @@ public abstract class AbfsClient implements Closeable { private final AbfsPerfTracker abfsPerfTracker; private String clientProvidedEncryptionKey = null; private String clientProvidedEncryptionKeySHA = null; + private final IdentityTransformerInterface identityTransformer; private final String accountName; private final AuthType authType; @@ -293,6 +298,18 @@ private AbfsClient(final URL baseUrl, metricIdlePeriod); } this.abfsMetricUrl = abfsConfiguration.getMetricUri(); + + final Class identityTransformerClass = + abfsConfiguration.getRawConfiguration().getClass(FS_AZURE_IDENTITY_TRANSFORM_CLASS, IdentityTransformer.class, + IdentityTransformerInterface.class); + try { + this.identityTransformer = + identityTransformerClass.getConstructor(Configuration.class).newInstance(abfsConfiguration.getRawConfiguration()); + } catch (IllegalAccessException | InstantiationException | IllegalArgumentException | + InvocationTargetException | NoSuchMethodException e) { + throw new IOException(e); + } + LOG.trace("IdentityTransformer init complete"); } public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, @@ -509,8 +526,7 @@ public abstract AbfsRestOperation setFilesystemProperties(Hashtable requestHeaders = createDefaultHeaders(); final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); @@ -344,7 +343,7 @@ public ListResponseData listPath(final String relativePath, AbfsRestOperationType.ListPaths, HTTP_METHOD_GET, url, requestHeaders); op.execute(tracingContext); - ListResponseData listResponseData = parseListPathResults(op.getResult(), identityTransformer, uri); + ListResponseData listResponseData = parseListPathResults(op.getResult(), uri); listResponseData.setContinuationToken(getContinuationFromResponse(op.getResult())); listResponseData.setOp(op); return listResponseData; @@ -1477,8 +1476,7 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) * @throws IOException if parsing fails. */ @Override - public ListResponseData parseListPathResults(AbfsHttpOperation result, - IdentityTransformerInterface identityTransformer, URI uri) throws IOException { + public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws IOException { InputStream listResultInputStream = result.getListResultStream(); DfsListResultSchema listResultSchema; try { @@ -1500,7 +1498,7 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, List fileStatuses = new ArrayList<>(); for (DfsListResultEntrySchema entry : listResultSchema.paths()) { - fileStatuses.add(getFileStatusFromEntry(entry, identityTransformer, uri)); + fileStatuses.add(getFileStatusFromEntry(entry, uri)); } ListResponseData listResponseData = new ListResponseData(); listResponseData.setFileStatusList(fileStatuses); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java index 91676ab28e99bd..b6dde3bdf1b711 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java @@ -231,7 +231,7 @@ protected String listAndEnqueue(final ListBlobQueue listBlobQueue, op = getAbfsClient().listPath(path.toUri().getPath(), true, queueAvailableSizeForProduction, continuationToken, - tracingContext, null, null).getOp(); + tracingContext, null).getOp(); } catch (AzureBlobFileSystemException ex) { throw ex; } catch (IOException ex) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java index eb6dbc269fc370..c180689b267ab4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsClient.java @@ -65,7 +65,7 @@ public void testContinuationTokenHavingEqualSign() throws Exception { try { AbfsRestOperation op = abfsClient .listPath("/", true, LIST_MAX_RESULTS, "===========", - getTestTracingContext(fs, true), null, null).getOp(); + getTestTracingContext(fs, true), null).getOp(); Assert.assertTrue(false); } catch (AbfsRestOperationException ex) { Assert.assertEquals("InvalidQueryParameterValue", ex.getErrorCode().getErrorCode()); @@ -106,7 +106,7 @@ public void testListPathWithValidListMaxResultsValues() AbfsRestOperation op = getFileSystem().getAbfsClient().listPath( directory.toString(), false, getListMaxResults(), null, - getTestTracingContext(getFileSystem(), true), null, null).getOp(); + getTestTracingContext(getFileSystem(), true), null).getOp(); List list = op.getResult().getListResultSchema().paths(); String continuationToken = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); @@ -141,7 +141,7 @@ public void testListPathWithValueGreaterThanServerMaximum() AbfsRestOperation op = getFileSystem().getAbfsClient().listPath( directory.toString(), false, getListMaxResults(), null, - getTestTracingContext(getFileSystem(), true), null, null).getOp(); + getTestTracingContext(getFileSystem(), true), null).getOp(); List list = op.getResult().getListResultSchema().paths(); String continuationToken = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION); @@ -179,7 +179,7 @@ private List listPath(String directory) throws IOException { return getFileSystem().getAbfsClient() .listPath(directory, false, getListMaxResults(), null, - getTestTracingContext(getFileSystem(), true), null, null).getOp().getResult() + getTestTracingContext(getFileSystem(), true), null).getOp().getResult() .getListResultSchema().paths(); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java index 880413d6cbd14b..6ef88b9a80e1e0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java @@ -343,7 +343,7 @@ true, new BlobAppendRequestParameters(BLOCK_ID, null)), getTestTracingContext(fs, false)); case LISTSTATUS: return client.listPath(path, false, 5, null, - getTestTracingContext(fs, true), null, null).getOp(); + getTestTracingContext(fs, true), null).getOp(); case RENAME: TracingContext tc = getTestTracingContext(fs, true); return client.renamePath(path, new Path(path + "_2").toString(), diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index a1639aa40c0fe5..4a1b2730b3caae 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -295,13 +295,11 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception { doCallRealMethod().when(mockClient) .listPath(Mockito.nullable(String.class), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.nullable(TracingContext.class), Mockito.nullable( - IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.nullable(TracingContext.class), Mockito.nullable(URI.class)); doCallRealMethod().when((AbfsBlobClient) mockClient) .listPath(Mockito.nullable(String.class), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.nullable(TracingContext.class), - Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class), Mockito.anyBoolean()); + Mockito.nullable(TracingContext.class), Mockito.nullable(URI.class), Mockito.anyBoolean()); doCallRealMethod().when((AbfsBlobClient) mockClient) .getPathStatus(Mockito.nullable(String.class), Mockito.nullable(TracingContext.class), Mockito.nullable(ContextEncryptionAdapter.class), Mockito.anyBoolean()); @@ -394,12 +392,12 @@ public void testDeleteImplicitDirWithSingleListResults() throws Exception { boolean recursive = answer.getArgument(1); String continuation = answer.getArgument(3); TracingContext context = answer.getArgument(4); - return client.listPath(path, recursive, 1, continuation, context, null, null); + return client.listPath(path, recursive, 1, continuation, context, null); }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.any(TracingContext.class), Mockito.nullable(URI.class)); client.deleteBlobPath(new Path("/testDir/dir1"), null, getTestTracingContext(fs, true)); fs.delete(new Path("/testDir/dir1"), true); @@ -546,14 +544,14 @@ public void testProducerStopOnDeleteFailure() throws Exception { }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(URI.class)); intercept(AccessDeniedException.class, () -> { fs.delete(new Path("/src"), true); }); Mockito.verify(spiedClient, Mockito.times(1)) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(URI.class)); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java index 8bbafedf8b0ec2..f226e5b61fba74 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java @@ -256,7 +256,7 @@ public void testListStatusIsCalledForImplicitPathOnBlobEndpoint() throws Excepti fs.getFileStatus(implicitPath); Mockito.verify(abfsClient, Mockito.times(1)).getPathStatus(any(), eq(false), any(), any()); - Mockito.verify(abfsClient, Mockito.times(1)).listPath(any(), eq(false), eq(1), any(), any(), any(), any(), eq(false)); + Mockito.verify(abfsClient, Mockito.times(1)).listPath(any(), eq(false), eq(1), any(), any(), any(), eq(false)); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index da128213a427f3..9b68a5be5e16ce 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -184,14 +184,14 @@ public void testListPathTracingContext() throws Exception { // Assert that there were 2 paginated ListPath calls were made 1 and 2. // 1. Without continuation token Mockito.verify(spiedClient, times(1)).listPath( - eq("/"), eq(false), - eq(spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults()), - eq(null), eq(spiedTracingContext), any(IdentityTransformerInterface.class), any(URI.class)); + "/", false, + spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults(), + null, spiedTracingContext, spiedFs.getAbfsStore().getUri()); // 2. With continuation token Mockito.verify(spiedClient, times(1)).listPath( "/", false, spiedFs.getAbfsStore().getAbfsConfiguration().getListMaxResults(), - TEST_CONTINUATION_TOKEN, spiedTracingContext, null, null); + TEST_CONTINUATION_TOKEN, spiedTracingContext, spiedFs.getAbfsStore().getUri()); // Assert that none of the API calls used the same tracing header. Mockito.verify(spiedTracingContext, times(0)).constructHeader(any(), any(), any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 2aa45549bb6b0b..15208e4eb4e5a6 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1359,13 +1359,12 @@ public void testBlobRenameWithListGivingPaginatedResultWithOneObjectPerList() String continuation = answer.getArgument(3); TracingContext context = answer.getArgument(4); return getFileSystem().getAbfsClient() - .listPath(path, recursive, 1, continuation, context, null, null); + .listPath(path, recursive, 1, continuation, context, null); }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), Mockito.nullable(String.class), - Mockito.any(TracingContext.class), Mockito.nullable( - IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.any(TracingContext.class), Mockito.nullable(URI.class)); fs.rename(new Path("/testDir/dir1"), new Path("/testDir/dir2")); for (int i = 0; i < 10; i++) { Assertions.assertThat(fs.exists(new Path("/testDir/dir2/file" + i))) @@ -1443,13 +1442,13 @@ public void testProducerStopOnRenameFailure() throws Exception { listCallInvocation[0]++; return getFileSystem().getAbfsClient().listPath(answer.getArgument(0), answer.getArgument(1), 1, - answer.getArgument(3), answer.getArgument(4), answer.getArgument(5), answer.getArgument(6)); + answer.getArgument(3), answer.getArgument(4), answer.getArgument(6)); } return answer.callRealMethod(); }) .when(spiedClient) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(URI.class)); intercept(AccessDeniedException.class, () -> { fs.rename(new Path("/src"), new Path("/dst")); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index c759e69622d265..0be7dafdca4b33 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -224,7 +224,7 @@ public static void addGeneralMockBehaviourToAbfsClient(final AbfsClient abfsClie .when(intercept) .sendingRequest(any(), nullable(AbfsCounters.class)); Mockito.doNothing().when(intercept).updateMetrics(any(), any()); - Mockito.doReturn(listResponseData).when(abfsClient).parseListPathResults(any(), any(), any()); + Mockito.doReturn(listResponseData).when(abfsClient).parseListPathResults(any(), any()); // Returning correct retry policy based on failure reason Mockito.doReturn(exponentialRetryPolicy).when(abfsClient).getExponentialRetryPolicy(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java index c6c9fbfa49bfb8..90adb95834d26e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java @@ -140,8 +140,7 @@ protected void addPaths(final List paths, return listResponseData; }).when(client) .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), - Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable( - IdentityTransformerInterface.class), Mockito.nullable(URI.class)); + Mockito.nullable(String.class), Mockito.any(TracingContext.class), Mockito.nullable(URI.class)); listActionTaker.listRecursiveAndTakeAction(); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java index 530323b76df60f..df65f7f19e25fd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DirectoryStateHelper.java @@ -73,7 +73,7 @@ public static boolean isImplicitDirectory(Path path, AzureBlobFileSystem fs, // 2nd condition: listPaths should return some entries. AbfsRestOperation op = client.listPath( - relativePath, false, 1, null, testTracingContext, null, null, false).getOp(); + relativePath, false, 1, null, testTracingContext, null, false).getOp(); if (op != null && op.getResult() != null) { int listSize = op.getResult().getListResultSchema().paths().size(); if (listSize > 0) { From ec1419b47587389823a2b6175322bb8541a25e34 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 3 Mar 2025 08:41:14 -0800 Subject: [PATCH 09/19] Fix tests --- .../apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java | 1 - .../fs/azurebfs/ITestAzureBlobFileSystemListStatus.java | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 8fac16b822e1b6..3eb2cf99ac021c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -69,7 +69,6 @@ import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.Base64; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 9b68a5be5e16ce..2cd050e47a5152 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -81,7 +81,7 @@ */ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { - private static final int TEST_FILES_NUMBER = 6; + private static final int TEST_FILES_NUMBER = 6000; private static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { @@ -91,7 +91,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); - config.set(AZURE_LIST_MAX_RESULTS, "5"); + config.set(AZURE_LIST_MAX_RESULTS, "5000"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); @@ -369,6 +369,7 @@ public void testListStatusWithImplicitExplicitChildren() throws Exception { // Create an implicit directory under root Path dir = new Path("a"); + Path fileInsideDir = new Path("a/file"); createAzCopyFolder(dir); // Assert that implicit directory is returned @@ -377,7 +378,7 @@ public void testListStatusWithImplicitExplicitChildren() throws Exception { assertImplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); // Create a marker blob for the directory. - fs.mkdirs(dir); + fs.create(fileInsideDir); // Assert that only one entry of explicit directory is returned fileStatuses = fs.listStatus(root); From 3edab0f5b5ab5d0def2b79d9ac394d4c72001e8e Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Tue, 4 Mar 2025 00:47:06 -0800 Subject: [PATCH 10/19] Yetus Checks --- .../src/config/checkstyle-suppressions.xml | 2 + .../contracts/services/ListResponseData.java | 37 ++++++++++ .../services/AbfsAHCHttpOperation.java | 2 - .../fs/azurebfs/services/AbfsBlobClient.java | 63 ++++++++++------- .../fs/azurebfs/services/AbfsClient.java | 21 ++++-- .../fs/azurebfs/services/AbfsDfsClient.java | 67 +++++++++++-------- .../fs/azurebfs/services/AbfsErrors.java | 2 + .../ITestAzureBlobFileSystemDelete.java | 1 - .../ITestAzureBlobFileSystemListStatus.java | 11 +-- .../ITestAzureBlobFileSystemRename.java | 1 - .../azurebfs/services/AbfsClientTestUtil.java | 1 - .../services/TestListActionTaker.java | 1 - 12 files changed, 133 insertions(+), 76 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml index 07aa26d2381270..1cedf0c54b0400 100644 --- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml +++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml @@ -48,6 +48,8 @@ files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]services[\\/]AbfsClient.java"/> + fileStatusList; @@ -32,34 +37,66 @@ public class ListResponseData { private AbfsRestOperation executedRestOperation; private String continuationToken; + /** + * Returns the list of FileStatus objects. + * @return the list of FileStatus objects + */ public List getFileStatusList() { return fileStatusList; } + /** + * Sets the list of FileStatus objects. + * @param fileStatusList the list of FileStatus objects + */ public void setFileStatusList(final List fileStatusList) { this.fileStatusList = fileStatusList; } + /** + * Returns the map of rename pending JSON paths. + * @return the map of rename pending JSON paths + */ public Map getRenamePendingJsonPaths() { return renamePendingJsonPaths; } + /** + * Sets the map of rename pending JSON paths. + * @param renamePendingJsonPaths the map of rename pending JSON paths + */ public void setRenamePendingJsonPaths(final Map renamePendingJsonPaths) { this.renamePendingJsonPaths = renamePendingJsonPaths; } + /** + * Returns the executed REST operation. + * @return the executed REST operation + */ public AbfsRestOperation getOp() { return executedRestOperation; } + /** + * Sets the executed REST operation. + * @param executedRestOperation the executed REST operation + */ public void setOp(final AbfsRestOperation executedRestOperation) { this.executedRestOperation = executedRestOperation; } + /** + * Returns the continuation token. + * @return the continuation token + */ public String getContinuationToken() { return continuationToken; } + /** + * Sets the continuation token. + * @param continuationToken the continuation token + */ public void setContinuationToken(final String continuationToken) { this.continuationToken = continuationToken; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java index a68efec6268a77..dd585d32fb7cf2 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java @@ -38,7 +38,6 @@ import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; -import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; import org.apache.http.client.methods.HttpGet; @@ -48,7 +47,6 @@ import org.apache.http.client.methods.HttpPut; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.entity.ByteArrayEntity; -import org.apache.http.util.EntityUtils; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APACHE_IMPL; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 0c48f9371f983b..c473314b8afa0d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -79,7 +79,6 @@ import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -168,6 +167,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_MAX_RESULTS; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PREFIX; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESTYPE; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_BLOB_LIST_PARSING; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS; import static org.apache.hadoop.fs.azurebfs.utils.UriUtils.isKeyForDirectorySet; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ATOMIC_DIR_RENAME_RECOVERY_ON_GET_PATH_EXCEPTION; @@ -345,7 +345,8 @@ public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) * @param listMaxResults maximum number of blobs to return. * @param continuation marker to specify the continuation token. * @param tracingContext for tracing the service call. - * @return executed rest operation containing response from server. + * @param uri to be used for path conversion. + * @return {@link ListResponseData}. containing listing response. * @throws AzureBlobFileSystemException if rest operation or response parsing fails. */ @Override @@ -383,7 +384,8 @@ public ListResponseData listPath(final String relativePath, final boolean recurs listResponseData.setOp(op); // Filter the paths for which no rename redo operation is performed. - retryRenameOnAtomicEntriesInListResults(op, tracingContext, listResponseData.getRenamePendingJsonPaths()); + retryRenameOnAtomicEntriesInListResults(op, tracingContext, + listResponseData.getRenamePendingJsonPaths()); if (isEmptyListResults(op.getResult()) && is404CheckRequired) { // If the list operation returns no paths, we need to check if the path is a file. @@ -398,7 +400,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs List fileStatusList = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); for (BlobListResultEntrySchema entry : listResultSchema.paths()) { - fileStatusList.add(getFileStatusFromEntry(entry, uri)); + fileStatusList.add(getVersionedFileStatusFromEntry(entry, uri)); if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); } @@ -1589,31 +1591,38 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) /** * Parse the XML response body returned by ListBlob API on Blob Endpoint. * @param result InputStream contains the response from server. - * @return BlobListResultSchema containing the list of entries. + * @param uri to be used for path conversion. + * @return {@link ListResponseData}. containing listing response. + * @throws AzureBlobFileSystemException if parsing fails. */ @Override public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { - InputStream stream = result.getListResultStream(); - if (stream == null) { - return null; - } BlobListResultSchema listResultSchema; - try { - final SAXParser saxParser = saxParserThreadLocal.get(); - saxParser.reset(); - listResultSchema = new BlobListResultSchema(); - saxParser.parse(stream, - new BlobListXmlParser(listResultSchema, getBaseUrl().toString())); - result.setListResultSchema(listResultSchema); - } catch (SAXException | IOException e) { - throw new RuntimeException(e); + try (InputStream stream = result.getListResultStream()) { + if (stream == null) { + return null; + } + try { + final SAXParser saxParser = saxParserThreadLocal.get(); + saxParser.reset(); + listResultSchema = new BlobListResultSchema(); + saxParser.parse(stream, + new BlobListXmlParser(listResultSchema, getBaseUrl().toString())); + result.setListResultSchema(listResultSchema); + } catch (SAXException | IOException e) { + throw new AbfsDriverException(e); + } + } catch (IOException e) { + LOG.error("Unable to deserialize list results", e); + throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); } try { - return listResponseDataParsingXmlListResult(listResultSchema, uri); + return filterDuplicateEntriesAndRenamePendingFiles(listResultSchema, uri); } catch (IOException e) { - throw new AbfsDriverException(e); + LOG.error("Unable to filter list results", e); + throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); } } @@ -1907,10 +1916,12 @@ private List getMetadataHeadersList(final Hashtable fileStatuses = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); @@ -1921,7 +1932,7 @@ private ListResponseData listResponseDataParsingXmlListResult( // This is a blob entry. It is either a file or a marker blob. // In both cases we will add this. nameToEntryMap.put(entry.name(), entry); - fileStatuses.add(getFileStatusFromEntry(entry, uri)); + fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); @@ -1931,7 +1942,7 @@ private ListResponseData listResponseDataParsingXmlListResult( // This might have already been added as a marker blob. if (!nameToEntryMap.containsKey(entry.name())) { nameToEntryMap.put(entry.name(), entry); - fileStatuses.add(getFileStatusFromEntry(entry, uri)); + fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); } } } @@ -1953,9 +1964,9 @@ && isAtomicRenameKey(entry.path().toUri().getPath()) * When listing is done on a file, Blob Endpoint returns the empty listing * but DFS Endpoint returns the file status as one of the entries. * This is to convert file status into ListResultSchema. - * @param relativePath - * @param pathStatus - * @return + * @param relativePath relative path of the file. + * @param pathStatus AbfsRestOperation containing the file status. + * @return BlobListResultSchema containing the file status. */ private BlobListResultSchema getListResultSchemaFromPathStatus(String relativePath, AbfsRestOperation pathStatus) { BlobListResultSchema listResultSchema = new BlobListResultSchema(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 92888ff6bb42a0..49953346b9daf9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -305,8 +305,8 @@ private AbfsClient(final URL baseUrl, try { this.identityTransformer = identityTransformerClass.getConstructor(Configuration.class).newInstance(abfsConfiguration.getRawConfiguration()); - } catch (IllegalAccessException | InstantiationException | IllegalArgumentException | - InvocationTargetException | NoSuchMethodException e) { + } catch (IllegalAccessException | InstantiationException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException e) { throw new IOException(e); } LOG.trace("IdentityTransformer init complete"); @@ -522,7 +522,8 @@ public abstract AbfsRestOperation setFilesystemProperties(Hashtable getXMSProperties(AbfsHttpOperation result) /** * Parse the list file response from DFS ListPath API in Json format * @param result InputStream contains the list results. - * @throws IOException if parsing fails. + * @param uri to be used for path conversion. + * @return {@link ListResponseData}. containing listing response. + * @throws AzureBlobFileSystemException if parsing fails. */ @Override - public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws IOException { - InputStream listResultInputStream = result.getListResultStream(); - DfsListResultSchema listResultSchema; - try { - final ObjectMapper objectMapper = new ObjectMapper(); - listResultSchema = objectMapper.readValue(listResultInputStream, DfsListResultSchema.class); - result.setListResultSchema(listResultSchema); - } catch (IOException ex) { - LOG.error("Unable to deserialize list results", ex); - throw ex; - } + public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { + try (InputStream listResultInputStream = result.getListResultStream()) { + DfsListResultSchema listResultSchema; + try { + final ObjectMapper objectMapper = new ObjectMapper(); + listResultSchema = objectMapper.readValue(listResultInputStream, + DfsListResultSchema.class); + result.setListResultSchema(listResultSchema); + } catch (IOException ex) { + throw new AbfsDriverException(ex); + } - if (listResultSchema == null) { - throw new AbfsRestOperationException( - AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), - AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), - "listStatusAsync path not found", - null); - } + if (listResultSchema == null) { + throw new AbfsRestOperationException( + AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), + AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), + "listStatusAsync path not found", + null); + } - List fileStatuses = new ArrayList<>(); - for (DfsListResultEntrySchema entry : listResultSchema.paths()) { - fileStatuses.add(getFileStatusFromEntry(entry, uri)); + List fileStatuses = new ArrayList<>(); + for (DfsListResultEntrySchema entry : listResultSchema.paths()) { + fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); + } + ListResponseData listResponseData = new ListResponseData(); + listResponseData.setFileStatusList(fileStatuses); + listResponseData.setRenamePendingJsonPaths(null); + listResponseData.setContinuationToken( + getContinuationFromResponse(result)); + return listResponseData; + } catch (IOException ex) { + LOG.error("Unable to deserialize list results", ex); + throw new AbfsDriverException(ERR_DFS_LIST_PARSING, ex); } - ListResponseData listResponseData = new ListResponseData(); - listResponseData.setFileStatusList(fileStatuses); - listResponseData.setRenamePendingJsonPaths(null); - return listResponseData; } @Override diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java index e75df046d8d6b6..1329f64fc56a0e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java @@ -73,5 +73,7 @@ public final class AbfsErrors { "Error while recovering from create failure."; public static final String ERR_RENAME_RECOVERY = "Error while recovering from rename failure."; + public static final String ERR_BLOB_LIST_PARSING = "Parsing of XML List Response Failed in BlobClient."; + public static final String ERR_DFS_LIST_PARSING = "Parsing of Json List Response Failed in DfsClient."; private AbfsErrors() {} } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index 4a1b2730b3caae..e81b493b45c702 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -41,7 +41,6 @@ import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 2cd050e47a5152..9041282ae5d879 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -18,13 +18,9 @@ package org.apache.hadoop.fs.azurebfs; -import javax.annotation.Nullable; -import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.net.SocketTimeoutException; -import java.net.URI; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; @@ -32,7 +28,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; -import com.fasterxml.jackson.databind.ObjectMapper; import org.assertj.core.api.Assertions; import org.junit.Test; import org.mockito.Mockito; @@ -50,7 +45,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; import org.apache.hadoop.fs.azurebfs.utils.DirectoryStateHelper; @@ -71,7 +65,6 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.times; import static org.mockito.Mockito.when; @@ -81,7 +74,7 @@ */ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { - private static final int TEST_FILES_NUMBER = 6000; + private static final int TEST_FILES_NUMBER = 60; private static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { @@ -91,7 +84,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); - config.set(AZURE_LIST_MAX_RESULTS, "5000"); + config.set(AZURE_LIST_MAX_RESULTS, "50"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 15208e4eb4e5a6..18a6c23830b7cc 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -48,7 +48,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index 0be7dafdca4b33..c996133190af87 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -35,7 +35,6 @@ import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; import org.assertj.core.api.Assertions; import org.mockito.AdditionalMatchers; -import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java index 90adb95834d26e..1fb6bcc51b258c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java @@ -34,7 +34,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; -import org.apache.hadoop.fs.azurebfs.oauth2.IdentityTransformerInterface; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_AZURE_LIST_MAX_RESULTS; From 10163c81b7a2b4eecd1458b2e081e3681fc00872 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 17 Mar 2025 09:08:57 -0700 Subject: [PATCH 11/19] Fixing Failing Tests --- .../hadoop/fs/azurebfs/services/AbfsBlobClient.java | 9 ++++----- .../fs/azurebfs/ITestAzureBlobFileSystemRename.java | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 9c4fa1da190d78..de34f7be57a8a0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1864,7 +1864,6 @@ private void retryRenameOnAtomicKeyPath(final Path path, throw ex; } } - return true; } @VisibleForTesting @@ -1953,11 +1952,11 @@ private ListResponseData filterDuplicateEntriesAndRenamePendingFiles( if (StringUtils.isNotEmpty(entry.eTag())) { // This is a blob entry. It is either a file or a marker blob. // In both cases we will add this. - nameToEntryMap.put(entry.name(), entry); - fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); - if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); + } else { + nameToEntryMap.put(entry.name(), entry); + fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); } } else { // This is a BlobPrefix entry. It is a directory with file inside @@ -1978,7 +1977,7 @@ private ListResponseData filterDuplicateEntriesAndRenamePendingFiles( private boolean isRenamePendingJsonPathEntry(BlobListResultEntrySchema entry) { return entry.path() != null && !entry.path().isRoot() - && isAtomicRenameKey(entry.path().toUri().getPath()) + && isAtomicRenameKey(entry.path().toUri().getPath()) && !entry.isDirectory() && entry.path().toUri().getPath().endsWith(RenameAtomicity.SUFFIX); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 37e9250e8d6dab..146d43ab007032 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1518,7 +1518,7 @@ public void testProducerStopOnRenameFailure() throws Exception { listCallInvocation[0]++; return getFileSystem().getAbfsClient().listPath(answer.getArgument(0), answer.getArgument(1), 1, - answer.getArgument(3), answer.getArgument(4), answer.getArgument(6)); + answer.getArgument(3), answer.getArgument(4), answer.getArgument(5)); } return answer.callRealMethod(); }) @@ -2089,8 +2089,8 @@ public AzureBlobFileSystem createJsonFile(Path path, Path renameJson) fs.setWorkingDirectory(new Path(ROOT_PATH)); fs.create(new Path(path, "file.txt")); - AzureBlobFileSystemStore.VersionedFileStatus fileStatus - = (AzureBlobFileSystemStore.VersionedFileStatus) fs.getFileStatus(path); + VersionedFileStatus fileStatus + = (VersionedFileStatus) fs.getFileStatus(path); new RenameAtomicity(path, new Path("/hbase/test4"), renameJson, getTestTracingContext(fs, true), @@ -2303,8 +2303,8 @@ public void testListCrashRecoveryWithMultipleJsonFile() throws Exception { fs.create(new Path(path2, "file3.txt")); Path renameJson2 = new Path(path2.getParent(), path2.getName() + SUFFIX); - AzureBlobFileSystemStore.VersionedFileStatus fileStatus - = (AzureBlobFileSystemStore.VersionedFileStatus) fs.getFileStatus(path2); + VersionedFileStatus fileStatus + = (VersionedFileStatus) fs.getFileStatus(path2); new RenameAtomicity(path2, new Path("/hbase/test4"), renameJson2, getTestTracingContext(fs, true), From 2b13bbd62de448279735425a934b61c869149183 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 17 Mar 2025 09:40:43 -0700 Subject: [PATCH 12/19] Added Negative Scenarios Test --- .../ITestAzureBlobFileSystemListStatus.java | 71 ++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 9041282ae5d879..e6edd39956644d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -41,8 +41,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; @@ -74,7 +76,7 @@ */ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { - private static final int TEST_FILES_NUMBER = 60; + private static final int TEST_FILES_NUMBER = 6000; private static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { @@ -84,7 +86,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); - config.set(AZURE_LIST_MAX_RESULTS, "50"); + config.set(AZURE_LIST_MAX_RESULTS, "5000"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); @@ -419,6 +421,71 @@ public void testListStatusOnImplicitDirectoryPath() throws Exception { assertFileFileStatus(statuses1[0], fs.makeQualified(statuses1[0].getPath())); } + @Test + public void testListStatusOnEmptyDirectory() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path emptyDir = new Path("/emptyDir"); + fs.mkdirs(emptyDir); + + FileStatus[] statuses = fs.listStatus(emptyDir); + Assertions.assertThat(statuses.length).isEqualTo(0); + } + + @Test + public void testContinuationTokenAcrossListStatus() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path path = new Path("/testContinuationToken"); + fs.mkdirs(path); + fs.create(new Path(path + "/file1")); + fs.create(new Path(path + "/file2")); + + fs.listStatus(path); + + ListResponseData listResponseData = fs.getAbfsStore().getClient().listPath( + "/testContinuationToken",false, 1, null, getTestTracingContext(fs, true), + fs.getAbfsStore().getUri()); + + Assertions.assertThat(listResponseData.getContinuationToken()).isNotNull(); + Assertions.assertThat(listResponseData.getFileStatusList()).hasSize(1); + + ListResponseData listResponseData1 = fs.getAbfsStore().getClient().listPath( + "/testContinuationToken",false, 1, listResponseData.getContinuationToken(), getTestTracingContext(fs, true), + fs.getAbfsStore().getUri()); + + Assertions.assertThat(listResponseData1.getContinuationToken()).isNull(); + Assertions.assertThat(listResponseData1.getFileStatusList()).hasSize(1); + } + + @Test + public void testInvalidContinuationToken() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path path = new Path("/testInvalidContinuationToken"); + fs.mkdirs(path); + fs.create(new Path(path + "/file1")); + fs.create(new Path(path + "/file2")); + + intercept(AbfsRestOperationException.class, + () -> fs.getAbfsStore().getClient().listPath( + "/testInvalidContinuationToken", false, 1, "invalidToken", + getTestTracingContext(fs, true), fs.getAbfsStore().getUri())); + } + + @Test + public void testEmptyContinuationToken() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path path = new Path("/testInvalidContinuationToken"); + fs.mkdirs(path); + fs.create(new Path(path + "/file1")); + fs.create(new Path(path + "/file2")); + + ListResponseData listResponseData = fs.getAbfsStore().getClient().listPath( + "/testInvalidContinuationToken", false, 1, "", + getTestTracingContext(fs, true), fs.getAbfsStore().getUri()); + + Assertions.assertThat(listResponseData.getContinuationToken()).isNotNull(); + Assertions.assertThat(listResponseData.getFileStatusList()).hasSize(1); + } + private void assertFileFileStatus(final FileStatus fileStatus, final Path qualifiedPath) { Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); From 5d34f65d098fb7cbf8da25c2dd7a81160ef1ca11 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 17 Mar 2025 22:50:45 -0700 Subject: [PATCH 13/19] PR Checks --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 4 +- .../fs/azurebfs/services/AbfsBlobClient.java | 52 ++++++++----------- .../fs/azurebfs/services/AbfsClient.java | 7 +-- 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index fdc4b56593c2b5..a4438ac118d093 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1281,8 +1281,8 @@ public String listStatus(final Path path, final String startFrom, AbfsRestOperation op = listResponseData.getOp(); perfInfo.registerResult(op.getResult()); continuation = listResponseData.getContinuationToken(); - List fileStatusList = listResponseData.getFileStatusList(); - fileStatuses.addAll(fileStatusList); + List fileStatusListInCurrItr = listResponseData.getFileStatusList(); + fileStatuses.addAll(fileStatusListInCurrItr); perfInfo.registerSuccess(true); countAggregate++; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index de34f7be57a8a0..6c8fa3aa043c7f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -383,9 +383,15 @@ public ListResponseData listPath(final String relativePath, final boolean recurs ListResponseData listResponseData = parseListPathResults(op.getResult(), uri); listResponseData.setOp(op); - // Filter the paths for which no rename redo operation is performed. - retryRenameOnAtomicEntriesInListResults(op, tracingContext, - listResponseData.getRenamePendingJsonPaths()); + // Perform Pending Rename Redo Operation on Atomic Rename Paths. + // Crashed HBase log rename recovery can be done by Filesystem.listStatus. + if (tracingContext != null + && tracingContext.getOpType() == FSOperationType.LISTSTATUS + && op.getResult() != null + && op.getResult().getStatusCode() == HTTP_OK) { + retryRenameOnAtomicEntriesInListResults(tracingContext, + listResponseData.getRenamePendingJsonPaths()); + } if (isEmptyListResults(op.getResult()) && is404CheckRequired) { // If the list operation returns no paths, we need to check if the path is a file. @@ -400,9 +406,10 @@ public ListResponseData listPath(final String relativePath, final boolean recurs List fileStatusList = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); for (BlobListResultEntrySchema entry : listResultSchema.paths()) { - fileStatusList.add(getVersionedFileStatusFromEntry(entry, uri)); if (isRenamePendingJsonPathEntry(entry)) { renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); + } else { + fileStatusList.add(getVersionedFileStatusFromEntry(entry, uri)); } } AbfsRestOperation listOp = getAbfsRestOperation( @@ -423,24 +430,11 @@ public ListResponseData listPath(final String relativePath, final boolean recurs /** * Filter the paths for which no rename redo operation is performed. * Update BlobListResultSchema path with filtered entries. - * - * @param op blob list operation * @param tracingContext tracing context * @throws AzureBlobFileSystemException if rest operation or response parsing fails. */ - private void retryRenameOnAtomicEntriesInListResults(final AbfsRestOperation op, - final TracingContext tracingContext, + private void retryRenameOnAtomicEntriesInListResults(TracingContext tracingContext, Map renamePendingJsonPaths) throws AzureBlobFileSystemException { - /* - * Crashed HBase log rename recovery is done by Filesystem.getFileStatus and - * Filesystem.listStatus. - */ - if (tracingContext == null - || tracingContext.getOpType() != FSOperationType.LISTSTATUS - || op == null || op.getResult() == null - || op.getResult().getStatusCode() != HTTP_OK) { - return; - } if (renamePendingJsonPaths == null || renamePendingJsonPaths.isEmpty()) { return; } @@ -1179,10 +1173,7 @@ public AbfsRestOperation getPathStatus(final String path, throws AzureBlobFileSystemException { AbfsRestOperation op = this.getPathStatus(path, tracingContext, contextEncryptionAdapter, true); - /* - * Crashed HBase log-folder rename can be recovered by FileSystem#getFileStatus - * and FileSystem#listStatus calls. - */ + // Crashed HBase log-folder rename can be recovered by FileSystem#getFileStatus if (tracingContext.getOpType() == FSOperationType.GET_FILESTATUS && op.getResult() != null && checkIsDir(op.getResult())) { takeGetPathStatusAtomicRenameKeyAction(new Path(path), tracingContext); @@ -1785,13 +1776,14 @@ public void takeGetPathStatusAtomicRenameKeyAction(final Path path, AbfsRestOperation pendingJsonFileStatus; Path pendingJsonPath = new Path(path.getParent(), path.toUri().getPath() + RenameAtomicity.SUFFIX); + int pendingJsonFileContentLength = 0; try { - pendingJsonFileStatus = getPathStatus( - pendingJsonPath.toUri().getPath(), tracingContext, - null, false); + pendingJsonFileStatus = getPathStatus(pendingJsonPath.toUri().getPath(), + tracingContext, null, false); if (checkIsDir(pendingJsonFileStatus.getResult())) { return; } + pendingJsonFileContentLength = Integer.parseInt(pendingJsonFileStatus.getResult().getResponseHeader(CONTENT_LENGTH)); } catch (AbfsRestOperationException ex) { if (ex.getStatusCode() == HTTP_NOT_FOUND) { return; @@ -1802,9 +1794,7 @@ public void takeGetPathStatusAtomicRenameKeyAction(final Path path, boolean renameSrcHasChanged; try { RenameAtomicity renameAtomicity = getRedoRenameAtomicity( - pendingJsonPath, Integer.parseInt(pendingJsonFileStatus.getResult() - .getResponseHeader(CONTENT_LENGTH)), - tracingContext); + pendingJsonPath, pendingJsonFileContentLength, tracingContext); renameAtomicity.redo(); renameSrcHasChanged = false; } catch (AbfsRestOperationException ex) { @@ -1976,8 +1966,10 @@ private ListResponseData filterDuplicateEntriesAndRenamePendingFiles( } private boolean isRenamePendingJsonPathEntry(BlobListResultEntrySchema entry) { - return entry.path() != null && !entry.path().isRoot() - && isAtomicRenameKey(entry.path().toUri().getPath()) && !entry.isDirectory() + return entry.path() != null + && !entry.path().isRoot() + && isAtomicRenameKey(entry.path().toUri().getPath()) + && !entry.isDirectory() && entry.path().toUri().getPath().endsWith(RenameAtomicity.SUFFIX); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 49953346b9daf9..25cb68cb2838f3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1806,8 +1806,8 @@ private String getPrimaryUser() throws AzureBlobFileSystemException { * @throws AzureBlobFileSystemException if transformation fails. */ protected VersionedFileStatus getVersionedFileStatusFromEntry( - ListResultEntrySchema entry, - URI uri) throws AzureBlobFileSystemException { + ListResultEntrySchema entry, URI uri) throws AzureBlobFileSystemException { + long blockSize = abfsConfiguration.getAzureBlockSize(); final String owner, group; try{ if (identityTransformer != null) { @@ -1820,6 +1820,7 @@ protected VersionedFileStatus getVersionedFileStatusFromEntry( group = null; } } catch (IOException ex) { + LOG.error("Failed to get owner/group for path {}", entry.name(), ex); throw new AbfsDriverException(ex); } final String encryptionContext = entry.getXMsEncryptionContext(); @@ -1848,7 +1849,7 @@ protected VersionedFileStatus getVersionedFileStatusFromEntry( contentLength, isDirectory, 1, - getAbfsConfiguration().getAzureBlockSize(), + blockSize, lastModifiedMillis, entryPath, entry.eTag(), From 5b1374cc37c7dec7f0703a3229b25e6b07c351c7 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Wed, 19 Mar 2025 22:37:52 -0700 Subject: [PATCH 14/19] Fixing tests and Comments --- .../fs/azurebfs/services/AbfsBlobClient.java | 19 ++++++++++++------- .../ITestAzureBlobFileSystemListStatus.java | 6 ++++-- .../ITestAzureBlobFileSystemRename.java | 10 +++++----- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 6c8fa3aa043c7f..0c4aa305e0e4ac 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -385,8 +385,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs // Perform Pending Rename Redo Operation on Atomic Rename Paths. // Crashed HBase log rename recovery can be done by Filesystem.listStatus. - if (tracingContext != null - && tracingContext.getOpType() == FSOperationType.LISTSTATUS + if (tracingContext.getOpType() == FSOperationType.LISTSTATUS && op.getResult() != null && op.getResult().getStatusCode() == HTTP_OK) { retryRenameOnAtomicEntriesInListResults(tracingContext, @@ -1626,14 +1625,14 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throw new AbfsDriverException(e); } } catch (IOException e) { - LOG.error("Unable to deserialize list results", e); + LOG.error("Unable to deserialize list results for uri {}", uri, e); throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); } try { return filterDuplicateEntriesAndRenamePendingFiles(listResultSchema, uri); } catch (IOException e) { - LOG.error("Unable to filter list results", e); + LOG.error("Unable to filter list results for uri {}", uri, e); throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); } } @@ -1965,12 +1964,18 @@ private ListResponseData filterDuplicateEntriesAndRenamePendingFiles( return listResponseData; } + /** + * Check if the entry is a rename pending json file path. + * @param entry to be checked. + * @return true if it is a rename pending json file path. + */ private boolean isRenamePendingJsonPathEntry(BlobListResultEntrySchema entry) { - return entry.path() != null + String path = entry.path() != null ? entry.path().toUri().getPath() : null; + return path != null && !entry.path().isRoot() - && isAtomicRenameKey(entry.path().toUri().getPath()) + && isAtomicRenameKey(path) && !entry.isDirectory() - && entry.path().toUri().getPath().endsWith(RenameAtomicity.SUFFIX); + && path.endsWith(RenameAtomicity.SUFFIX); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index e6edd39956644d..2c583156caa54e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -29,6 +29,7 @@ import java.util.concurrent.Future; import org.assertj.core.api.Assertions; +import org.junit.Ignore; import org.junit.Test; import org.mockito.Mockito; import org.mockito.stubbing.Stubber; @@ -442,14 +443,14 @@ public void testContinuationTokenAcrossListStatus() throws Exception { fs.listStatus(path); ListResponseData listResponseData = fs.getAbfsStore().getClient().listPath( - "/testContinuationToken",false, 1, null, getTestTracingContext(fs, true), + "/testContinuationToken", false, 1, null, getTestTracingContext(fs, true), fs.getAbfsStore().getUri()); Assertions.assertThat(listResponseData.getContinuationToken()).isNotNull(); Assertions.assertThat(listResponseData.getFileStatusList()).hasSize(1); ListResponseData listResponseData1 = fs.getAbfsStore().getClient().listPath( - "/testContinuationToken",false, 1, listResponseData.getContinuationToken(), getTestTracingContext(fs, true), + "/testContinuationToken", false, 1, listResponseData.getContinuationToken(), getTestTracingContext(fs, true), fs.getAbfsStore().getUri()); Assertions.assertThat(listResponseData1.getContinuationToken()).isNull(); @@ -458,6 +459,7 @@ public void testContinuationTokenAcrossListStatus() throws Exception { @Test public void testInvalidContinuationToken() throws Exception { + assumeHnsDisabled(); final AzureBlobFileSystem fs = getFileSystem(); Path path = new Path("/testInvalidContinuationToken"); fs.mkdirs(path); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 146d43ab007032..dd105995ee2af6 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1782,19 +1782,19 @@ private Configuration createConfig(String producerQueueSize, String consumerMaxL private void validateRename(AzureBlobFileSystem fs, Path src, Path dst, boolean isSrcExist, boolean isDstExist, boolean isJsonExist) throws IOException { - Assertions.assertThat(fs.exists(dst)) - .describedAs("Renamed Destination directory should exist.") - .isEqualTo(isDstExist); Assertions.assertThat(fs.exists(new Path(src.getParent(), src.getName() + SUFFIX))) .describedAs("Renamed Pending Json file should exist.") .isEqualTo(isJsonExist); Assertions.assertThat(fs.exists(src)) - .describedAs("Renamed Destination directory should exist.") + .describedAs("Renamed Source directory should exist.") .isEqualTo(isSrcExist); + Assertions.assertThat(fs.exists(dst)) + .describedAs("Renamed Destination directory should exist.") + .isEqualTo(isDstExist); } /** - * Test the renaming of a directory with different parallelism configurations. + * Test the renaming of a directory with different parallelism configurations.c */ @Test public void testRenameDirWithDifferentParallelismConfig() throws Exception { From 21f590ab6b6d580a05e38954251123d2367e2665 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Fri, 21 Mar 2025 00:08:56 -0700 Subject: [PATCH 15/19] Fixing Faling test --- .../fs/azurebfs/services/AbfsBlobClient.java | 16 ++++---- .../ITestAzureBlobFileSystemListStatus.java | 37 ++++--------------- .../azurebfs/services/AbfsClientTestUtil.java | 27 +++++++++++--- 3 files changed, 35 insertions(+), 45 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 0c4aa305e0e4ac..c10755b06f922f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -392,7 +392,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs listResponseData.getRenamePendingJsonPaths()); } - if (isEmptyListResults(op.getResult()) && is404CheckRequired) { + if (isEmptyListResults(listResponseData) && is404CheckRequired) { // If the list operation returns no paths, we need to check if the path is a file. // If it is a file, we need to return the file in the list. // If it is a non-existing path, we need to throw a FileNotFoundException. @@ -2032,25 +2032,23 @@ public boolean isNonEmptyDirectory(String path, // This method is only called internally to determine state of a path // and hence don't need identity transformation to happen. ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, null, false); - return !isEmptyListResults(listResponseData.getOp().getResult()); + return !isEmptyListResults(listResponseData); } /** * Check if the list call returned empty results without any continuation token. - * @param result The response of listing API from the server. + * @param listResponseData The response of listing API from the server. * @return True if empty results without continuation token. */ - private boolean isEmptyListResults(AbfsHttpOperation result) { + private boolean isEmptyListResults(ListResponseData listResponseData) { + AbfsHttpOperation result = listResponseData.getOp().getResult(); boolean isEmptyList = result != null && result.getStatusCode() == HTTP_OK && // List Call was successful result.getListResultSchema() != null && // Parsing of list response was successful - result.getListResultSchema().paths().isEmpty() && // No paths were returned - result.getListResultSchema() instanceof BlobListResultSchema && // It is safe to typecast to BlobListResultSchema - ((BlobListResultSchema) result.getListResultSchema()).getNextMarker() == null; // No continuation token was returned + listResponseData.getFileStatusList().isEmpty() && listResponseData.getRenamePendingJsonPaths().isEmpty() &&// No paths were returned + StringUtils.isEmpty(listResponseData.getContinuationToken()); // No continuation token was returned if (isEmptyList) { LOG.debug("List call returned empty results without any continuation token."); return true; - } else if (result != null && !(result.getListResultSchema() instanceof BlobListResultSchema)) { - throw new RuntimeException("List call returned unexpected results over Blob Endpoint."); } return false; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 2c583156caa54e..b9e4944aad306f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -78,7 +78,7 @@ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { private static final int TEST_FILES_NUMBER = 6000; - private static final String TEST_CONTINUATION_TOKEN = "continuation"; + public static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { super(); @@ -128,43 +128,20 @@ public Void call() throws Exception { */ @Test public void testListPathTracingContext() throws Exception { - assumeDfsServiceType(); - final AzureBlobFileSystem fs = getFileSystem(); - final AzureBlobFileSystem spiedFs = Mockito.spy(fs); - final AzureBlobFileSystemStore spiedStore = Mockito.spy(fs.getAbfsStore()); - final AbfsClient spiedClient = Mockito.spy(fs.getAbfsClient()); + final AzureBlobFileSystem spiedFs = Mockito.spy(getFileSystem()); + final AzureBlobFileSystemStore spiedStore = Mockito.spy(spiedFs.getAbfsStore()); + final AbfsClient spiedClient = Mockito.spy(spiedFs.getAbfsClient()); final TracingContext spiedTracingContext = Mockito.spy( new TracingContext( - fs.getClientCorrelationId(), fs.getFileSystemId(), + spiedFs.getClientCorrelationId(), spiedFs.getFileSystemId(), FSOperationType.LISTSTATUS, true, TracingHeaderFormat.ALL_ID_FORMAT, null)); Mockito.doReturn(spiedStore).when(spiedFs).getAbfsStore(); - spiedStore.setClient(spiedClient); + Mockito.doReturn(spiedClient).when(spiedStore).getClient(); spiedFs.setWorkingDirectory(new Path("/")); - AbfsClientTestUtil.setMockAbfsRestOperationForListPathOperation(spiedClient, + AbfsClientTestUtil.setMockAbfsRestOperationForListOperation(spiedClient, (httpOperation) -> { - - ListResultEntrySchema entry = new DfsListResultEntrySchema() - .withName("a") - .withIsDirectory(true); - List paths = new ArrayList<>(); - paths.add(entry); - paths.clear(); - entry = new DfsListResultEntrySchema() - .withName("abc.txt") - .withIsDirectory(false); - paths.add(entry); - ListResultSchema schema1 = new DfsListResultSchema().withPaths(paths); - ListResultSchema schema2 = new DfsListResultSchema().withPaths(paths); - - when(httpOperation.getListResultSchema()).thenReturn(schema1) - .thenReturn(schema2); - when(httpOperation.getResponseHeader( - HttpHeaderConfigurations.X_MS_CONTINUATION)) - .thenReturn(TEST_CONTINUATION_TOKEN) - .thenReturn(EMPTY_STRING); - Stubber stubber = Mockito.doThrow( new SocketTimeoutException(CONNECTION_TIMEOUT_JDK_MESSAGE)); stubber.doNothing().when(httpOperation).processResponse( diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index c996133190af87..468ad1e6ce82db 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -45,8 +45,10 @@ import static java.net.HttpURLConnection.HTTP_OK; import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.TEST_CONTINUATION_TOKEN; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_XML; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCKLIST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.CONTENT_LENGTH; @@ -76,7 +78,7 @@ private AbfsClientTestUtil() { } - public static void setMockAbfsRestOperationForListPathOperation( + public static void setMockAbfsRestOperationForListOperation( final AbfsClient spiedClient, FunctionRaisingIOE functionRaisingIOE) throws Exception { @@ -92,17 +94,30 @@ public static void setMockAbfsRestOperationForListPathOperation( new ArrayList<>(), spiedClient.getAbfsConfiguration() )); - ListResponseData listResponseData = Mockito.spy(new ListResponseData()); - listResponseData.setRenamePendingJsonPaths(null); - listResponseData.setOp(abfsRestOperation); - listResponseData.setFileStatusList(new ArrayList<>()); + ListResponseData listResponseData1 = Mockito.spy(new ListResponseData()); + listResponseData1.setRenamePendingJsonPaths(null); + listResponseData1.setOp(abfsRestOperation); + listResponseData1.setFileStatusList(new ArrayList<>()); + listResponseData1.setContinuationToken(TEST_CONTINUATION_TOKEN); + + ListResponseData listResponseData2 = Mockito.spy(new ListResponseData()); + listResponseData2.setRenamePendingJsonPaths(null); + listResponseData2.setOp(abfsRestOperation); + listResponseData2.setFileStatusList(new ArrayList<>()); + listResponseData2.setContinuationToken(EMPTY_STRING); Mockito.doReturn(abfsRestOperation).when(spiedClient).getAbfsRestOperation( eq(AbfsRestOperationType.ListPaths), any(), any(), any()); + Mockito.doReturn(abfsRestOperation).when(spiedClient).getAbfsRestOperation( + eq(AbfsRestOperationType.ListBlobs), any(), any(), any()); - addGeneralMockBehaviourToAbfsClient(spiedClient, exponentialRetryPolicy, staticRetryPolicy, intercept, listResponseData); + addGeneralMockBehaviourToAbfsClient(spiedClient, exponentialRetryPolicy, staticRetryPolicy, intercept, listResponseData1); addGeneralMockBehaviourToRestOpAndHttpOp(abfsRestOperation, httpOperation); + Mockito.doReturn(listResponseData1).doReturn(listResponseData2) + .when(spiedClient) + .parseListPathResults(any(), any()); + functionRaisingIOE.apply(httpOperation); } From 38f7269db6a17ec1631c8554ed64f8d7bc026794 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Tue, 25 Mar 2025 23:36:10 -0700 Subject: [PATCH 16/19] Improved logging --- .../apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java | 4 ++++ .../org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java | 3 +++ .../fs/azurebfs/ITestAzureBlobFileSystemListStatus.java | 4 ++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index c10755b06f922f..646fedb722a3e0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -402,6 +402,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs } AbfsRestOperation pathStatus = this.getPathStatus(relativePath, tracingContext, null, false); BlobListResultSchema listResultSchema = getListResultSchemaFromPathStatus(relativePath, pathStatus); + LOG.debug("ListBlob attempted on a file path. Returning file status."); List fileStatusList = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); for (BlobListResultEntrySchema entry : listResultSchema.paths()) { @@ -1621,6 +1622,9 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) saxParser.parse(stream, new BlobListXmlParser(listResultSchema, getBaseUrl().toString())); result.setListResultSchema(listResultSchema); + LOG.debug("ListBlobs listed {} blobs with {} as continuation token", + listResultSchema.paths().size(), + listResultSchema.getNextMarker()); } catch (SAXException | IOException e) { throw new AbfsDriverException(e); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 46d518eeb4eb6f..41f03faebbc5f0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -1486,6 +1486,9 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) listResultSchema = objectMapper.readValue(listResultInputStream, DfsListResultSchema.class); result.setListResultSchema(listResultSchema); + LOG.debug("ListPath listed {} paths with {} as continuation token", + listResultSchema.paths().size(), + getContinuationFromResponse(result)); } catch (IOException ex) { throw new AbfsDriverException(ex); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index b9e4944aad306f..9ce4b72b9c72ca 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -77,7 +77,7 @@ */ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { - private static final int TEST_FILES_NUMBER = 6000; + private static final int TEST_FILES_NUMBER = 60; public static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { @@ -87,7 +87,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); - config.set(AZURE_LIST_MAX_RESULTS, "5000"); + config.set(AZURE_LIST_MAX_RESULTS, "50"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); From 4e19d7bab162a614fbc9e4c2ecd1e234e5034a9d Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Tue, 25 Mar 2025 23:47:13 -0700 Subject: [PATCH 17/19] Logging on DFS Listing --- .../apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java | 8 -------- .../fs/azurebfs/ITestAzureBlobFileSystemListStatus.java | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 41f03faebbc5f0..3a8c264850f9c6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -1493,14 +1493,6 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throw new AbfsDriverException(ex); } - if (listResultSchema == null) { - throw new AbfsRestOperationException( - AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), - AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), - "listStatusAsync path not found", - null); - } - List fileStatuses = new ArrayList<>(); for (DfsListResultEntrySchema entry : listResultSchema.paths()) { fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 9ce4b72b9c72ca..b9e4944aad306f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -77,7 +77,7 @@ */ public class ITestAzureBlobFileSystemListStatus extends AbstractAbfsIntegrationTest { - private static final int TEST_FILES_NUMBER = 60; + private static final int TEST_FILES_NUMBER = 6000; public static final String TEST_CONTINUATION_TOKEN = "continuation"; public ITestAzureBlobFileSystemListStatus() throws Exception { @@ -87,7 +87,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); - config.set(AZURE_LIST_MAX_RESULTS, "50"); + config.set(AZURE_LIST_MAX_RESULTS, "5000"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); From e5a021a00bbcacd9e026eee61ebadf82badadeaa Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Tue, 25 Mar 2025 23:48:11 -0700 Subject: [PATCH 18/19] Unused Import --- .../fs/azurebfs/ITestAzureBlobFileSystemListStatus.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index b9e4944aad306f..744966c05fb3ba 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -29,7 +29,6 @@ import java.util.concurrent.Future; import org.assertj.core.api.Assertions; -import org.junit.Ignore; import org.junit.Test; import org.mockito.Mockito; import org.mockito.stubbing.Stubber; @@ -41,13 +40,8 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; -import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultEntrySchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; import org.apache.hadoop.fs.azurebfs.utils.DirectoryStateHelper; @@ -57,7 +51,6 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import static java.net.HttpURLConnection.HTTP_OK; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; From 972889915dbb4f9c53162838f56f6bbaeda2431e Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Wed, 26 Mar 2025 05:26:19 -0700 Subject: [PATCH 19/19] Addressing Comments --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 7 +- .../fs/azurebfs/services/AbfsBlobClient.java | 14 +-- .../fs/azurebfs/services/AbfsClient.java | 31 ++++--- .../fs/azurebfs/services/AbfsDfsClient.java | 4 +- .../azurebfs/services/AbfsHttpOperation.java | 4 + .../services/ListResponseData.java | 3 +- .../services/VersionedFileStatus.java | 12 +++ .../ITestAzureBlobFileSystemListStatus.java | 89 +++++++++++++------ .../ITestAzureBlobFileSystemRename.java | 4 +- .../azurebfs/services/AbfsClientTestUtil.java | 1 - .../TestAbfsRestOperationMockFailures.java | 1 - .../services/TestListActionTaker.java | 1 - 12 files changed, 107 insertions(+), 64 deletions(-) rename hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/{contracts => }/services/ListResponseData.java (96%) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index a4438ac118d093..eef6a8108d5ce5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -76,7 +76,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; +import org.apache.hadoop.fs.azurebfs.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.ExtensionHelper; @@ -1282,8 +1282,9 @@ public String listStatus(final Path path, final String startFrom, perfInfo.registerResult(op.getResult()); continuation = listResponseData.getContinuationToken(); List fileStatusListInCurrItr = listResponseData.getFileStatusList(); - fileStatuses.addAll(fileStatusListInCurrItr); - + if (fileStatusListInCurrItr != null && !fileStatusListInCurrItr.isEmpty()) { + fileStatuses.addAll(fileStatusListInCurrItr); + } perfInfo.registerSuccess(true); countAggregate++; shouldContinue = diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 646fedb722a3e0..8166cfb60df056 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -74,7 +74,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListXmlParser; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; @@ -404,13 +403,8 @@ public ListResponseData listPath(final String relativePath, final boolean recurs BlobListResultSchema listResultSchema = getListResultSchemaFromPathStatus(relativePath, pathStatus); LOG.debug("ListBlob attempted on a file path. Returning file status."); List fileStatusList = new ArrayList<>(); - Map renamePendingJsonPaths = new HashMap<>(); for (BlobListResultEntrySchema entry : listResultSchema.paths()) { - if (isRenamePendingJsonPathEntry(entry)) { - renamePendingJsonPaths.put(entry.path(), entry.contentLength().intValue()); - } else { - fileStatusList.add(getVersionedFileStatusFromEntry(entry, uri)); - } + fileStatusList.add(getVersionedFileStatusFromEntry(entry, uri)); } AbfsRestOperation listOp = getAbfsRestOperation( AbfsRestOperationType.ListBlobs, @@ -420,7 +414,7 @@ public ListResponseData listPath(final String relativePath, final boolean recurs listOp.hardSetGetListStatusResult(HTTP_OK, listResultSchema); listResponseData.setFileStatusList(fileStatusList); listResponseData.setContinuationToken(null); - listResponseData.setRenamePendingJsonPaths(renamePendingJsonPaths); + listResponseData.setRenamePendingJsonPaths(null); listResponseData.setOp(listOp); return listResponseData; } @@ -1629,14 +1623,14 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throw new AbfsDriverException(e); } } catch (IOException e) { - LOG.error("Unable to deserialize list results for uri {}", uri, e); + LOG.error("Unable to deserialize list results for uri {}", uri.toString(), e); throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); } try { return filterDuplicateEntriesAndRenamePendingFiles(listResultSchema, uri); } catch (IOException e) { - LOG.error("Unable to filter list results for uri {}", uri, e); + LOG.error("Unable to filter list results for uri {}", uri.toString(), e); throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 25cb68cb2838f3..dd12aa3d0f9387 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -70,7 +70,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; @@ -303,10 +302,11 @@ private AbfsClient(final URL baseUrl, abfsConfiguration.getRawConfiguration().getClass(FS_AZURE_IDENTITY_TRANSFORM_CLASS, IdentityTransformer.class, IdentityTransformerInterface.class); try { - this.identityTransformer = - identityTransformerClass.getConstructor(Configuration.class).newInstance(abfsConfiguration.getRawConfiguration()); + this.identityTransformer = identityTransformerClass.getConstructor( + Configuration.class).newInstance(abfsConfiguration.getRawConfiguration()); } catch (IllegalAccessException | InstantiationException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException e) { + LOG.error("IdentityTransformer Init Falied", e); throw new IOException(e); } LOG.trace("IdentityTransformer init complete"); @@ -1773,23 +1773,29 @@ protected AbfsRestOperation getSuccessOp(final AbfsRestOperationType operationTy return successOp; } + /** + * Get the primary user group name. + * @return primary user group name + * @throws AzureBlobFileSystemException if unable to get the primary user group + */ private String getPrimaryUserGroup() throws AzureBlobFileSystemException { - String primaryUserGroup; if (!getAbfsConfiguration().getSkipUserGroupMetadataDuringInitialization()) { try { - primaryUserGroup = UserGroupInformation.getCurrentUser().getPrimaryGroupName(); + return UserGroupInformation.getCurrentUser().getPrimaryGroupName(); } catch (IOException ex) { LOG.error("Failed to get primary group for {}, using user name as primary group name", getPrimaryUser()); - primaryUserGroup = getPrimaryUser(); } - } else { - //Provide a default group name - primaryUserGroup = getPrimaryUser(); } - return primaryUserGroup; + //Provide a default group name + return getPrimaryUser(); } + /** + * Get the primary username. + * @return primary username + * @throws AzureBlobFileSystemException if unable to get the primary user + */ private String getPrimaryUser() throws AzureBlobFileSystemException { try { return UserGroupInformation.getCurrentUser().getUserName(); @@ -1808,16 +1814,13 @@ private String getPrimaryUser() throws AzureBlobFileSystemException { protected VersionedFileStatus getVersionedFileStatusFromEntry( ListResultEntrySchema entry, URI uri) throws AzureBlobFileSystemException { long blockSize = abfsConfiguration.getAzureBlockSize(); - final String owner, group; + String owner = null, group = null; try{ if (identityTransformer != null) { owner = identityTransformer.transformIdentityForGetRequest( entry.owner(), true, getPrimaryUser()); group = identityTransformer.transformIdentityForGetRequest( entry.group(), false, getPrimaryUserGroup()); - } else { - owner = null; - group = null; } } catch (IOException ex) { LOG.error("Failed to get owner/group for path {}", entry.name(), ex); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 3a8c264850f9c6..ef4194179dcb0a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -61,10 +61,8 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidFileSystemPropertyException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; -import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultEntrySchema; import org.apache.hadoop.fs.azurebfs.contracts.services.DfsListResultSchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; @@ -1504,7 +1502,7 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) getContinuationFromResponse(result)); return listResponseData; } catch (IOException ex) { - LOG.error("Unable to deserialize list results", ex); + LOG.error("Unable to deserialize list results for Uri {}", uri.toString(), ex); throw new AbfsDriverException(ERR_DFS_LIST_PARSING, ex); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 4ff4bdac4a169f..fc35aecd587741 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -660,6 +660,10 @@ protected boolean isConnectionDisconnectedOnError() { return connectionDisconnectedOnError; } + /** + * Sets the list result schema after parsing done on Client. + * @param listResultSchema ListResultSchema + */ protected void setListResultSchema(final ListResultSchema listResultSchema) { this.listResultSchema = listResultSchema; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListResponseData.java similarity index 96% rename from hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java rename to hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListResponseData.java index e009025b262d89..28f10226d95e9d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/ListResponseData.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListResponseData.java @@ -16,14 +16,13 @@ * limitations under the License. */ -package org.apache.hadoop.fs.azurebfs.contracts.services; +package org.apache.hadoop.fs.azurebfs.services; import java.util.List; import java.util.Map; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; /** * This class is used to hold the response data for list operations. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java index 42a15a99305165..2db6e9dd83e8c3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/VersionedFileStatus.java @@ -106,15 +106,27 @@ public String getVersion() { return this.version; } + /** + * Returns the etag of this FileStatus. + * @return a string value for the FileStatus etag. + */ @Override public String getEtag() { return getVersion(); } + /** + * Returns the encryption context of this FileStatus + * @return a string value for the FileStatus encryption context + */ public String getEncryptionContext() { return encryptionContext; } + /** + * Returns a string representation of the object. + * @return a string representation of the object + */ @Override public String toString() { final StringBuilder sb = new StringBuilder( diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 744966c05fb3ba..1c4ef6d4212727 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -41,7 +41,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; +import org.apache.hadoop.fs.azurebfs.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; import org.apache.hadoop.fs.azurebfs.utils.DirectoryStateHelper; @@ -53,6 +53,7 @@ import static java.net.HttpURLConnection.HTTP_OK; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; +import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; @@ -340,7 +341,8 @@ public void testListStatusWithImplicitExplicitChildren() throws Exception { // Assert that implicit directory is returned FileStatus[] fileStatuses = fs.listStatus(root); - Assertions.assertThat(fileStatuses.length).isEqualTo(1); + Assertions.assertThat(fileStatuses.length) + .describedAs("List size is not expected").isEqualTo(1); assertImplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); // Create a marker blob for the directory. @@ -348,7 +350,8 @@ public void testListStatusWithImplicitExplicitChildren() throws Exception { // Assert that only one entry of explicit directory is returned fileStatuses = fs.listStatus(root); - Assertions.assertThat(fileStatuses.length).isEqualTo(1); + Assertions.assertThat(fileStatuses.length) + .describedAs("List size is not expected").isEqualTo(1); assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); // Create a file under root @@ -357,9 +360,10 @@ public void testListStatusWithImplicitExplicitChildren() throws Exception { // Assert that two entries are returned in alphabetic order. fileStatuses = fs.listStatus(root); - Assertions.assertThat(fileStatuses.length).isEqualTo(2); + Assertions.assertThat(fileStatuses.length) + .describedAs("List size is not expected").isEqualTo(2); assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); - assertFileFileStatus(fileStatuses[1], fs.makeQualified(file1)); + assertFilePathFileStatus(fileStatuses[1], fs.makeQualified(file1)); // Create another implicit directory under root. Path dir2 = new Path("c"); @@ -367,9 +371,10 @@ public void testListStatusWithImplicitExplicitChildren() throws Exception { // Assert that three entries are returned in alphabetic order. fileStatuses = fs.listStatus(root); - Assertions.assertThat(fileStatuses.length).isEqualTo(3); + Assertions.assertThat(fileStatuses.length) + .describedAs("List size is not expected").isEqualTo(3); assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir)); - assertFileFileStatus(fileStatuses[1], fs.makeQualified(file1)); + assertFilePathFileStatus(fileStatuses[1], fs.makeQualified(file1)); assertImplicitDirectoryFileStatus(fileStatuses[2], fs.makeQualified(dir2)); } @@ -384,12 +389,14 @@ public void testListStatusOnImplicitDirectoryPath() throws Exception { createAzCopyFolder(implicitPath); FileStatus[] statuses = fs.listStatus(implicitPath); - Assertions.assertThat(statuses.length).isGreaterThanOrEqualTo(1); + Assertions.assertThat(statuses.length) + .describedAs("List size is not expected").isGreaterThanOrEqualTo(1); assertImplicitDirectoryFileStatus(statuses[0], fs.makeQualified(statuses[0].getPath())); FileStatus[] statuses1 = fs.listStatus(new Path(statuses[0].getPath().toString())); - Assertions.assertThat(statuses1.length).isGreaterThanOrEqualTo(1); - assertFileFileStatus(statuses1[0], fs.makeQualified(statuses1[0].getPath())); + Assertions.assertThat(statuses1.length) + .describedAs("List size is not expected").isGreaterThanOrEqualTo(1); + assertFilePathFileStatus(statuses1[0], fs.makeQualified(statuses1[0].getPath())); } @Test @@ -399,7 +406,20 @@ public void testListStatusOnEmptyDirectory() throws Exception { fs.mkdirs(emptyDir); FileStatus[] statuses = fs.listStatus(emptyDir); - Assertions.assertThat(statuses.length).isEqualTo(0); + Assertions.assertThat(statuses.length) + .describedAs("List size is not expected").isEqualTo(0); + } + + @Test + public void testListStatusOnRenamePendingJsonFile() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path renamePendingJsonPath = new Path("/hbase/A/A-" + SUFFIX); + fs.create(renamePendingJsonPath); + + FileStatus[] statuses = fs.listStatus(renamePendingJsonPath); + Assertions.assertThat(statuses.length) + .describedAs("List size is not expected").isEqualTo(1); + assertFilePathFileStatus(statuses[0], fs.makeQualified(statuses[0].getPath())); } @Test @@ -416,15 +436,19 @@ public void testContinuationTokenAcrossListStatus() throws Exception { "/testContinuationToken", false, 1, null, getTestTracingContext(fs, true), fs.getAbfsStore().getUri()); - Assertions.assertThat(listResponseData.getContinuationToken()).isNotNull(); - Assertions.assertThat(listResponseData.getFileStatusList()).hasSize(1); + Assertions.assertThat(listResponseData.getContinuationToken()) + .describedAs("Continuation Token Should not be null").isNotNull(); + Assertions.assertThat(listResponseData.getFileStatusList()) + .describedAs("Listing Size Not as expected").hasSize(1); ListResponseData listResponseData1 = fs.getAbfsStore().getClient().listPath( "/testContinuationToken", false, 1, listResponseData.getContinuationToken(), getTestTracingContext(fs, true), fs.getAbfsStore().getUri()); - Assertions.assertThat(listResponseData1.getContinuationToken()).isNull(); - Assertions.assertThat(listResponseData1.getFileStatusList()).hasSize(1); + Assertions.assertThat(listResponseData1.getContinuationToken()) + .describedAs("Continuation Token Should be null").isNull(); + Assertions.assertThat(listResponseData1.getFileStatusList()) + .describedAs("Listing Size Not as expected").hasSize(1); } @Test @@ -454,15 +478,20 @@ public void testEmptyContinuationToken() throws Exception { "/testInvalidContinuationToken", false, 1, "", getTestTracingContext(fs, true), fs.getAbfsStore().getUri()); - Assertions.assertThat(listResponseData.getContinuationToken()).isNotNull(); - Assertions.assertThat(listResponseData.getFileStatusList()).hasSize(1); + Assertions.assertThat(listResponseData.getContinuationToken()) + .describedAs("Continuation Token Should Not be null").isNotNull(); + Assertions.assertThat(listResponseData.getFileStatusList()) + .describedAs("Listing Size Not as expected").hasSize(1); } - private void assertFileFileStatus(final FileStatus fileStatus, + private void assertFilePathFileStatus(final FileStatus fileStatus, final Path qualifiedPath) { - Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); - Assertions.assertThat(fileStatus.isFile()).isEqualTo(true); - Assertions.assertThat(fileStatus.isDirectory()).isEqualTo(false); + Assertions.assertThat(fileStatus.getPath()) + .describedAs("Path Not as expected").isEqualTo(qualifiedPath); + Assertions.assertThat(fileStatus.isFile()) + .describedAs("Expecting a File Path").isEqualTo(true); + Assertions.assertThat(fileStatus.isDirectory()) + .describedAs("Expecting a File Path").isEqualTo(false); Assertions.assertThat(fileStatus.getModificationTime()).isNotEqualTo(0); } @@ -471,7 +500,8 @@ private void assertImplicitDirectoryFileStatus(final FileStatus fileStatus, assertDirectoryFileStatus(fileStatus, qualifiedPath); DirectoryStateHelper.isImplicitDirectory(qualifiedPath, getFileSystem(), getTestTracingContext(getFileSystem(), true)); - Assertions.assertThat(fileStatus.getModificationTime()).isEqualTo(0); + Assertions.assertThat(fileStatus.getModificationTime()) + .describedAs("Last Modified Time Not as Expected").isEqualTo(0); } private void assertExplicitDirectoryFileStatus(final FileStatus fileStatus, @@ -479,14 +509,19 @@ private void assertExplicitDirectoryFileStatus(final FileStatus fileStatus, assertDirectoryFileStatus(fileStatus, qualifiedPath); DirectoryStateHelper.isExplicitDirectory(qualifiedPath, getFileSystem(), getTestTracingContext(getFileSystem(), true)); - Assertions.assertThat(fileStatus.getModificationTime()).isNotEqualTo(0); + Assertions.assertThat(fileStatus.getModificationTime()) + .describedAs("Last Modified Time Not as Expected").isNotEqualTo(0); } private void assertDirectoryFileStatus(final FileStatus fileStatus, final Path qualifiedPath) { - Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); - Assertions.assertThat(fileStatus.isDirectory()).isEqualTo(true); - Assertions.assertThat(fileStatus.isFile()).isEqualTo(false); - Assertions.assertThat(fileStatus.getLen()).isEqualTo(0); + Assertions.assertThat(fileStatus.getPath()) + .describedAs("Path Not as Expected").isEqualTo(qualifiedPath); + Assertions.assertThat(fileStatus.isDirectory()) + .describedAs("Expecting a Directory Path").isEqualTo(true); + Assertions.assertThat(fileStatus.isFile()) + .describedAs("Expecting a Directory Path").isEqualTo(false); + Assertions.assertThat(fileStatus.getLen()) + .describedAs("Content Length Not as Expected").isEqualTo(0); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index dd105995ee2af6..fa9b63c0ef97a1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1786,7 +1786,7 @@ private void validateRename(AzureBlobFileSystem fs, Path src, Path dst, .describedAs("Renamed Pending Json file should exist.") .isEqualTo(isJsonExist); Assertions.assertThat(fs.exists(src)) - .describedAs("Renamed Source directory should exist.") + .describedAs("Renamed Source directory should not exist.") .isEqualTo(isSrcExist); Assertions.assertThat(fs.exists(dst)) .describedAs("Renamed Destination directory should exist.") @@ -1794,7 +1794,7 @@ private void validateRename(AzureBlobFileSystem fs, Path src, Path dst, } /** - * Test the renaming of a directory with different parallelism configurations.c + * Test the renaming of a directory with different parallelism configurations. */ @Test public void testRenameDirWithDifferentParallelismConfig() throws Exception { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index 468ad1e6ce82db..a6652fe85f1403 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -39,7 +39,6 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.util.functional.FunctionRaisingIOE; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java index 8252143ff978ec..4aaa53003d48b8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java index 1fb6bcc51b258c..f67ba45360c2ab 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestListActionTaker.java @@ -32,7 +32,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; -import org.apache.hadoop.fs.azurebfs.contracts.services.ListResponseData; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.utils.TracingContext;