Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5d58750
Spec: Fix REST pagination requirements based on new feedback (#9917)
rahil-c Mar 22, 2024
33838d5
docs: Add links checker (#9965)
Fokko Mar 23, 2024
857590f
Build: Bump mkdocs-material from 9.5.14 to 9.5.15 (#10031)
dependabot[bot] Mar 24, 2024
8311f05
Docs: Fix link to blog post (#10028)
AlexMercedCoder Mar 25, 2024
49a6634
Core: Migrate tests to JUnit5 (#10027)
tomtongue Mar 25, 2024
83bacf5
Add Iceberg version to UserAgent in S3 requests (#9963)
CsengerG Mar 25, 2024
602186b
Core, Spark: Fix handling of null binary values when sorting with zor…
amogh-jahagirdar Mar 25, 2024
817a5e1
Hive: Extract common code to be re-used for View support (#10001)
nk1506 Mar 26, 2024
2eabd52
Hive: Add test to make sure iceberg table with same name as hive tabl…
nk1506 Mar 26, 2024
b6cbb52
Build: Bump Spark from 3.5 to 3.5.1 (#9832)
manuzhang Mar 26, 2024
4579b7a
Spark: Fail on recursive cycle in view (#9834)
nastra Mar 27, 2024
fa80c85
Build: disable link-check for existing medium blog posts (#10042)
manuzhang Mar 27, 2024
9987314
Spark 3.4: Fail on recursive cycle in view (#10048)
nastra Mar 27, 2024
371a6b7
Build: Bump org.xerial:sqlite-jdbc from 3.45.1.0 to 3.45.2.0 (#9974)
dependabot[bot] Mar 27, 2024
baaedc6
Build: Bump io.netty:netty-buffer from 4.1.107.Final to 4.1.108.Final…
dependabot[bot] Mar 27, 2024
003cd94
Build: Bump arrow from 15.0.1 to 15.0.2 (#10034)
dependabot[bot] Mar 27, 2024
15e2a16
Build: Bump kafka from 3.6.1 to 3.7.0 (#9855)
dependabot[bot] Mar 27, 2024
4de819e
Build: Bump orc from 1.9.2 to 1.9.3 (#10033)
dependabot[bot] Mar 27, 2024
66a0954
Build: Bump com.azure:azure-sdk-bom from 1.2.20 to 1.2.21 (#9857)
dependabot[bot] Mar 27, 2024
bd46035
Build: Bump com.esotericsoftware:kryo from 4.0.2 to 4.0.3 (#9984)
dependabot[bot] Mar 27, 2024
81b62c7
Flink: implement range partitioner for map data statistics (#9321)
stevenzwu Mar 27, 2024
8e6c08e
Build: Bump software.amazon.awssdk:bom from 2.24.5 to 2.25.18 (#10050)
dependabot[bot] Mar 28, 2024
783158a
CI: Run Markdown links checker only when `{docs,site}/**` changes (#1…
Fokko Mar 28, 2024
4eef2fe
Core, Data: Migrate tests to JUnit5 (#10039)
tomtongue Mar 28, 2024
2d76c91
Build: disable link-check for all medium blog posts (#10057)
manuzhang Mar 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/docs-check-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: Check Markdown docs links

on:
push:
paths:
- docs/**
- site/**
branches:
- 'main'
pull_request:
paths:
- docs/**
- site/**
workflow_dispatch:

jobs:
markdown-link-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: gaurav-nelson/github-action-markdown-link-check@v1
with:
config-file: 'site/link-checker-config.json'
use-verbose-mode: yes
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
- under the License.
-->

![Iceberg](https://iceberg.apache.org/docs/latest/img/Iceberg-logo.png)
![Iceberg](https://iceberg.apache.org/assets/images/Iceberg-logo.svg)

[![](https://github.com/apache/iceberg/actions/workflows/java-ci.yml/badge.svg)](https://github.com/apache/iceberg/actions/workflows/java-ci.yml)
[![Slack](https://img.shields.io/badge/chat-on%20Slack-brightgreen.svg)](https://apache-iceberg.slack.com/)
Expand All @@ -37,11 +37,8 @@ The core Java library is located in this repository and is the reference impleme

[Documentation][iceberg-docs] is available for all libraries and integrations.

Current work is tracked in the [roadmap][roadmap].

[iceberg-docs]: https://iceberg.apache.org/docs/latest/
[iceberg-spec]: https://iceberg.apache.org/spec
[roadmap]: https://iceberg.apache.org/roadmap/
[iceberg-spec]: https://iceberg.apache.org/spec/

## Collaboration

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ public S3Client s3() {
b -> s3FileIOProperties.applyCredentialConfigurations(awsClientProperties, b))
.applyMutation(s3FileIOProperties::applySignerConfiguration)
.applyMutation(s3FileIOProperties::applyS3AccessGrantsConfigurations)
.applyMutation(s3FileIOProperties::applyUserAgentConfigurations)
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public S3Client s3() {
awsClientProperties, s3ClientBuilder))
.applyMutation(s3FileIOProperties::applySignerConfiguration)
.applyMutation(s3FileIOProperties::applyS3AccessGrantsConfigurations)
.applyMutation(s3FileIOProperties::applyUserAgentConfigurations)
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.EnvironmentContext;
import org.apache.iceberg.aws.AwsClientProperties;
import org.apache.iceberg.aws.glue.GlueCatalog;
import org.apache.iceberg.aws.s3.signer.S3V4RestSignerClient;
Expand Down Expand Up @@ -375,6 +376,14 @@ public class S3FileIOProperties implements Serializable {

public static final boolean PRELOAD_CLIENT_ENABLED_DEFAULT = false;

/**
* User Agent Prefix set by the S3 client.
*
* <p>This allows developers to monitor which version of Iceberg they have deployed to a cluster
* (for example, through the S3 Access Logs, which contain the user agent field).
*/
private static final String S3_FILE_IO_USER_AGENT = "s3fileio/" + EnvironmentContext.get();

private String sseType;
private String sseKey;
private String sseMd5;
Expand Down Expand Up @@ -819,6 +828,11 @@ public <T extends S3ClientBuilder> void applyS3AccessGrantsConfigurations(T buil
}
}

public <T extends S3ClientBuilder> void applyUserAgentConfigurations(T builder) {
builder.overrideConfiguration(
c -> c.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, S3_FILE_IO_USER_AGENT));
}

/**
* Dynamically load the http client builder to avoid runtime deps requirements of any optional SDK
* Plugins
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,4 +478,14 @@ public void testApplyEndpointConfiguration() {
s3FileIOProperties.applyEndpointConfigurations(mockS3ClientBuilder);
Mockito.verify(mockS3ClientBuilder).endpointOverride(Mockito.any(URI.class));
}

@Test
public void testApplyUserAgentConfigurations() {
Map<String, String> properties = Maps.newHashMap();
S3FileIOProperties s3FileIOProperties = new S3FileIOProperties(properties);
S3ClientBuilder mockS3ClientBuilder = Mockito.mock(S3ClientBuilder.class);
s3FileIOProperties.applyUserAgentConfigurations(mockS3ClientBuilder);

Mockito.verify(mockS3ClientBuilder).overrideConfiguration(Mockito.any(Consumer.class));
}
}
21 changes: 1 addition & 20 deletions core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -285,26 +285,7 @@ private Map<String, String> tableOverrideProperties() {
}

protected static String fullTableName(String catalogName, TableIdentifier identifier) {
StringBuilder sb = new StringBuilder();

if (catalogName.contains("/") || catalogName.contains(":")) {
// use / for URI-like names: thrift://host:port/db.table
sb.append(catalogName);
if (!catalogName.endsWith("/")) {
sb.append("/");
}
} else {
// use . for non-URI named catalogs: prod.db.table
sb.append(catalogName).append(".");
}

for (String level : identifier.namespace().levels()) {
sb.append(level).append(".");
}

sb.append(identifier.name());

return sb.toString();
return CatalogUtil.fullTableName(catalogName, identifier);
}

protected MetricsReporter metricsReporter() {
Expand Down
118 changes: 118 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseMetastoreOperations.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import static org.apache.iceberg.TableProperties.COMMIT_NUM_STATUS_CHECKS;
import static org.apache.iceberg.TableProperties.COMMIT_NUM_STATUS_CHECKS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MAX_WAIT_MS;
import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MIN_WAIT_MS;
import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS;
import static org.apache.iceberg.TableProperties.COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT;

import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class BaseMetastoreOperations {
private static final Logger LOG = LoggerFactory.getLogger(BaseMetastoreOperations.class);

public enum CommitStatus {
FAILURE,
SUCCESS,
UNKNOWN
}

/**
* Attempt to load the content and see if any current or past metadata location matches the one we
* were attempting to set. This is used as a last resort when we are dealing with exceptions that
* may indicate the commit has failed but don't have proof that this is the case. Note that all
* the previous locations must also be searched on the chance that a second committer was able to
* successfully commit on top of our commit.
*
* @param tableOrViewName full name of the Table/View
* @param newMetadataLocation the path of the new commit file
* @param properties properties for retry
* @param commitStatusSupplier check if the latest metadata presents or not using metadata
* location for table.
* @return Commit Status of Success, Failure or Unknown
*/
protected CommitStatus checkCommitStatus(
String tableOrViewName,
String newMetadataLocation,
Map<String, String> properties,
Supplier<Boolean> commitStatusSupplier) {
int maxAttempts =
PropertyUtil.propertyAsInt(
properties, COMMIT_NUM_STATUS_CHECKS, COMMIT_NUM_STATUS_CHECKS_DEFAULT);
long minWaitMs =
PropertyUtil.propertyAsLong(
properties, COMMIT_STATUS_CHECKS_MIN_WAIT_MS, COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT);
long maxWaitMs =
PropertyUtil.propertyAsLong(
properties, COMMIT_STATUS_CHECKS_MAX_WAIT_MS, COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT);
long totalRetryMs =
PropertyUtil.propertyAsLong(
properties,
COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS,
COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT);

AtomicReference<CommitStatus> status = new AtomicReference<>(CommitStatus.UNKNOWN);

Tasks.foreach(newMetadataLocation)
.retry(maxAttempts)
.suppressFailureWhenFinished()
.exponentialBackoff(minWaitMs, maxWaitMs, totalRetryMs, 2.0)
.onFailure(
(location, checkException) ->
LOG.error("Cannot check if commit to {} exists.", tableOrViewName, checkException))
.run(
location -> {
boolean commitSuccess = commitStatusSupplier.get();

if (commitSuccess) {
LOG.info(
"Commit status check: Commit to {} of {} succeeded",
tableOrViewName,
newMetadataLocation);
status.set(CommitStatus.SUCCESS);
} else {
LOG.warn(
"Commit status check: Commit to {} of {} unknown, new metadata location is not current "
+ "or in history",
tableOrViewName,
newMetadataLocation);
}
});

if (status.get() == CommitStatus.UNKNOWN) {
LOG.error(
"Cannot determine commit state to {}. Failed during checking {} times. "
+ "Treating commit state as unknown.",
tableOrViewName,
maxAttempts);
}
return status.get();
}
}
Loading