Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
164 commits
Select commit Hold shift + click to select a range
319a482
Docs: Add 5 dremio blogs (#10067)
AlexMercedCoder Apr 3, 2024
356c6cd
REST: Fix spurious warning when shutting down refresh executor (#10087)
adutra Apr 4, 2024
25c909b
API: Fix default FileIO#newInputFile ManifestFile, DataFile and Delet…
amogh-jahagirdar Apr 4, 2024
07246b1
Build: Bump org.testcontainers:testcontainers from 1.19.5 to 1.19.7 (…
dependabot[bot] Apr 4, 2024
ba1cd36
OpenAPI: Fix additionalProperties for SnapshotSummary (#9838)
haizhou-zhao Apr 4, 2024
fab5e18
Hive, JDBC: Avoid NPE on Throwables without error msg (#10082)
nk1506 Apr 5, 2024
00f46ac
Core: Introduce ConfigResponseParser (#9952)
nastra Apr 5, 2024
abf238a
Build: Bump mkdocs-material from 9.5.15 to 9.5.17 (#10092)
dependabot[bot] Apr 7, 2024
b3261d0
AWS: Migrate tests to JUnit5 (#10086)
tomtongue Apr 8, 2024
cd70739
Spec: Document support for binary in truncate transform (#10079)
TheNeuralBit Apr 8, 2024
a351e22
Docs: Add Upsolver to vendor list (#10096)
jasonf20 Apr 9, 2024
6f4e9c6
Docs: Update releases.md for Spark scala versions (#10104)
liko9 Apr 9, 2024
9bb86fa
Docs: Fix spacing/descriptions on Branching and Tagging DDL (#10091)
lawofcycles Apr 9, 2024
81bb0d4
Core: Add EnvironmentContext to commit summary (#9273)
manuzhang Apr 9, 2024
96793bf
docs: Fix links of `Get Started` and `Community` parts in footer (#10…
wayneguow Apr 10, 2024
528b9b3
Core: Allow configuring socket/connection timeout in HTTPClient (#10053)
harishch1998 Apr 11, 2024
0bc6dfa
Core: Extend HTTPClient Builder to allow setting a proxy server (#10052)
harishch1998 Apr 11, 2024
290a6a0
Spark 3.4: Fix system function pushdown in CoW row-level commands (#1…
aokolnychyi Apr 11, 2024
ce7c2c1
API, Core, Kafka, Spark: Reduce enum array allocation (#10126)
sullis Apr 12, 2024
2025e79
Spark: Test initialization improvements (#10131)
nastra Apr 12, 2024
1e66657
Spec: Make request bodies required (#10125)
westse Apr 12, 2024
81b3310
Spark 3.5: Support preserving schema nullability in CTAS and RTAS (#1…
zhongyujiang Apr 12, 2024
496b320
Flink, Spark: Replace Boolean.getBoolean() with Boolean.parseBoolean(…
nastra Apr 14, 2024
2400aa5
Build: Bump net.snowflake:snowflake-jdbc from 3.14.5 to 3.15.1 (#10095)
dependabot[bot] Apr 14, 2024
47825ff
Build: Bump software.amazon.awssdk:bom from 2.25.21 to 2.25.31 (#10138)
dependabot[bot] Apr 14, 2024
dd74dd2
Build: Bump org.springframework:spring-web from 5.3.33 to 5.3.34 (#10…
dependabot[bot] Apr 14, 2024
fb657b4
Spark: Simplify SparkSchemaUtil#schemaForTable (#10137)
amogh-jahagirdar Apr 14, 2024
943321e
Flink: Migrate tests to JUnit5 (#10130)
tomtongue Apr 15, 2024
e6a1a45
Core: Fix logging table name when scanning metadata table (#10141)
manuzhang Apr 15, 2024
d067677
AWS: Close underlying scheduler for DynamoDbLockManager (#10132)
regadas Apr 15, 2024
78e8204
Spark 3.5: Add threshold for failed commits in data rewrites (#9611)
manuzhang Apr 15, 2024
97c5700
Core: Fix JDBC Catalog table commit when migrating from schema V0 to …
jbonofre Apr 16, 2024
fc5b2b3
Core: Use 'delete' if RowDelta only has delete files (#10123)
nastra Apr 16, 2024
fbcd142
Flink: Move flink/v1.18 to flink/v1.19
rodmeneses Apr 16, 2024
f761d98
Flink: Recover flink/1.18 files from history
rodmeneses Apr 16, 2024
b3ebcf1
Flink: Refactoring code and properties to make Flink 1.19 to work
rodmeneses Apr 15, 2024
dd194b4
Flink: Removes Flink version 1.16 (#10154)
rodmeneses Apr 16, 2024
0a4e6e6
Docs: Updates flink versioning information in our docs (#10155)
rodmeneses Apr 17, 2024
c41c599
Flink: Backport Flink 1.18 JUnit5 migration to Flink 1.17 (#10163)
tomtongue Apr 17, 2024
928888b
OpenAPI: Renaming views should return 204 (#10166)
c-thiel Apr 17, 2024
228fc9b
Core: Fix namespace SQL statement using ESCAPE character that works w…
jbonofre Apr 17, 2024
8136463
Flink: Don't fail to serialize IcebergSourceSplit when there is too m…
javrasya Apr 18, 2024
1f8cad3
Flink: port #9464 to v1.17 and v1.19 (#10177)
elkhand Apr 18, 2024
efa14bf
Core: Improve size check in CatalogTests (#10182)
nastra Apr 19, 2024
ed2d041
Kafka-connect: Update iceberg.hadoop-conf-dir config description (#10…
ajantha-bhat Apr 19, 2024
3ed04c1
Build: Bump software.amazon.awssdk:bom from 2.25.31 to 2.25.35 (#10192)
dependabot[bot] Apr 21, 2024
e468d02
Build: Bump io.netty:netty-buffer from 4.1.108.Final to 4.1.109.Final…
dependabot[bot] Apr 21, 2024
9664940
Build: Bump org.roaringbitmap:RoaringBitmap from 1.0.5 to 1.0.6 (#10190)
dependabot[bot] Apr 21, 2024
2510ef8
Build: Bump mkdocs-material from 9.5.17 to 9.5.18 (#10189)
dependabot[bot] Apr 21, 2024
4261e18
Build: Bump gradle.plugin.io.morethan.jmhreport:gradle-jmh-report (#1…
dependabot[bot] Apr 21, 2024
a23021d
Core: Lazily compute & cache hashCode in CharSequenceWrapper (#10023)
nastra Apr 22, 2024
e3b78be
AWS: Make sure Signer + User Agent config are both applied (#10198)
nastra Apr 22, 2024
866021d
Hive: turn off the stats gathering when iceberg.hive.keep.stats is fa…
stargrey102 Apr 23, 2024
34e181b
Docs: Don't check links on Release page (#10212)
nastra Apr 24, 2024
bfe0daa
Docs: Use `svn mv` when releasing the binaries (#9926)
Fokko Apr 24, 2024
5326131
Infra: Add 1.5.1 to issue template (#10214)
amogh-jahagirdar Apr 24, 2024
0f11f54
Update site to 1.5.1 docs (#10218)
amogh-jahagirdar Apr 24, 2024
f460964
Core: Use 'delete' / 'append' if OverwriteFiles only deletes/appends …
nastra Apr 25, 2024
837a4aa
AWS: Fix TestGlueCatalogTable#testCreateTable (#10221)
Apr 25, 2024
10ffc60
Docs: Add 1.5.1 release notes (#10224)
amogh-jahagirdar Apr 25, 2024
5821efc
Spec: Clarify missing fields when writing (#8672)
Fokko Apr 26, 2024
c9f775b
Flink: Move ParquetReader to LogicalTypeAnnotationVisitor (#9719)
Fokko Apr 26, 2024
b7d3a7f
Flink: Fix bounded source state restore record duplication (#10208)
pvary Apr 26, 2024
c9eed43
REST: fix incorrect token refresh thread name (#10223)
adutra Apr 26, 2024
21c0ec4
Flink: Backport #10208 to v1.18 and v1.17 (#10230)
pvary Apr 26, 2024
646440a
Flink: Prevent setting endTag/endSnapshotId for streaming source (#10…
pvary Apr 26, 2024
1e35bf9
Flink: Backport #10207 to v1.18 and v1.17 (#10235)
pvary Apr 27, 2024
01bc864
Build: Bump nessie from 0.79.0 to 0.80.0 (#10237)
dependabot[bot] Apr 28, 2024
a0a6bcf
Build: Bump software.amazon.awssdk:bom from 2.25.35 to 2.25.40 (#10240)
dependabot[bot] Apr 29, 2024
9310bd4
Spark: Bump minor version for Spark-3.4 (#10243)
ajantha-bhat Apr 29, 2024
6016110
Build: Bump com.gorylenko.gradle-git-properties:gradle-git-properties…
dependabot[bot] Apr 29, 2024
a55797d
Build: Bump com.google.errorprone:error_prone_annotations (#10236)
dependabot[bot] Apr 29, 2024
426818b
Core: Add property to disable table initialization for JdbcCatalog (#…
mrcnc Apr 29, 2024
6f0d9dd
Flink: Migrate tests to JUnit5 (#10232)
tomtongue Apr 30, 2024
9626850
Release: add instruction to update doap.rdf file as part of release p…
jbonofre Apr 30, 2024
5aa0d3b
Add stale PRs management (#10134)
jbonofre Apr 30, 2024
e785aa7
Docs: Update doap.rdf (#10255)
ajantha-bhat Apr 30, 2024
e85884d
Build: Bump com.azure:azure-sdk-bom from 1.2.22 to 1.2.23 (#10238)
dependabot[bot] Apr 30, 2024
839f71c
Build: Bump mkdocs-material from 9.5.18 to 9.5.19 (#10241)
dependabot[bot] Apr 30, 2024
8396097
Build: Bump datamodel-code-generator from 0.25.5 to 0.25.6 (#10242)
dependabot[bot] Apr 30, 2024
1757577
Flink: Apply DeleteGranularity for writes (#10200)
pvary Apr 30, 2024
aeb2682
Hive: Remove deprecated `setSchema(TableMetadata, Map<String, String>…
Fokko May 1, 2024
0323308
Flink: Backport #10200 to v1.19 and v1.17 (#10259)
pvary May 1, 2024
7600ba7
Core: Add pagination when listing namespaces/tables/views (#9782)
rahil-c May 3, 2024
5106151
Docs: Update features for Hive 4.0 (#10162)
SourabhBadhya May 3, 2024
be305b2
Build: Bump org.xerial:sqlite-jdbc from 3.45.2.0 to 3.45.3.0 (#10194)
dependabot[bot] May 3, 2024
9cd5977
Build: Bump mkdocs-material from 9.5.19 to 9.5.21 (#10272)
dependabot[bot] May 5, 2024
ed84ea0
docs: Remove link to Flink unit test (#10160)
manuzhang May 6, 2024
2857d3a
Build: Bump nessie from 0.80.0 to 0.81.1 (#10267)
dependabot[bot] May 6, 2024
ed09592
MR: Fix using Date type as partition field (#10210)
lurnagao-dahua May 7, 2024
a5b85a7
Docs: Update site to 1.5.2 docs (#10291)
amogh-jahagirdar May 9, 2024
5d3d647
Build: Bump software.amazon.awssdk:bom from 2.25.40 to 2.25.45 (#10266)
dependabot[bot] May 9, 2024
e6586e9
Infra: Add Iceberg 1.5.2 to issue template (#10296)
amogh-jahagirdar May 9, 2024
b187b17
Update doap.rdf for 1.5.2 release (#10297)
amogh-jahagirdar May 9, 2024
e10098b
Docs: Add release notes for 1.5.2 (#10295)
amogh-jahagirdar May 9, 2024
3c8e046
Spec: Fix markdown for struct evolution default value rules (#10290)
sfc-gh-dmetzgar May 10, 2024
2b21020
Core: Retry connections in JDBC catalog with user configured error co…
amogh-jahagirdar May 10, 2024
e484f0d
Build: Bump guava from 33.1.0-jre to 33.2.0-jre (#10271)
dependabot[bot] May 11, 2024
04792cf
Spark 3.5: Remove obsolete conf parsing logic (#10309)
aokolnychyi May 11, 2024
485ce34
Build: Bump org.testcontainers:testcontainers from 1.19.7 to 1.19.8 (…
dependabot[bot] May 13, 2024
b752b74
docs: Update Quickstart to Hive 4.0.0 (#10325)
911432 May 13, 2024
d0dbc9c
Build: Bump software.amazon.awssdk:bom from 2.25.45 to 2.25.50 (#10323)
dependabot[bot] May 13, 2024
b623630
Parquet: Add Bloom filter FPP config (#10149)
huaxingao May 13, 2024
02b1ff9
Spark 3.5: Add support for enums in SparkConfParser (#10311)
huaxingao May 13, 2024
d23c490
Spark: Backport tests for struct aggregation pushdown to 3.3/3.4, cle…
amogh-jahagirdar May 14, 2024
ea916c1
Docs: Update vendor information for Cloudera (#10278)
bartash May 14, 2024
5e08f88
Make proxy endpoint configurable for s3 Http clients (#10332)
flyrain May 14, 2024
a6fb9cd
Spark 3.4: Add support for enums in SparkConfParser (#10330)
huaxingao May 14, 2024
2058053
AWS: Retain Glue Catalog table description after updating Iceberg tab…
May 15, 2024
4c9f47d
Kafka-connect: Handle namespace creation for auto table creation (#10…
ajantha-bhat May 15, 2024
2cd6d0d
Avoid adding a closed client to the pool (#10337)
flyrain May 15, 2024
090fe2e
Build: Bump nessie from 0.81.1 to 0.82.0 (#10318)
dependabot[bot] May 16, 2024
788bea2
Spark 3.5: Fix the setting of equalAuthorities in RemoveOrphanFilesPr…
hantangwangd May 16, 2024
f31315e
Docs: Fix Apache Doris documentation link (#10263)
detinho May 16, 2024
f4aaa37
Build: Bump io.delta:delta-spark_2.12 from 3.1.0 to 3.2.0 (#10320)
dependabot[bot] May 16, 2024
139721f
Remove unused manifest predicate (#10339)
amogh-jahagirdar May 16, 2024
bd046f8
Spark: Fix issue when partitioning by UUID (#8250)
nastra May 16, 2024
6abb99f
Spark 3.4, 3.3: Fix the setting of equalAuthorities in RemoveOrphanFi…
hantangwangd May 17, 2024
2a68edc
Use a unique field-id for delete files elements (#10347)
fqaiser94 May 17, 2024
2886ef4
Core, Spark 3.4: Remove redundant output in tests (#10348)
aokolnychyi May 17, 2024
236f625
Core: Replace deprecated Roaring64Bitmap#add call with addRange (#10350)
amogh-jahagirdar May 18, 2024
8d6bee7
Spark: Coerce shorts and bytes into ints in Parquet Writer (#10349)
shardulm94 May 20, 2024
fcd07d9
Build: Bump mkdocs-material from 9.5.21 to 9.5.23 (#10353)
dependabot[bot] May 23, 2024
f1a548f
Build: Bump org.springframework:spring-web from 5.3.34 to 5.3.35 (#10…
dependabot[bot] May 23, 2024
b3c25fb
Build: Bump software.amazon.awssdk:bom from 2.25.50 to 2.25.57 (#10367)
dependabot[bot] May 23, 2024
9114cc8
Hive: Use base table metadata to create HiveLock (#10016)
lirui-apache May 23, 2024
dd2197f
API: Fix aggregate pushdown when optional DataFile stats are null (#1…
jkolash May 23, 2024
d4c2ef8
Spark 3.5: Support camel case session configs and options (#10310)
aokolnychyi May 24, 2024
311dbbb
AWS: Support S3 DSSE-KMS encryption (#8370)
May 24, 2024
af9b9ee
Docs: add metrics-reporting back (#10377)
manuzhang May 27, 2024
2a35e23
Build: Bump software.amazon.awssdk:bom from 2.25.57 to 2.25.60 (#10385)
dependabot[bot] May 27, 2024
9a3db37
Build: Bump io.netty:netty-buffer from 4.1.109.Final to 4.1.110.Final…
dependabot[bot] May 27, 2024
ca8af31
Build: Bump io.airlift:aircompressor from 0.26 to 0.27 (#10383)
dependabot[bot] May 27, 2024
957cb0d
Build: Bump org.springframework:spring-web from 5.3.35 to 5.3.36 (#10…
dependabot[bot] May 27, 2024
6f4b195
Prevent deadlock in Jackson (#10379)
snazy May 27, 2024
580df62
Build: Bump nessie from 0.82.0 to 0.83.2 (#10381)
dependabot[bot] May 27, 2024
795fea9
Url encode field names for partition paths (#10329)
danielcweeks May 27, 2024
f9cdde2
Build: Bump net.snowflake:snowflake-jdbc from 3.15.1 to 3.16.0 (#10269)
dependabot[bot] May 28, 2024
d723f9f
Spark 3.5: Only traverse ancestors of current snapshot when building …
manuzhang May 30, 2024
2843f32
docs: Add archive for documentations older than 1.4.0 (#10374)
manuzhang May 30, 2024
46732b8
Flink 1.19: Fix flaky TestIcebergSourceFailover > testBoundedWithSave…
pvary May 30, 2024
2722290
docs: deploy on changes in `docs/` (#10394)
manuzhang May 30, 2024
6a59454
Spark 3.4: Only traverse ancestors of current snapshot when building …
manuzhang Jun 1, 2024
23eb594
Bump Azurite test-container to `3.30.0`
Fokko Jun 2, 2024
b4ffbf4
Build: Bump mkdocs-material from 9.5.23 to 9.5.25 (#10413)
dependabot[bot] Jun 2, 2024
1837c81
Build: Bump org.assertj:assertj-core from 3.25.3 to 3.26.0 (#10416)
dependabot[bot] Jun 3, 2024
ee11de9
Build: Bump guava from 33.2.0-jre to 33.2.1-jre (#10414)
dependabot[bot] Jun 3, 2024
2dfc0c6
Build: Bump org.xerial:sqlite-jdbc from 3.45.3.0 to 3.46.0.0 (#10415)
dependabot[bot] Jun 3, 2024
2521684
Docs: Refer to the README.md in `site/` for the docs (#10402)
Fokko Jun 3, 2024
7d75f82
Build: Require approving review (#10424)
Fokko Jun 3, 2024
134345d
Parquet: Remove TestHelpers in parquet module (#10428)
advancedxy Jun 3, 2024
67e181e
Core: Introduce AuthConfig (#10161)
nastra Jun 3, 2024
45bdf3f
Build: Bump software.amazon.awssdk:bom from 2.25.60 to 2.25.64 (#10421)
dependabot[bot] Jun 4, 2024
ab476ab
Build: Bump com.google.errorprone:error_prone_annotations (#10418)
dependabot[bot] Jun 4, 2024
40da6f1
Core: Use TestTemplate instead of Test annotation in TestPartitionSpe…
nastra Jun 4, 2024
0a26f02
Docs: Point links in metrics-reporting.md to GitHub Java source (#10397)
manuzhang Jun 4, 2024
a642a93
Build: Clean up Jackson dependency usages (#10448)
nastra Jun 5, 2024
cbe391d
Flink: refactor sink shuffling statistics collection (#10331)
stevenzwu Jun 5, 2024
59e9377
Spark 3.4, 3.5: SHOW VIEWS failed with AssertionError (#10442)
huaxingao Jun 5, 2024
be46d29
Core, Parquet, Orc: Don't write column sizes when metrics mode is Non…
amogh-jahagirdar Jun 5, 2024
afc3081
Spark 3.4, 3.5: Follow-up for #10442, Remove static test import (#10451)
huaxingao Jun 6, 2024
c7d3ef4
Flink: Maintenance - MonitorSource (#10308)
pvary Jun 6, 2024
e0dc57e
Open-API: Use union instead of inheritance for TableRequirements (#10…
anuragmantri Jun 6, 2024
c7de6cb
Core: Reword exception message in RewriteManifests validation (#10446)
ajantha-bhat Jun 7, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 3 additions & 0 deletions .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ github:

protected_branches:
main:
required_pull_request_reviews:
required_approving_review_count: 1

required_linear_history: true

features:
Expand Down
4 changes: 3 additions & 1 deletion .baseline/checkstyle/checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@
org.apache.spark.sql.functions.*,
org.apache.spark.sql.connector.iceberg.write.RowLevelOperation.Command.*,
org.apache.spark.sql.connector.write.RowLevelOperation.Command.*,
org.junit.Assert.*"/>
org.junit.Assert.*,
org.assertj.core.api.Assertions.*,
org.assertj.core.api.Assumptions.*"/>
</module>
<module name="ClassTypeParameterName"> <!-- Java Style Guide: Type variable names -->
<property name="format" value="(^[A-Z][0-9]?)$|([A-Z][a-zA-Z0-9]*[T]$)"/>
Expand Down
4 changes: 3 additions & 1 deletion .github/ISSUE_TEMPLATE/iceberg_bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ body:
description: What Apache Iceberg version are you using?
multiple: false
options:
- "1.5.0 (latest release)"
- "1.5.2 (latest release)"
- "1.5.1"
- "1.5.0"
- "1.4.3"
- "1.4.2"
- "1.4.1"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/flink-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
strategy:
matrix:
jvm: [8, 11]
flink: ['1.16', '1.17', '1.18']
flink: ['1.17', '1.18', '1.19']
env:
SPARK_LOCAL_IP: localhost
steps:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/site-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ on:
branches:
- main
paths:
- docs/**
- site/**
workflow_dispatch:
jobs:
Expand Down
12 changes: 9 additions & 3 deletions .github/workflows/stale.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# under the License.
#

name: "Close Stale Issues"
name: "Close Stale Issues and PRs"
on:
schedule:
- cron: '0 0 * * *'
Expand All @@ -33,12 +33,11 @@ jobs:
steps:
- uses: actions/stale@v9.0.0
with:
# stale issues
stale-issue-label: 'stale'
exempt-issue-labels: 'not-stale'
days-before-issue-stale: 180
days-before-issue-close: 14
# Only close stale issues, leave PRs alone
days-before-pr-stale: -1
stale-issue-message: >
This issue has been automatically marked as stale because it has been open for 180 days
with no activity. It will be closed in next 14 days if no further activity occurs. To
Expand All @@ -47,5 +46,12 @@ jobs:
close-issue-message: >
This issue has been closed because it has not received any activity in the last 14 days
since being marked as 'stale'
# stale PRs
stale-pr-label: 'stale'
exempt-pr-labels: 'not-stale,security'
stale-pr-message: 'This pull request has been marked as stale due to 30 days of inactivity. It will be closed in 1 week if no further activity occurs. If you think that’s incorrect or this pull request requires a review, please simply write any comment. If closed, you can revive the PR at any time and @mention a reviewer or discuss it on the dev@iceberg.apache.org list. Thank you for your contributions.'
close-pr-message: 'This pull request has been closed due to lack of activity. This is not a judgement on the merit of the PR in any way. It is just a way of keeping the PR queue manageable. If you think that is incorrect, or the pull request requires review, you can revive the PR at any time.'
days-before-pr-stale: 30
days-before-pr-close: 7
ascending: true
operations-per-run: 100
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ gradle/wrapper/gradle-wrapper.jar
lib/

# web site build
docs/site/
site/site/
site/docs/docs/
site/docs/.asf.yaml
Expand Down
4 changes: 3 additions & 1 deletion api/src/main/java/org/apache/iceberg/FileFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public enum FileFormat {
private final String ext;
private final boolean splittable;

private static final FileFormat[] VALUES = values();

FileFormat(String ext, boolean splittable) {
this.ext = "." + ext;
this.splittable = splittable;
Expand All @@ -55,7 +57,7 @@ public String addExtension(String filename) {
}

public static FileFormat fromFileName(CharSequence filename) {
for (FileFormat format : FileFormat.values()) {
for (FileFormat format : VALUES) {
int extStart = filename.length() - format.ext.length();
if (Comparators.charSequences()
.compare(format.ext, filename.subSequence(extStart, filename.length()))
Expand Down
2 changes: 1 addition & 1 deletion api/src/main/java/org/apache/iceberg/PartitionSpec.java
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ public String partitionToPath(StructLike data) {
if (i > 0) {
sb.append("/");
}
sb.append(field.name()).append("=").append(escape(valueString));
sb.append(escape(field.name())).append("=").append(escape(valueString));
}
return sb.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ public interface RewriteDataFiles

int PARTIAL_PROGRESS_MAX_COMMITS_DEFAULT = 10;

/**
* The maximum number of failed commits that this rewrite is allowed if partial progress is
* enabled. By default, all commits are allowed to fail. This setting has no effect if partial
* progress is disabled.
*/
String PARTIAL_PROGRESS_MAX_FAILED_COMMITS = "partial-progress.max-failed-commits";

/**
* The entire rewrite operation is broken down into pieces based on partitioning and within
* partitions based on size into groups. These sub-units of the rewrite are referred to as file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ public String describe() {
}
}

/**
 * Null-tolerant counterpart of {@link Map#containsKey(Object)}.
 *
 * @param map the map to probe; may be null
 * @param key the key to look up
 * @return true only when {@code map} is non-null and holds an entry for {@code key}
 */
<V> boolean safeContainsKey(Map<Integer, V> map, int key) {
  return map != null && map.containsKey(key);
}

<V> V safeGet(Map<Integer, V> map, int key) {
return safeGet(map, key, null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ protected Long countFor(StructLike row) {

/**
 * Reports whether the given file has both a value count and a null value count recorded for
 * this aggregate's field.
 *
 * @param file the data file whose stats are inspected
 * @return true only when both stats maps are present and each has an entry for the field
 */
@Override
protected boolean hasValue(DataFile file) {
  // Either stats map may be null, so both lookups go through the null-safe helper; calling
  // containsKey directly on nullValueCounts() would throw when only that map is missing.
  return safeContainsKey(file.valueCounts(), fieldId)
      && safeContainsKey(file.nullValueCounts(), fieldId);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ protected MaxAggregate(BoundTerm<T> term) {

@Override
protected boolean hasValue(DataFile file) {
boolean hasBound = file.upperBounds().containsKey(fieldId);
boolean hasBound = safeContainsKey(file.upperBounds(), fieldId);
Long valueCount = safeGet(file.valueCounts(), fieldId);
Long nullCount = safeGet(file.nullValueCounts(), fieldId);
boolean boundAllNull =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ protected MinAggregate(BoundTerm<T> term) {

@Override
protected boolean hasValue(DataFile file) {
boolean hasBound = file.lowerBounds().containsKey(fieldId);
boolean hasBound = safeContainsKey(file.lowerBounds(), fieldId);
Long valueCount = safeGet(file.valueCounts(), fieldId);
Long nullCount = safeGet(file.nullValueCounts(), fieldId);
boolean boundAllNull =
Expand Down
12 changes: 6 additions & 6 deletions api/src/main/java/org/apache/iceberg/io/FileIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,25 @@ default InputFile newInputFile(String path, long length) {
default InputFile newInputFile(DataFile file) {
Preconditions.checkArgument(
file.keyMetadata() == null,
"Cannot decrypt data file: {} (use EncryptingFileIO)",
"Cannot decrypt data file: %s (use EncryptingFileIO)",
file.path());
return newInputFile(file.path().toString());
return newInputFile(file.path().toString(), file.fileSizeInBytes());
}

default InputFile newInputFile(DeleteFile file) {
Preconditions.checkArgument(
file.keyMetadata() == null,
"Cannot decrypt delete file: {} (use EncryptingFileIO)",
"Cannot decrypt delete file: %s (use EncryptingFileIO)",
file.path());
return newInputFile(file.path().toString());
return newInputFile(file.path().toString(), file.fileSizeInBytes());
}

default InputFile newInputFile(ManifestFile manifest) {
Preconditions.checkArgument(
manifest.keyMetadata() == null,
"Cannot decrypt manifest: {} (use EncryptingFileIO)",
"Cannot decrypt manifest: %s (use EncryptingFileIO)",
manifest.path());
return newInputFile(manifest.path());
return newInputFile(manifest.path(), manifest.length());
}

/** Get a {@link OutputFile} instance to write bytes to the file at the given path. */
Expand Down
4 changes: 4 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/JavaHashes.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public class JavaHashes {
private JavaHashes() {}

public static int hashCode(CharSequence str) {
if (null == str) {
return 0;
}

int result = 177;
for (int i = 0; i < str.length(); i += 1) {
char ch = str.charAt(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,20 @@ public static CharSequenceWrapper wrap(CharSequence seq) {
}

private CharSequence wrapped;
// lazily computed & cached hashCode
private transient int hashCode = 0;
// tracks if the hash has been calculated as actually being zero to avoid re-calculating the hash.
// this follows the hashCode() implementation from java.lang.String
private transient boolean hashIsZero = false;

private CharSequenceWrapper(CharSequence wrapped) {
this.wrapped = wrapped;
}

/**
 * Points this wrapper at a different sequence and clears the cached hash state so that it is
 * computed again for the new value.
 *
 * @param newWrapped the sequence to wrap from now on
 * @return this wrapper, to allow call chaining
 */
public CharSequenceWrapper set(CharSequence newWrapped) {
  // reset the lazily computed hash together with its "hash is really zero" marker
  this.hashIsZero = false;
  this.hashCode = 0;
  this.wrapped = newWrapped;
  return this;
}

Expand All @@ -58,6 +65,10 @@ public boolean equals(Object other) {
return wrapped.equals(that.wrapped);
}

if (null == wrapped && null == that.wrapped) {
return true;
}

if (length() != that.length()) {
return false;
}
Expand All @@ -67,7 +78,19 @@ public boolean equals(Object other) {

@Override
public int hashCode() {
return JavaHashes.hashCode(wrapped);
int hash = hashCode;

// don't recalculate if the hash is actually 0
if (hash == 0 && !hashIsZero) {
hash = JavaHashes.hashCode(wrapped);
if (hash == 0) {
hashIsZero = true;
} else {
this.hashCode = hash;
}
}

return hash;
}

@Override
Expand Down
12 changes: 11 additions & 1 deletion api/src/test/java/org/apache/iceberg/TestPartitionPaths.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ public class TestPartitionPaths {
new Schema(
Types.NestedField.required(1, "id", Types.IntegerType.get()),
Types.NestedField.optional(2, "data", Types.StringType.get()),
Types.NestedField.optional(3, "ts", Types.TimestampType.withoutZone()));
Types.NestedField.optional(3, "ts", Types.TimestampType.withoutZone()),
Types.NestedField.optional(4, "\"esc\"#1", Types.StringType.get()));

@Test
public void testPartitionPath() {
Expand Down Expand Up @@ -62,4 +63,13 @@ public void testEscapedStrings() {
.as("Should escape / as %2F")
.isEqualTo("data=a%2Fb%2Fc%2Fd/data_trunc=a%2Fb%2Fc%2Fd");
}

@Test
public void testEscapedFieldNames() {
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("\"esc\"#1").build();

assertThat(spec.partitionToPath(Row.of("a/b/c/d")))
.as("Should escape \" as %22 and # as %23")
.isEqualTo("%22esc%22%231=a%2Fb%2Fc%2Fd");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,22 @@ public class TestAggregateEvaluator {
FILE, MISSING_SOME_NULLS_STATS_1, MISSING_SOME_NULLS_STATS_2
};

private static final DataFile MISSING_ALL_OPTIONAL_STATS =
new TestDataFile(
"file_null_stats.avro",
Row.of(),
20,
// any value counts, including nulls
null,
// null value counts
null,
// nan value counts
null,
// lower bounds
null,
// upper bounds
null);

@Test
public void testIntAggregate() {
List<Expression> list =
Expand Down Expand Up @@ -173,6 +189,42 @@ public void testNoStats() {
assertEvaluatorResult(result, expected);
}

@Test
public void testIntAggregateAllMissingStats() {
List<Expression> list =
ImmutableList.of(
Expressions.countStar(),
Expressions.count("id"),
Expressions.max("id"),
Expressions.min("id"));
AggregateEvaluator aggregateEvaluator = AggregateEvaluator.create(SCHEMA, list);

aggregateEvaluator.update(MISSING_ALL_OPTIONAL_STATS);

assertThat(aggregateEvaluator.allAggregatorsValid()).isFalse();
StructLike result = aggregateEvaluator.result();
Object[] expected = {20L, null, null, null};
assertEvaluatorResult(result, expected);
}

@Test
public void testOptionalColAllMissingStats() {
List<Expression> list =
ImmutableList.of(
Expressions.countStar(),
Expressions.count("no_stats"),
Expressions.max("no_stats"),
Expressions.min("no_stats"));
AggregateEvaluator aggregateEvaluator = AggregateEvaluator.create(SCHEMA, list);

aggregateEvaluator.update(MISSING_ALL_OPTIONAL_STATS);

assertThat(aggregateEvaluator.allAggregatorsValid()).isFalse();
StructLike result = aggregateEvaluator.result();
Object[] expected = {20L, null, null, null};
assertEvaluatorResult(result, expected);
}

private void assertEvaluatorResult(StructLike result, Object[] expected) {
Object[] actual = new Object[result.size()];
for (int i = 0; i < result.size(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,17 @@
import java.util.concurrent.TimeUnit;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

public class TestCharSequenceMap {

@Test
public void nullString() {
Assertions.assertThat(CharSequenceMap.create()).doesNotContainKey((String) null);
Assertions.assertThat(CharSequenceMap.create()).doesNotContainValue((String) null);
}

@Test
public void testEmptyMap() {
CharSequenceMap<String> map = CharSequenceMap.create();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ public void testSearchingInCharSequenceCollection() {
Assertions.assertThat(set.contains("def")).isTrue();
}

@Test
public void nullString() {
Assertions.assertThat(CharSequenceSet.of(Arrays.asList((String) null))).contains((String) null);
Assertions.assertThat(CharSequenceSet.empty()).doesNotContain((String) null);
}

@Test
public void testRetainAll() {
CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456"));
Expand Down
Loading