diff --git a/js/downloads.js b/js/downloads.js
index fdac3478150..3169fa26844 100644
--- a/js/downloads.js
+++ b/js/downloads.js
@@ -22,7 +22,7 @@ var packagesV10 = [hadoop2p7, hadoop3p2, hadoopFree, sources];
// 3.1.0+
var packagesV11 = [hadoop3p2, hadoop2p7, hadoopFree, sources];
-
+addRelease("3.2.0", new Date("10/13/2021"), packagesV11, true);
addRelease("3.1.2", new Date("06/01/2021"), packagesV11, true);
addRelease("3.0.3", new Date("06/23/2021"), packagesV10, true);
diff --git a/news/_posts/2021-10-13-spark-3-2-0-released.md b/news/_posts/2021-10-13-spark-3-2-0-released.md
new file mode 100644
index 00000000000..23657086e05
--- /dev/null
+++ b/news/_posts/2021-10-13-spark-3-2-0-released.md
@@ -0,0 +1,14 @@
+---
+layout: post
+title: Spark 3.2.0 released
+categories:
+- News
+tags: []
+status: publish
+type: post
+published: true
+meta:
+ _edit_last: '4'
+ _wpas_done_all: '1'
+---
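+We are happy to announce the availability of Spark 3.2.0! Visit the [release notes](https://spark.apache.org/releases/spark-release-3-2-0.html) to read about the new features, or [download](https://spark.apache.org/downloads.html) the release today.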
diff --git a/releases/_posts/2021-10-13-spark-release-3-2-0.md b/releases/_posts/2021-10-13-spark-release-3-2-0.md
new file mode 100644
index 00000000000..870e8e2bdbd
--- /dev/null
+++ b/releases/_posts/2021-10-13-spark-release-3-2-0.md
@@ -0,0 +1,318 @@
+---
+layout: post
+title: Spark Release 3.2.0
+categories: []
+tags: []
+status: publish
+type: post
+published: true
+meta:
+ _edit_last: '4'
+ _wpas_done_all: '1'
+---
+
+Apache Spark 3.2.0 is the third release of the 3.x line. With tremendous contribution from the open-source community, this release managed to resolve in excess of 1,700 Jira tickets.
+
+In this release, Spark supports the Pandas API layer on Spark. Pandas users can scale out their applications on Spark with a one-line code change. Other major updates include RocksDB StateStore support, session window support, push-based shuffle support, ANSI SQL INTERVAL types, enabling Adaptive Query Execution (AQE) by default, and ANSI SQL mode GA.
+
+To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/downloads.html) page. You can consult JIRA for the [detailed changes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&version=12349407). We have curated a list of high level changes here, grouped by major modules.
+
+* This will become a table of contents (this text will be scraped).
+{:toc}
+
+### Highlights
+
+* Support Pandas API layer on PySpark ([SPARK-34849](https://issues.apache.org/jira/browse/SPARK-34849))
+* Support push-based shuffle to improve shuffle efficiency ([SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602))
+* Add RocksDB StateStore implementation ([SPARK-34198](https://issues.apache.org/jira/browse/SPARK-34198))
+* EventTime based sessionization (session window) ([SPARK-10816](https://issues.apache.org/jira/browse/SPARK-10816))
+* ANSI SQL mode GA ([SPARK-35030](https://issues.apache.org/jira/browse/SPARK-35030))
+* Support for ANSI SQL INTERVAL types ([SPARK-27790](https://issues.apache.org/jira/browse/SPARK-27790))
+* Enable adaptive query execution by default ([SPARK-33679](https://issues.apache.org/jira/browse/SPARK-33679))
+* Query compilation latency reduction ([SPARK-35042](https://issues.apache.org/jira/browse/SPARK-35042), [SPARK-35103](https://issues.apache.org/jira/browse/SPARK-35103), [SPARK-34989](https://issues.apache.org/jira/browse/SPARK-34989))
+* Support Scala 2.13 ([SPARK-34218](https://issues.apache.org/jira/browse/SPARK-34218))
+
+
+### Core and Spark SQL
+
+**ANSI SQL Compatibility Enhancements**
+
+* Support for ANSI SQL INTERVAL types ([SPARK-27790](https://issues.apache.org/jira/browse/SPARK-27790))
+* New type coercion syntax rules in ANSI mode ([SPARK-34246](https://issues.apache.org/jira/browse/SPARK-34246))
+* ANSI mode: IntegralDivide throws exception on overflow ([SPARK-35152](https://issues.apache.org/jira/browse/SPARK-35152))
+* ANSI mode: Check for overflow in Average ([SPARK-35955](https://issues.apache.org/jira/browse/SPARK-35955))
+* Block count(table.*) to follow ANSI standard and other SQL engines ([SPARK-34199](https://issues.apache.org/jira/browse/SPARK-34199))
+
+**Performance**
+
+* Query compilation latency
+ * Support traversal pruning in transform/resolve functions and their call sites ([SPARK-35042](https://issues.apache.org/jira/browse/SPARK-35042))
+ * Improve the performance of mapChildren and withNewChildren methods ([SPARK-34989](https://issues.apache.org/jira/browse/SPARK-34989))
+ * Improve the performance of type coercion rules ([SPARK-35103](https://issues.apache.org/jira/browse/SPARK-35103))
+* Query optimization
+ * Remove redundant aggregates in the Optimizer ([SPARK-33122](https://issues.apache.org/jira/browse/SPARK-33122))
+ * Push down limit through Project with Join ([SPARK-34622](https://issues.apache.org/jira/browse/SPARK-34622))
+ * Push down limit for LEFT SEMI and LEFT ANTI join ([SPARK-36404](https://issues.apache.org/jira/browse/SPARK-36404), [SPARK-34514](https://issues.apache.org/jira/browse/SPARK-34514))
+ * Push down limit through window when partitionSpec is empty ([SPARK-34575](https://issues.apache.org/jira/browse/SPARK-34575))
+ * Use a relative cost comparison function in the CBO ([SPARK-34922](https://issues.apache.org/jira/browse/SPARK-34922))
+ * Cardinality estimation of union, sort and range operator ([SPARK-33411](https://issues.apache.org/jira/browse/SPARK-33411))
+ * Only push down LeftSemi/LeftAnti over Aggregate if join can be planned as broadcast join ([SPARK-34081](https://issues.apache.org/jira/browse/SPARK-34081))
+ * UnwrapCastInBinaryComparison support In/InSet predicate ([SPARK-35316](https://issues.apache.org/jira/browse/SPARK-35316))
+ * Subexpression elimination enhancements ([SPARK-35448](https://issues.apache.org/jira/browse/SPARK-35448))
+ * Keep necessary stats after partition pruning ([SPARK-34119](https://issues.apache.org/jira/browse/SPARK-34119))
+ * Decouple bucket filter pruning and bucket table scan ([SPARK-32985](https://issues.apache.org/jira/browse/SPARK-32985))
+* Query execution
+ * Adaptive query execution
+   * Enable adaptive query execution by default ([SPARK-33679](https://issues.apache.org/jira/browse/SPARK-33679))
+   * Support Dynamic Partition Pruning (DPP) in AQE when the join is broadcast hash join at the beginning or there is no reused broadcast exchange ([SPARK-34168](https://issues.apache.org/jira/browse/SPARK-34168), [SPARK-35710](https://issues.apache.org/jira/browse/SPARK-35710))
+   * Optimize skew join before coalescing shuffle partitions ([SPARK-35447](https://issues.apache.org/jira/browse/SPARK-35447))
+   * Support AQE side shuffled hash join formula using rule ([SPARK-35282](https://issues.apache.org/jira/browse/SPARK-35282))
+   * Support AQE side broadcast hash join threshold ([SPARK-35264](https://issues.apache.org/jira/browse/SPARK-35264))
+   * Allow custom plugin for AQE cost evaluator ([SPARK-35794](https://issues.apache.org/jira/browse/SPARK-35794))
+ * Enable Zstandard buffer pool by default ([SPARK-34340](https://issues.apache.org/jira/browse/SPARK-34340), [SPARK-34390](https://issues.apache.org/jira/browse/SPARK-34390))
+ * Add code-gen for all join types of sort merge join ([SPARK-34705](https://issues.apache.org/jira/browse/SPARK-34705))
+ * Whole plan exchange and subquery reuse ([SPARK-29375](https://issues.apache.org/jira/browse/SPARK-29375))
+ * Broadcast nested loop join improvement ([SPARK-34706](https://issues.apache.org/jira/browse/SPARK-34706))
+ * Support two levels of hash maps for final hash aggregation ([SPARK-35141](https://issues.apache.org/jira/browse/SPARK-35141))
+ * Allow concurrent writers for writing dynamic partitions and bucket table ([SPARK-26164](https://issues.apache.org/jira/browse/SPARK-26164))
+ * Improve performance of processing FETCH_PRIOR in Spark Thrift server ([SPARK-33655](https://issues.apache.org/jira/browse/SPARK-33655))
+
+**Connector Enhancements**
+
+* Parquet
+ * Upgrade Apache Parquet used to version 1.12.1 ([SPARK-36726](https://issues.apache.org/jira/browse/SPARK-36726))
+ * Parquet vectorized reader support column index ([SPARK-34289](https://issues.apache.org/jira/browse/SPARK-34289))
+ * Add new parquet data source options to control datetime rebasing in read ([SPARK-34377](https://issues.apache.org/jira/browse/SPARK-34377))
+ * Read parquet unsigned types that are stored as int32 physical type in parquet ([SPARK-34817](https://issues.apache.org/jira/browse/SPARK-34817))
+ * Read Parquet unsigned int64 logical type that stored as signed int64 physical type to decimal(20, 0) ([SPARK-34786](https://issues.apache.org/jira/browse/SPARK-34786))
+ * Handle column index when using vectorized Parquet reader ([SPARK-34859](https://issues.apache.org/jira/browse/SPARK-34859))
+ * Improve Parquet In filter pushdown ([SPARK-32792](https://issues.apache.org/jira/browse/SPARK-32792))
+* ORC
+ * Upgrade Apache ORC used to version 1.6.11 ([SPARK-36482](https://issues.apache.org/jira/browse/SPARK-36482))
+ * Support Apache ORC forced positional evolution ([SPARK-32864](https://issues.apache.org/jira/browse/SPARK-32864))
+ * Support nested column in ORC vectorized reader ([SPARK-34862](https://issues.apache.org/jira/browse/SPARK-34862))
+ * Support ZSTD, LZ4 compression in ORC data source ([SPARK-33978](https://issues.apache.org/jira/browse/SPARK-33978), [SPARK-35612](https://issues.apache.org/jira/browse/SPARK-35612))
+ * Set the list of read columns in the task configuration to reduce reading of ORC data ([SPARK-35783](https://issues.apache.org/jira/browse/SPARK-35783))
+* Avro
+ * Upgrade Apache Avro used to version 1.10.2 ([SPARK-34778](https://issues.apache.org/jira/browse/SPARK-34778))
+ * Supporting Avro schema evolution for partitioned Hive tables with "avro.schema.literal" ([SPARK-26836](https://issues.apache.org/jira/browse/SPARK-26836))
+ * Add new Avro datasource options to control datetime rebasing in read ([SPARK-34404](https://issues.apache.org/jira/browse/SPARK-34404))
+ * Adding support for user provided schema url in Avro ([SPARK-34416](https://issues.apache.org/jira/browse/SPARK-34416))
+ * Add support for positional Catalyst-to-Avro schema matching ([SPARK-34365](https://issues.apache.org/jira/browse/SPARK-34365))
+* JSON
+ * Upgrade Jackson used to version 2.12.3 ([SPARK-35550](https://issues.apache.org/jira/browse/SPARK-35550))
+ * Allow JSON data sources to write non-ASCII characters as codepoints ([SPARK-35047](https://issues.apache.org/jira/browse/SPARK-35047))
+* CSV
+ * Upgrade univocity-parsers to 2.9.1 ([SPARK-33940](https://issues.apache.org/jira/browse/SPARK-33940))
+* JDBC
+ * Represent JDBC Time type as Integer in milliseconds ([SPARK-33888](https://issues.apache.org/jira/browse/SPARK-33888))
+ * Calculate more precise partition stride in JDBCRelation ([SPARK-34843](https://issues.apache.org/jira/browse/SPARK-34843))
+ * Support refreshKrb5Config option in JDBC data sources ([SPARK-35226](https://issues.apache.org/jira/browse/SPARK-35226))
+* Hive Metastore support filter by NOT IN ([SPARK-34538](https://issues.apache.org/jira/browse/SPARK-34538))
+
+**Kubernetes Enhancements**
+
+* Upgrade Kubernetes client to 5.4.1 ([SPARK-35660](https://issues.apache.org/jira/browse/SPARK-35660))
+* Support spark.kubernetes.executor.disableConfigMap ([SPARK-34316](https://issues.apache.org/jira/browse/SPARK-34316))
+* Support remote template files ([SPARK-34783](https://issues.apache.org/jira/browse/SPARK-34783))
+* Introducing a limit for pending PODs ([SPARK-36052](https://issues.apache.org/jira/browse/SPARK-36052))
+* Support shuffle data recovery on the reused PVCs ([SPARK-35593](https://issues.apache.org/jira/browse/SPARK-35593))
+* Support early driver service clean-up during app termination ([SPARK-35131](https://issues.apache.org/jira/browse/SPARK-35131))
+* Add config for driver readiness timeout before executors start ([SPARK-32975](https://issues.apache.org/jira/browse/SPARK-32975))
+* Support driver-owned on-demand PVC ([SPARK-35182](https://issues.apache.org/jira/browse/SPARK-35182))
+* Support shuffle data recovery on the reused PVCs
+* Maximum decommissioning time & allow decommissioning for excludes ([SPARK-34104](https://issues.apache.org/jira/browse/SPARK-34104))
+* Support submit to k8s only with token ([SPARK-33720](https://issues.apache.org/jira/browse/SPARK-33720))
+* Add a developer API for custom feature steps ([SPARK-33261](https://issues.apache.org/jira/browse/SPARK-33261))
+
+**Data Source V2 API**
+
+* Aggregate pushdown APIs ([SPARK-34952](https://issues.apache.org/jira/browse/SPARK-34952))
+* FunctionCatalog API ([SPARK-27658](https://issues.apache.org/jira/browse/SPARK-27658))
+* DataSourceV2 Function Catalog implementation ([SPARK-35260](https://issues.apache.org/jira/browse/SPARK-35260))
+* Add API to request distribution and ordering on write ([SPARK-33779](https://issues.apache.org/jira/browse/SPARK-33779))
+* Add interfaces to pass the required sorting and clustering for writes ([SPARK-23889](https://issues.apache.org/jira/browse/SPARK-23889))
+* Support metrics from Datasource v2 scan ([SPARK-34338](https://issues.apache.org/jira/browse/SPARK-34338))
+* Support metrics at writing path ([SPARK-36030](https://issues.apache.org/jira/browse/SPARK-36030))
+* Support partitioning with static number on the required distribution and ordering on write ([SPARK-34255](https://issues.apache.org/jira/browse/SPARK-34255))
+* Support Dynamic filtering ([SPARK-35779](https://issues.apache.org/jira/browse/SPARK-35779))
+* Support LocalScan ([SPARK-35535](https://issues.apache.org/jira/browse/SPARK-35535))
+* MERGE ... UPDATE/INSERT * should do by-name resolution ([SPARK-34720](https://issues.apache.org/jira/browse/SPARK-34720))
+
+**Feature Enhancements**
+
+* Subquery
+ * Support LATERAL subqueries ([SPARK-34382](https://issues.apache.org/jira/browse/SPARK-34382))
+ * Improve correlated subqueries ([SPARK-35553](https://issues.apache.org/jira/browse/SPARK-35553))
+ * Allow non-aggregated single row correlated scalar subquery ([SPARK-28379](https://issues.apache.org/jira/browse/SPARK-28379))
+ * Only allow a subset of correlated equality predicates when a subquery is aggregated ([SPARK-35080](https://issues.apache.org/jira/browse/SPARK-35080))
+ * Resolve star expressions in subqueries using outer query plans ([SPARK-35618](https://issues.apache.org/jira/browse/SPARK-35618))
+* New built-in functions
+ * current_user ([SPARK-21957](https://issues.apache.org/jira/browse/SPARK-21957))
+ * product ([SPARK-33678](https://issues.apache.org/jira/browse/SPARK-33678))
+ * regexp_like,regexp ([SPARK-33597](https://issues.apache.org/jira/browse/SPARK-33597), [SPARK-34376](https://issues.apache.org/jira/browse/SPARK-34376))
+ * try_cast ([SPARK-34881](https://issues.apache.org/jira/browse/SPARK-34881))
+ * try_add ([SPARK-35162](https://issues.apache.org/jira/browse/SPARK-35162))
+ * try_divide ([SPARK-35162](https://issues.apache.org/jira/browse/SPARK-35162))
+ * bit_get ([SPARK-33245](https://issues.apache.org/jira/browse/SPARK-33245))
+* Use Apache Hadoop 3.3.1 by default ([SPARK-29250](https://issues.apache.org/jira/browse/SPARK-29250))
+* Make user-defined type (UDT) API public ([SPARK-7768](https://issues.apache.org/jira/browse/SPARK-7768))
+* Add checksum for shuffle blocks ([SPARK-35275](https://issues.apache.org/jira/browse/SPARK-35275))
+* Enable spark.storage.replication.proactive by default ([SPARK-33870](https://issues.apache.org/jira/browse/SPARK-33870))
+* Add table function registry ([SPARK-34678](https://issues.apache.org/jira/browse/SPARK-34678))
+* Support Fallback Storage Cleanup during stopping SparkContext ([SPARK-34142](https://issues.apache.org/jira/browse/SPARK-34142))
+* Support Java enums from Scala Dataset API ([SPARK-23862](https://issues.apache.org/jira/browse/SPARK-23862))
+* ADD JAR with ivy coordinates should be compatible with Hive transitive behavior ([SPARK-34506](https://issues.apache.org/jira/browse/SPARK-34506))
+* Support ADD ARCHIVE and LIST ARCHIVES command ([SPARK-34603](https://issues.apache.org/jira/browse/SPARK-34603))
+* Support multiple paths for ADD FILE/JAR/ARCHIVE commands ([SPARK-35105](https://issues.apache.org/jira/browse/SPARK-35105))
+* Support archive files as resources for CREATE FUNCTION USING syntax ([SPARK-35236](https://issues.apache.org/jira/browse/SPARK-35236))
+* Loading SparkSessionExtensions from ServiceLoader ([SPARK-35380](https://issues.apache.org/jira/browse/SPARK-35380))
+* Add sentences function to functions.{scala,py} ([SPARK-35418](https://issues.apache.org/jira/browse/SPARK-35418))
+* Extend the decode function so that it is consistent with mainstream databases ([SPARK-33527](https://issues.apache.org/jira/browse/SPARK-33527))
+* Apply spark.sql.hive.metastorePartitionPruning for non-Hive tables that uses Hive metastore for partition management ([SPARK-36128](https://issues.apache.org/jira/browse/SPARK-36128))
+* Support creating tables with null column ([SPARK-36241](https://issues.apache.org/jira/browse/SPARK-36241))
+* Propagate reason for exec loss to Web UI ([SPARK-34764](https://issues.apache.org/jira/browse/SPARK-34764))
+* Avoid inlining non-deterministic With-CTEs ([SPARK-36447](https://issues.apache.org/jira/browse/SPARK-36447))
+* Support analyze all tables in a specific database ([SPARK-33687](https://issues.apache.org/jira/browse/SPARK-33687))
+* Standardize exception messages in Spark ([SPARK-33539](https://issues.apache.org/jira/browse/SPARK-33539))
+* Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE ([SPARK-30789](https://issues.apache.org/jira/browse/SPARK-30789))
+
+**Other Notable Changes**
+
+* Monitor
+ * New metrics to ExternalShuffleService ([SPARK-35258](https://issues.apache.org/jira/browse/SPARK-35258))
+ * Add new stage-level REST APIs and parameters ([SPARK-26399](https://issues.apache.org/jira/browse/SPARK-26399))
+ * Support task and executor Metrics Distributions in the REST API ([SPARK-34488](https://issues.apache.org/jira/browse/SPARK-34488))
+ * Add fallback metrics for hash aggregate ([SPARK-35529](https://issues.apache.org/jira/browse/SPARK-35529), [SPARK-34237](https://issues.apache.org/jira/browse/SPARK-34237))
+* Add count_distinct as an option to Dataset#summary ([SPARK-34165](https://issues.apache.org/jira/browse/SPARK-34165))
+* Introduce SQLSTATE and ERRORCODE to SQL Exception ([SPARK-34920](https://issues.apache.org/jira/browse/SPARK-34920))
+* Implement ScriptTransform in sql/core ([SPARK-31936](https://issues.apache.org/jira/browse/SPARK-31936))
+* Keep dependants cached while refreshing v1 tables ([SPARK-34138](https://issues.apache.org/jira/browse/SPARK-34138))
+* Make BlockManagerMaster driver heartbeat timeout configurable ([SPARK-34278](https://issues.apache.org/jira/browse/SPARK-34278))
+* Fix io.file.buffer.size set by spark.buffer.size being accidentally overridden when loading hive-site.xml, which may cause a performance regression ([SPARK-34346](https://issues.apache.org/jira/browse/SPARK-34346))
+* Make shuffle service name configurable on client-side and allow for classpath-based config override on server side ([SPARK-34828](https://issues.apache.org/jira/browse/SPARK-34828))
+* ExecutorMetricsPoller should keep stage entry in stageTCMP until a heartbeat occurs ([SPARK-34779](https://issues.apache.org/jira/browse/SPARK-34779))
+* Replace if with filter clause in RewriteDistinctAggregates ([SPARK-34882](https://issues.apache.org/jira/browse/SPARK-34882))
+* CREATE TABLE LIKE should respect the reserved table properties ([SPARK-34935](https://issues.apache.org/jira/browse/SPARK-34935))
+* Fix nested column pruning for extracting case-insensitive struct field from array of struct ([SPARK-34963](https://issues.apache.org/jira/browse/SPARK-34963))
+* Ship ivySettings file to driver in YARN cluster mode ([SPARK-34472](https://issues.apache.org/jira/browse/SPARK-34472))
+* Resolve duplicated common columns from USING/NATURAL JOIN ([SPARK-34527](https://issues.apache.org/jira/browse/SPARK-34527))
+* Invoke should find the method with correct number of parameters ([SPARK-35278](https://issues.apache.org/jira/browse/SPARK-35278))
+* Don't optimize out grouping expressions from aggregate expressions without aggregate function ([SPARK-34581](https://issues.apache.org/jira/browse/SPARK-34581))
+* Support resolving missing attrs for distribute/cluster by/repartition hint ([SPARK-35331](https://issues.apache.org/jira/browse/SPARK-35331))
+* Improve s3a magic committer support by inferring missing configs ([SPARK-35383](https://issues.apache.org/jira/browse/SPARK-35383))
+* Avoid rename failures caused by a nonexistent destination directory ([SPARK-35106](https://issues.apache.org/jira/browse/SPARK-35106))
+* BlockManagerMasterEndpoint should not ignore index-only shuffle file during updating ([SPARK-35589](https://issues.apache.org/jira/browse/SPARK-35589))
+* Redact the sensitive info in the result of Set command ([SPARK-35576](https://issues.apache.org/jira/browse/SPARK-35576))
+* Update state schema to be able to accept long length JSON ([SPARK-35602](https://issues.apache.org/jira/browse/SPARK-35602))
+* Consider making the ':' in STRUCT data type definition optional ([SPARK-35706](https://issues.apache.org/jira/browse/SPARK-35706))
+* View should not capture configs used in RelationConversions ([SPARK-35792](https://issues.apache.org/jira/browse/SPARK-35792))
+* Append new nested struct fields rather than sort for unionByName with null filling ([SPARK-35290](https://issues.apache.org/jira/browse/SPARK-35290))
+* Use Void as the type name of NullType ([SPARK-36224](https://issues.apache.org/jira/browse/SPARK-36224))
+
+### Structured Streaming
+
+**Major feature**
+
+* Add RocksDB StateStore implementation ([SPARK-34198](https://issues.apache.org/jira/browse/SPARK-34198))
+* EventTime based sessionization (session window) ([SPARK-10816](https://issues.apache.org/jira/browse/SPARK-10816))
+* Upgrade Kafka client to 2.8.0 ([SPARK-33913](https://issues.apache.org/jira/browse/SPARK-33913))
+
+**Other Notable Changes**
+
+* Support user-defined initial state with flatMapGroupsWithState in Structured Streaming ([SPARK-35897](https://issues.apache.org/jira/browse/SPARK-35897))
+* Support initial state for flatMapGroupsWithState in batch mode ([SPARK-36132](https://issues.apache.org/jira/browse/SPARK-36132))
+* Introduce the strategy on mismatched offset for start offset timestamp on Kafka data source ([SPARK-35611](https://issues.apache.org/jira/browse/SPARK-35611))
+* Introduce a new Option in Kafka source to specify a minimum number of records to read per trigger ([SPARK-35312](https://issues.apache.org/jira/browse/SPARK-35312))
+* Include more granular metrics for stateful operators in StreamingQueryProgress ([SPARK-35896](https://issues.apache.org/jira/browse/SPARK-35896))
+* Expose source metrics via progress report and add Kafka use-case to report delay ([SPARK-34854](https://issues.apache.org/jira/browse/SPARK-34854))
+* Add config for ignoring metadata directory of FileStreamSink ([SPARK-35565](https://issues.apache.org/jira/browse/SPARK-35565))
+* Add latest offsets to source progress ([SPARK-33955](https://issues.apache.org/jira/browse/SPARK-33955))
+
+### PySpark
+
+**Project Zen**
+
+* Pandas API on Spark ([SPARK-34849](https://issues.apache.org/jira/browse/SPARK-34849))
+ * Enable mypy for pandas-on-Spark ([SPARK-34941](https://issues.apache.org/jira/browse/SPARK-34941))
+ * Implement CategoricalDtype support ([SPARK-35997](https://issues.apache.org/jira/browse/SPARK-35997), [SPARK-36185](https://issues.apache.org/jira/browse/SPARK-36185))
+ * Complete the basic operations of Series and Index ([SPARK-36103](https://issues.apache.org/jira/browse/SPARK-36103), [SPARK-36104](https://issues.apache.org/jira/browse/SPARK-36104), [SPARK-36192](https://issues.apache.org/jira/browse/SPARK-36192))
+ * Match behaviors to 1.3 pandas ([SPARK-36367](https://issues.apache.org/jira/browse/SPARK-36367))
+ * Match behaviors on Series with NaN to pandas ([SPARK-36031](https://issues.apache.org/jira/browse/SPARK-36031), [SPARK-36310](https://issues.apache.org/jira/browse/SPARK-36310))
+ * Implement unary operator 'invert' of integral Series and Index ([SPARK-36003](https://issues.apache.org/jira/browse/SPARK-36003))
+ * Implement CategoricalIndex.map and DatetimeIndex.map ([SPARK-36470](https://issues.apache.org/jira/browse/SPARK-36470))
+ * Implement Index.map ([SPARK-36469](https://issues.apache.org/jira/browse/SPARK-36469))
+* Add faulthandler support for Python worker crashes ([SPARK-36062](https://issues.apache.org/jira/browse/SPARK-36062))
+* Use snake_case naming across the function APIs ([SPARK-34306](https://issues.apache.org/jira/browse/SPARK-34306))
+* Enable spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled by default ([SPARK-35419](https://issues.apache.org/jira/browse/SPARK-35419))
+* Support to infer nested dict as a struct when creating a DataFrame ([SPARK-35929](https://issues.apache.org/jira/browse/SPARK-35929))
+
+**Other Notable Changes**
+
+* Enable pinned thread mode by default ([SPARK-35303](https://issues.apache.org/jira/browse/SPARK-35303))
+* Add NullType support for Arrow executions ([SPARK-33489](https://issues.apache.org/jira/browse/SPARK-33489))
+* Add Arrow self_destruct support to toPandas ([SPARK-32953](https://issues.apache.org/jira/browse/SPARK-32953))
+* Add thread target wrapper API for pyspark pin thread mode ([SPARK-35498](https://issues.apache.org/jira/browse/SPARK-35498))
+
+### MLlib
+
+**Performance improvements**
+
+* BucketedRandomProjectionLSH transform optimization ([SPARK-34220](https://issues.apache.org/jira/browse/SPARK-34220))
+* Word2Vec findSynonyms optimization ([SPARK-34189](https://issues.apache.org/jira/browse/SPARK-34189))
+* Optimize sparse GEMM by skipping bound checking ([SPARK-35707](https://issues.apache.org/jira/browse/SPARK-35707))
+* Improve performance of ML ALS recommendForAll by GEMV ([SPARK-33518](https://issues.apache.org/jira/browse/SPARK-33518))
+
+**Model training improvements**
+
+* Refactor Logistic Aggregator - support virtual centering ([SPARK-34797](https://issues.apache.org/jira/browse/SPARK-34797))
+* Binary Logistic Regression with intercept support centering ([SPARK-34858](https://issues.apache.org/jira/browse/SPARK-34858), [SPARK-34448](https://issues.apache.org/jira/browse/SPARK-34448))
+* Multinomial Logistic Regression with intercept support centering ([SPARK-34860](https://issues.apache.org/jira/browse/SPARK-34860))
+
+**BLAS improvements**
+
+* Replace fully com.github.fommil.netlib by dev.ludovic.netlib:2.0 ([SPARK-35295](https://issues.apache.org/jira/browse/SPARK-35295))
+* Add a vectorized BLAS implementation ([SPARK-33882](https://issues.apache.org/jira/browse/SPARK-33882))
+* Accelerate fallback BLAS with dev.ludovic.netlib ([SPARK-35150](https://issues.apache.org/jira/browse/SPARK-35150))
+
+
+**Other Notable Changes**
+
+* OVR transform fix potential column conflict ([SPARK-34356](https://issues.apache.org/jira/browse/SPARK-34356))
+
+_Programming guide: [Machine Learning Library (MLlib) Guide](https://spark.apache.org/docs/3.2.0/ml-guide.html)._
+
+### SparkR
+
+* Use keyserver.ubuntu.com as a keyserver for CRAN ([SPARK-35885](https://issues.apache.org/jira/browse/SPARK-35885))
+
+_Programming guide: [SparkR (R on Spark)](https://spark.apache.org/docs/3.2.0/sparkr.html)._
+
+
+### GraphX
+
+* Allow to turn off the normalization applied by static PageRank utilities ([SPARK-35357](https://issues.apache.org/jira/browse/SPARK-35357))
+
+_Programming guide: [GraphX Programming Guide](https://spark.apache.org/docs/3.2.0/graphx-programming-guide.html)._
+
+### Deprecations and Removals
+
+* Deprecate spark.launcher.childConectionTimeout ([SPARK-33717](https://issues.apache.org/jira/browse/SPARK-33717))
+* Deprecate GROUP BY ... GROUPING SETS (...) and promote GROUP BY GROUPING SETS (...) ([SPARK-34932](https://issues.apache.org/jira/browse/SPARK-34932))
+* Deprecate Python 3.6 in Spark documentation ([SPARK-35939](https://issues.apache.org/jira/browse/SPARK-35939))
+* Deprecate ps.broadcast API ([SPARK-35810](https://issues.apache.org/jira/browse/SPARK-35810))
+* Deprecate the num_files argument ([SPARK-35807](https://issues.apache.org/jira/browse/SPARK-35807))
+* Deprecate DataFrame.to_spark_io ([SPARK-35811](https://issues.apache.org/jira/browse/SPARK-35811))
+
+
+### Known Issues
+
+* Support fetching shuffle blocks in batch with i/o encryption ([SPARK-34827](https://issues.apache.org/jira/browse/SPARK-34827))
+* Fail to load Snappy codec ([SPARK-36681](https://issues.apache.org/jira/browse/SPARK-36681))
+* Can not insert into hive bucket table if table is created with an uppercase schema ([SPARK-35531](https://issues.apache.org/jira/browse/SPARK-35531))
+* Reading Hive view without explicit column names fails in Spark ([SPARK-36905](https://issues.apache.org/jira/browse/SPARK-36905))
+
+### Credits
+
+Last but not least, this release would not have been possible without the following contributors: Adam Binford, Ali Afroozeh, Alkis Polyzotis, Allison Wang, Almog Tavor, Amandeep Sharma, Ammar Al-Batool, Andrew Liu, Andy Grove, Ankur Dave, Anton Okolnychyi, Ashray Jain, Attila Zsolt Piros, Ayushi Agarwal, Baohe Zhang, Bo Zhang, Bruce Robbins, Byungsoo Oh, Carlos Peña, Cary Lee, Chandni Singh, Chao Sun, ChaoJun Zhang, Chendi Xue, Cheng Pan, Cheng Su, Chongguang LIU, Chris Thomas, Chris Wu, Daoyuan Wang, David Christle, David Li, David McWhorter, Denis Pyshev, Dereck Li, Dhruv Kumar, Dhruvil Dave, Dingyu Xu, Dominik Gehl, Dongdong Hong, Dongjoon Hyun, Dooyoung Hwang, Duc Hoa Nguyen, Emil Ejbyfeldt, Enzo Bonnal, Erik Krogen, Eugene Koifman, Fabian A.J. Thiele, Fokko Driesprong, Fu Chen, Gabor Somogyi, Gabriele Nizzoli, Gengliang Wang, Gera Shegalov, Gidon Gershinsky, Guangxin Wang, Haejoon Lee, Haiyang Sun, Han, Harsh Panchal, He Qi, Hector Zhang, Holden Karau, Hopefulnick, Huaxin Gao, Hyukjin Kwon, Ionut Boicu, Ismaël Mejía, Ivan Sadikov, Jarek Potiuk, Jason Yarbrough, Jiaan Geng, Jie Hu, Jose Torres, Josh Rosen, Josh Soref, Julien Lafaye, Jungtaek Lim, Kaifei Yi, Kamil Breguła, Karen Feng, Karuppayya Rajendran, Kazuyuki Tanimura, Ke Jia, Keerthan Vasist, Kent Yao, Kevin Pis, Kevin Su, Koert Kuipers, Kousuke Saruta, Kun Wan, Kunlun Huang, Leanken Lin, Lei Peng, Leona Yoda, Li Zhang, Liang-Chi Hsieh, Lidiya Nixon, Linhong Liu, Lipeng Zhu, Luca Canali, Ludovic Henry, Luka Sturtewagen, Lukas Rytz, Luran He, Maciej Szymkiewicz, Marios Meimaris, Maryann Xue, Matthew Powers, Max Gekk, Maya Anderson, Michael Chen, Michael Zhang, Min Shen, Minchu Yang, Mohanad Elsafty, Nicholas Marion, Ohad Raviv, Pablo Langa, Pawel Ptaszynski, Peter Toth, Phillip Henry, Prakhar Jain, Qi Liu, Qi Zhu, Qilong SU, Qingbo Jiao, Quang-Huy Nguyen, Rahul Mahadev, Raza Jafri, Richard Chen, Richard Penney, Rongchuan Jin, Rui Zeng, Ruifeng Zheng, Ryan Blue, Sajith Ariyarathna, Samuel Moseley, Sanket Reddy, Satish Gopalani, Saurabh Chawla, Sean Owen, Serge Rielau, Shahid K I, Shaoyun Chen, Shardul Mahadik, Shiqi Sun, Shixiong Zhu, Steve Loughran, Steven Aerts, Sumeet Gajjar, Swinky Mann, Takeshi Yamamuro, Takuya UESHIN, Tanel Kiis, Tathagata Das, Tengfei Huang, Terry Kim, Tianhan Hu, Tianhua Huang, Tim Armstrong, Tobias Hermann, Tom Van Bussel, Tomas Pereira De Vasconcelos, Twoentartian, Vasily Kolpakov, Venkata Krishnan Sowrirajan, Venkata Sai Akhil Gudesa, Venki Korukanti, Viettel DGD, Vinod KC, Vlad Glinsky, Walid Gara, Wan Kun, Weichen Xu, Wenchen Fan, William Hyun, Xiao Li, Xiduo You, Xingbo Jiang, Xinrong Meng, XiuLi Wei, Xuedong Luan, Yajun Gao, Yang He, Yang Jie, Yazhi Wang, Ye Zhou, Yi Wu, Yi Zhu, Yijia Cui, Yikun Jiang, Yingyi Bu, Yu Zhong, Yuanjian Li, Yuchen Huo, Yuming Wang, Yuto Akutsu, Zebing Lin, Zhang Xingchao, Zhichao Zhang
+
diff --git a/site/committers.html b/site/committers.html
index 3e885926dca..ad5d857097c 100644
--- a/site/committers.html
+++ b/site/committers.html
@@ -84,7 +84,7 @@
Documentation
diff --git a/site/docs/latest b/site/docs/latest
index 6ebad148881..a4f52a5dbb5 120000
--- a/site/docs/latest
+++ b/site/docs/latest
@@ -1 +1 @@
-3.1.2
\ No newline at end of file
+3.2.0
\ No newline at end of file
diff --git a/site/documentation.html b/site/documentation.html
index 8ee73afac33..4d0a12d7c89 100644
--- a/site/documentation.html
+++ b/site/documentation.html
@@ -84,7 +84,7 @@
Documentation
Apache Spark 3.2.0 is the third release of the 3.x line. With tremendous contribution from the open-source community, this release managed to resolve in excess of 1,700 Jira tickets.
+
+
In this release, Spark supports the Pandas API layer on Spark. Pandas users can scale out their applications on Spark with one line code change. Other major updates include RocksDB StateStore support, session window support, push-based shuffle support, ANSI SQL INTERVAL types, enabling Adaptive Query Execution (AQE) by default, and ANSI SQL mode GA.
+
+
To download Apache Spark 3.2.0, visit the downloads page. You can consult JIRA for the detailed changes. We have curated a list of high level changes here, grouped by major modules.
Keep necessary stats after partition pruning (SPARK-34119)
+
Decouple bucket filter pruning and bucket table scan (SPARK-32985)
+
+
+
Query execution
+
+
Adaptive query execution
+
+
Enable adaptive query execution by default (SPARK-33679)
+
Support Dynamic Partition Pruning (DPP) in AQE when the join is broadcast hash join at the beginning or there is no reused broadcast exchange (SPARK-34168, SPARK-35710)
+
Optimize skew join before coalescing shuffle partitions (SPARK-35447)
+
Support AQE side shuffled hash join formula using rule (SPARK-35282)
+
Support AQE side broadcast hash join threshold (SPARK-35264)
+
Allow custom plugin for AQE cost evaluator (SPARK-35794)
Can not insert into hive bucket table if table is created with an uppercase schema (SPARK-35531)
Reading Hive view without explicit column names fails in Spark (SPARK-36905)
+
Job cancellation causes py4j errors on Jupyter due to pinned thread mode (SPARK-37004)
Credits
From dd8e8dd42990fbcb1bcb5e7dd4bb3d74e2ed0397 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 18 Oct 2021 12:52:02 +0800
Subject: [PATCH 05/12] address comments
---
releases/_posts/2021-10-13-spark-release-3-2-0.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/releases/_posts/2021-10-13-spark-release-3-2-0.md b/releases/_posts/2021-10-13-spark-release-3-2-0.md
index 5958b103b2b..c5d3287833d 100644
--- a/releases/_posts/2021-10-13-spark-release-3-2-0.md
+++ b/releases/_posts/2021-10-13-spark-release-3-2-0.md
@@ -39,6 +39,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Support for ANSI SQL INTERVAL types ([SPARK-27790](https://issues.apache.org/jira/browse/SPARK-27790))
* New type coercion syntax rules in ANSI mode ([SPARK-34246](https://issues.apache.org/jira/browse/SPARK-34246))
+* Support LATERAL subqueries ([SPARK-34382](https://issues.apache.org/jira/browse/SPARK-34382))
* ANSI mode: IntegralDivide throws exception on overflow ([SPARK-35152](https://issues.apache.org/jira/browse/SPARK-35152))
* ANSI mode: Check for overflow in Average ([SPARK-35955](https://issues.apache.org/jira/browse/SPARK-35955))
* Block count(table.*) to follow ANSI standard and other SQL engines ([SPARK-34199](https://issues.apache.org/jira/browse/SPARK-34199))
@@ -140,8 +141,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
**Feature Enhancements**
-* Subquery
- * Support LATERAL subqueries ([SPARK-34382](https://issues.apache.org/jira/browse/SPARK-34382))
+* Subquery improvements
* Improve correlated subqueries ([SPARK-35553](https://issues.apache.org/jira/browse/SPARK-35553))
* Allow non-aggregated single row correlated scalar subquery ([SPARK-28379](https://issues.apache.org/jira/browse/SPARK-28379))
* Only allow a subset of correlated equality predicates when a subquery is aggregated ([SPARK-35080](https://issues.apache.org/jira/browse/SPARK-35080))
@@ -162,7 +162,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Support Fallback Storage Cleanup during stopping SparkContext ([SPARK-34142](https://issues.apache.org/jira/browse/SPARK-34142))
* Support Java enums from Scala Dataset API ([SPARK-23862](https://issues.apache.org/jira/browse/SPARK-23862))
* ADD JAR with ivy coordinates should be compatible with Hive transitive behavior ([SPARK-34506](https://issues.apache.org/jira/browse/SPARK-34506))
-* Support ADD ARCHIVE and LIST ARCHIVES command ([SPARK-34603](https://issues.apache.org/jira/browse/SPARK-34603))
+* Support ADD ARCHIVE and LIST ARCHIVES commands ([SPARK-34603](https://issues.apache.org/jira/browse/SPARK-34603))
* Support multiple paths for ADD FILE/JAR/ARCHIVE commands ([SPARK-35105](https://issues.apache.org/jira/browse/SPARK-35105))
* Support archive files as resources for CREATE FUNCTION USING syntax ([SPARK-35236](https://issues.apache.org/jira/browse/SPARK-35236))
* Loading SparkSessionExtensions from ServiceLoader ([SPARK-35380](https://issues.apache.org/jira/browse/SPARK-35380))
@@ -172,7 +172,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Support creating tables with null column ([SPARK-36241](https://issues.apache.org/jira/browse/SPARK-36241))
* Propagate reason for exec loss to Web UI ([SPARK-34764](https://issues.apache.org/jira/browse/SPARK-34764))
* Avoid inlining non-deterministic With-CTEs ([SPARK-36447](https://issues.apache.org/jira/browse/SPARK-36447))
-* Support analyze all tables in a specific database ([SPARK-33687](https://issues.apache.org/jira/browse/SPARK-33687))
+* Support analyzing all tables in a specific database ([SPARK-33687](https://issues.apache.org/jira/browse/SPARK-33687))
* Standardize exception messages in Spark ([SPARK-33539](https://issues.apache.org/jira/browse/SPARK-33539))
* Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE ([SPARK-30789](https://issues.apache.org/jira/browse/SPARK-30789))
From eba122a033778555727ecc190988e23f3678d02d Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 18 Oct 2021 13:21:02 +0800
Subject: [PATCH 06/12] revise wordings
---
.../_posts/2021-10-13-spark-release-3-2-0.md | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/releases/_posts/2021-10-13-spark-release-3-2-0.md b/releases/_posts/2021-10-13-spark-release-3-2-0.md
index c5d3287833d..04e2faf32df 100644
--- a/releases/_posts/2021-10-13-spark-release-3-2-0.md
+++ b/releases/_posts/2021-10-13-spark-release-3-2-0.md
@@ -40,7 +40,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Support for ANSI SQL INTERVAL types ([SPARK-27790](https://issues.apache.org/jira/browse/SPARK-27790))
* New type coercion syntax rules in ANSI mode ([SPARK-34246](https://issues.apache.org/jira/browse/SPARK-34246))
* Support LATERAL subqueries ([SPARK-34382](https://issues.apache.org/jira/browse/SPARK-34382))
-* ANSI mode: IntegralDivide throws exception on overflow ([SPARK-35152](https://issues.apache.org/jira/browse/SPARK-35152))
+* ANSI mode: IntegralDivide throws an exception on overflow ([SPARK-35152](https://issues.apache.org/jira/browse/SPARK-35152))
* ANSI mode: Check for overflow in Average ([SPARK-35955](https://issues.apache.org/jira/browse/SPARK-35955))
* Block count(table.*) to follow ANSI standard and other SQL engines ([SPARK-34199](https://issues.apache.org/jira/browse/SPARK-34199))
@@ -54,9 +54,9 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Remove redundant aggregates in the Optimizer ([SPARK-33122](https://issues.apache.org/jira/browse/SPARK-33122))
* Push down limit through Project with Join ([SPARK-34622](https://issues.apache.org/jira/browse/SPARK-34622))
* Push down limit for LEFT SEMI and LEFT ANTI join ([SPARK-36404](https://issues.apache.org/jira/browse/SPARK-36404), [SPARK-34514](https://issues.apache.org/jira/browse/SPARK-34514))
- * Push down limit through window when partitionSpec is empty ([SPARK-34575](https://issues.apache.org/jira/browse/SPARK-34575))
+ * Push down limit through WINDOW when partition spec is empty ([SPARK-34575](https://issues.apache.org/jira/browse/SPARK-34575))
* Use a relative cost comparison function in the CBO ([SPARK-34922](https://issues.apache.org/jira/browse/SPARK-34922))
- * Cardinality estimation of union, sort and range operator ([SPARK-33411](https://issues.apache.org/jira/browse/SPARK-33411))
+ * Cardinality estimation of union, sort, and range operator ([SPARK-33411](https://issues.apache.org/jira/browse/SPARK-33411))
* Only push down LeftSemi/LeftAnti over Aggregate if join can be planned as broadcast join ([SPARK-34081](https://issues.apache.org/jira/browse/SPARK-34081))
* UnwrapCastInBinaryComparison support In/InSet predicate ([SPARK-35316](https://issues.apache.org/jira/browse/SPARK-35316))
* Subexpression elimination enhancements ([SPARK-35448](https://issues.apache.org/jira/browse/SPARK-35448))
@@ -71,7 +71,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Support AQE side broadcast hash join threshold ([SPARK-35264](https://issues.apache.org/jira/browse/SPARK-35264))
* Allow custom plugin for AQE cost evaluator ([SPARK-35794](https://issues.apache.org/jira/browse/SPARK-35794))
* Enable Zstandard buffer pool by default ([SPARK-34340](https://issues.apache.org/jira/browse/SPARK-34340), [SPARK-34390](https://issues.apache.org/jira/browse/SPARK-34390))
- * Add code-gen for all join types of sort merge join ([SPARK-34705](https://issues.apache.org/jira/browse/SPARK-34705))
+ * Add code-gen for all join types of sort-merge join ([SPARK-34705](https://issues.apache.org/jira/browse/SPARK-34705))
* Whole plan exchange and subquery reuse ([SPARK-29375](https://issues.apache.org/jira/browse/SPARK-29375))
* Broadcast nested loop join improvement ([SPARK-34706](https://issues.apache.org/jira/browse/SPARK-34706))
* Support two levels of hash maps for final hash aggregation ([SPARK-35141](https://issues.apache.org/jira/browse/SPARK-35141))
@@ -134,7 +134,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Add interfaces to pass the required sorting and clustering for writes ([SPARK-23889](https://issues.apache.org/jira/browse/SPARK-23889))
* Support metrics from Datasource v2 scan ([SPARK-34338](https://issues.apache.org/jira/browse/SPARK-34338))
* Support metrics at writing path ([SPARK-36030](https://issues.apache.org/jira/browse/SPARK-36030))
-* Support partitioning with static number on the required distribution and ordering on write ([SPARK-34255](https://issues.apache.org/jira/browse/SPARK-34255))
+* Support partitioning with a static number on the required distribution and ordering on write ([SPARK-34255](https://issues.apache.org/jira/browse/SPARK-34255))
* Support Dynamic filtering ([SPARK-35779](https://issues.apache.org/jira/browse/SPARK-35779))
* Support LocalScan ([SPARK-35535](https://issues.apache.org/jira/browse/SPARK-35535))
* MERGE ... UPDATE/INSERT * should do by-name resolution ([SPARK-34720](https://issues.apache.org/jira/browse/SPARK-34720))
@@ -189,18 +189,18 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Keep dependants cached while refreshing v1 tables ([SPARK-34138](https://issues.apache.org/jira/browse/SPARK-34138))
* Make BlockManagerMaster driver heartbeat timeout configurable ([SPARK-34278](https://issues.apache.org/jira/browse/SPARK-34278))
* io.file.buffer.size set by spark.buffer.size will be accidentally overridden by loading hive-site.xml, which may cause perf regression ([SPARK-34346](https://issues.apache.org/jira/browse/SPARK-34346))
-* Make shuffle service name configurable on client-side and allow for classpath-based config override on server side ([SPARK-34828](https://issues.apache.org/jira/browse/SPARK-34828))
+* Make shuffle service name configurable on client-side and allow for classpath-based config override on the server side ([SPARK-34828](https://issues.apache.org/jira/browse/SPARK-34828))
* ExecutorMetricsPoller should keep stage entry in stageTCMP until a heartbeat occurs ([SPARK-34779](https://issues.apache.org/jira/browse/SPARK-34779))
* Replace if with filter clause in RewriteDistinctAggregates ([SPARK-34882](https://issues.apache.org/jira/browse/SPARK-34882))
* CREATE TABLE LIKE should respect the reserved table properties ([SPARK-34935](https://issues.apache.org/jira/browse/SPARK-34935))
* Fix nested column pruning for extracting case-insensitive struct field from array of struct ([SPARK-34963](https://issues.apache.org/jira/browse/SPARK-34963))
-* Ship ivySettings file to driver in YARN cluster mode ([SPARK-34472](https://issues.apache.org/jira/browse/SPARK-34472))
+* Ship ivySettings file to the Driver in YARN cluster mode ([SPARK-34472](https://issues.apache.org/jira/browse/SPARK-34472))
* Resolve duplicated common columns from USING/NATURAL JOIN ([SPARK-34527](https://issues.apache.org/jira/browse/SPARK-34527))
* Invoke should find the method with the correct number of parameters ([SPARK-35278](https://issues.apache.org/jira/browse/SPARK-35278))
* Don't optimize out grouping expressions from aggregate expressions without aggregate function ([SPARK-34581](https://issues.apache.org/jira/browse/SPARK-34581))
-* Support resolving missing attrs for distribute/cluster by/repartition hint ([SPARK-35331](https://issues.apache.org/jira/browse/SPARK-35331))
+* Support resolving missing attributes for distribute/cluster by/repartition hint ([SPARK-35331](https://issues.apache.org/jira/browse/SPARK-35331))
* Improve s3a magic committer support by inferring missing configs ([SPARK-35383](https://issues.apache.org/jira/browse/SPARK-35383))
-* Avoid failing rename caused by destination directory not exist ([SPARK-35106](https://issues.apache.org/jira/browse/SPARK-35106))
+* Avoid file rename failure caused by a nonexistent destination directory ([SPARK-35106](https://issues.apache.org/jira/browse/SPARK-35106))
* BlockManagerMasterEndpoint should not ignore index-only shuffle file during updating ([SPARK-35589](https://issues.apache.org/jira/browse/SPARK-35589))
* Redact the sensitive info in the result of Set command ([SPARK-35576](https://issues.apache.org/jira/browse/SPARK-35576))
* Update state schema to be able to accept long length JSON ([SPARK-35602](https://issues.apache.org/jira/browse/SPARK-35602))
@@ -251,7 +251,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Enable pinned thread mode by default ([SPARK-35303](https://issues.apache.org/jira/browse/SPARK-35303))
* Add NullType support for Arrow executions ([SPARK-33489](https://issues.apache.org/jira/browse/SPARK-33489))
* Add Arrow self_destruct support to toPandas ([SPARK-32953](https://issues.apache.org/jira/browse/SPARK-32953))
-* Add thread target wrapper API for pyspark pin thread mode ([SPARK-35498](https://issues.apache.org/jira/browse/SPARK-35498))
+* Add thread target wrapper API for PySpark pin thread mode ([SPARK-35498](https://issues.apache.org/jira/browse/SPARK-35498))
### MLLIB
From f53518c6e668f684267a914cc716223bd9c18591 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 18 Oct 2021 13:27:58 +0800
Subject: [PATCH 07/12] generate html
---
site/releases/spark-release-3-2-0.html | 28 +++++++++++++-------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/site/releases/spark-release-3-2-0.html b/site/releases/spark-release-3-2-0.html
index 4c9f48c805f..b26f54c8032 100644
--- a/site/releases/spark-release-3-2-0.html
+++ b/site/releases/spark-release-3-2-0.html
@@ -187,7 +187,8 @@
Support analyze all tables in a specific database (SPARK-33687)
+
Support analyzing all tables in a specific database (SPARK-33687)
Standardize exception messages in Spark (SPARK-33539)
@@ -397,18 +397,18 @@
Core and Spark SQL
Keep dependants cached while refreshing v1 tables (SPARK-34138)
Make BlockManagerMaster driver heartbeat timeout configurable (SPARK-34278)
io.file.buffer.size set by spark.buffer.size will override by loading hive-site.xml accidentally may cause perf regression (SPARK-34346)
-
Make shuffle service name configurable on client-side and allow for classpath-based config override on server side (SPARK-34828)
+
Make shuffle service name configurable on client-side and allow for classpath-based config override on the server side (SPARK-34828)
ExecutorMetricsPoller should keep stage entry in stageTCMP until a heartbeat occurs (SPARK-34779)
Replace if with filter clause in RewriteDistinctAggregates (SPARK-34882)
CREATE TABLE LIKE should respect the reserved table properties (SPARK-34935)
Fix nested column pruning for extracting case-insensitive struct field from array of struct (SPARK-34963)
-
Ship ivySettings file to driver in YARN cluster mode (SPARK-34472)
+
Ship ivySettings file to the Driver in YARN cluster mode (SPARK-34472)
Resolve duplicated common columns from USING/NATURAL JOIN (SPARK-34527)
Invoke should find the method with correct number of parameters (SPARK-35278)
Don’t optimize out grouping expressions from aggregate expressions without aggregate function (SPARK-34581)
-
Support resolving missing attrs for distribute/cluster by/repartition hint (SPARK-35331)
+
Support resolving missing attributes for distribute/cluster by/repartition hint (SPARK-35331)
Improve s3a magic committer support by inferring missing configs (SPARK-35383)
-
Avoid failing rename caused by destination directory not exist (SPARK-35106)
+
Avoid file rename failure caused by destination directory not exist (SPARK-35106)
BlockManagerMasterEndpoint should not ignore index-only shuffle file during updating (SPARK-35589)
Redact the sensitive info in the result of Set command (SPARK-35576)
Update state schema to be able to accept long length JSON (SPARK-35602)
@@ -470,7 +470,7 @@
PySpark
Enable pinned thread mode by default (SPARK-35303)
Add NullType support for Arrow executions (SPARK-33489)
Add Arrow self_destruct support to toPandas (SPARK-32953)
-
Add thread target wrapper API for pyspark pin thread mode (SPARK-35498)
+
Add thread target wrapper API for PySpark pin thread mode (SPARK-35498)
MLLIB
From 5b1e3e9c5e8f429dda77fd87d49003f2708c260f Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 18 Oct 2021 15:00:37 +0800
Subject: [PATCH 08/12] address review comments
---
releases/_posts/2021-10-13-spark-release-3-2-0.md | 10 +++++-----
site/releases/spark-release-3-2-0.html | 12 ++++++------
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/releases/_posts/2021-10-13-spark-release-3-2-0.md b/releases/_posts/2021-10-13-spark-release-3-2-0.md
index 04e2faf32df..3038932d05d 100644
--- a/releases/_posts/2021-10-13-spark-release-3-2-0.md
+++ b/releases/_posts/2021-10-13-spark-release-3-2-0.md
@@ -82,7 +82,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Parquet
* Upgrade Apache Parquet used to version 1.12.1 ([SPARK-36726](https://issues.apache.org/jira/browse/SPARK-36726))
- * Parquet vectorized reader support column index ([SPARK-34289](https://issues.apache.org/jira/browse/SPARK-34289))
+ * Support column index in Parquet vectorized reader ([SPARK-34289](https://issues.apache.org/jira/browse/SPARK-34289))
* Add new parquet data source options to control datetime rebasing in read ([SPARK-34377](https://issues.apache.org/jira/browse/SPARK-34377))
* Read parquet unsigned types that are stored as int32 physical type in parquet ([SPARK-34817](https://issues.apache.org/jira/browse/SPARK-34817))
* Read Parquet unsigned int64 logical type that is stored as signed int64 physical type to decimal(20, 0) ([SPARK-34786](https://issues.apache.org/jira/browse/SPARK-34786))
@@ -178,10 +178,10 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
**Other Notable Changes**
-* Monitor
+* Monitoring
* New metrics to ExternalShuffleService ([SPARK-35258](https://issues.apache.org/jira/browse/SPARK-35258))
* Add new stage-level REST APIs and parameters ([SPARK-26399](https://issues.apache.org/jira/browse/SPARK-26399))
- * Support task and executor Metrics Distributions in the REST API ([SPARK-34488](https://issues.apache.org/jira/browse/SPARK-34488))
+ * Support task and executor Metrics Distributions in the REST API ([SPARK-34488](https://issues.apache.org/jira/browse/SPARK-34488))
* Add fallback metrics for hash aggregate ([SPARK-35529](https://issues.apache.org/jira/browse/SPARK-35529), [SPARK-34237](https://issues.apache.org/jira/browse/SPARK-34237))
* Add count_distinct as an option to Dataset#summary ([SPARK-34165](https://issues.apache.org/jira/browse/SPARK-34165))
* Introduce SQLSTATE and ERRORCODE to SQL Exception ([SPARK-34920](https://issues.apache.org/jira/browse/SPARK-34920))
@@ -236,7 +236,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Enable mypy for pandas-on-Spark ([SPARK-34941](https://issues.apache.org/jira/browse/SPARK-34941))
* Implement CategoricalDtype support ([SPARK-35997](https://issues.apache.org/jira/browse/SPARK-35997), [SPARK-36185](https://issues.apache.org/jira/browse/SPARK-36185))
* Complete the basic operations of Series and Index ([SPARK-36103](https://issues.apache.org/jira/browse/SPARK-36103), [SPARK-36104](https://issues.apache.org/jira/browse/SPARK-36104), [SPARK-36192](https://issues.apache.org/jira/browse/SPARK-36192))
- * Match behaviors to 1.3 pandas ([SPARK-36367](https://issues.apache.org/jira/browse/SPARK-36367))
+ * Match behaviors to pandas 1.3 ([SPARK-36367](https://issues.apache.org/jira/browse/SPARK-36367))
* Match behaviors on Series with NaN to pandas ([SPARK-36031](https://issues.apache.org/jira/browse/SPARK-36031), [SPARK-36310](https://issues.apache.org/jira/browse/SPARK-36310))
* Implement unary operator 'invert' of integral Series and Index ([SPARK-36003](https://issues.apache.org/jira/browse/SPARK-36003))
* Implement CategoricalIndex.map and DatetimeIndex.map ([SPARK-36470](https://issues.apache.org/jira/browse/SPARK-36470))
@@ -253,7 +253,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Add Arrow self_destruct support to toPandas ([SPARK-32953](https://issues.apache.org/jira/browse/SPARK-32953))
* Add thread target wrapper API for PySpark pin thread mode ([SPARK-35498](https://issues.apache.org/jira/browse/SPARK-35498))
-### MLLIB
+### MLlib
**Performance improvements**
diff --git a/site/releases/spark-release-3-2-0.html b/site/releases/spark-release-3-2-0.html
index b26f54c8032..f4bfe71e87e 100644
--- a/site/releases/spark-release-3-2-0.html
+++ b/site/releases/spark-release-3-2-0.html
@@ -158,7 +158,7 @@
Implement unary operator ‘invert’ of integral Series and Index (SPARK-36003)
Implement CategoricalIndex.map and DatetimeIndex.map (SPARK-36470)
@@ -473,7 +473,7 @@
PySpark
Add thread target wrapper API for PySpark pin thread mode (SPARK-35498)
-
MLLIB
+
MLlib
Performance improvements
From ece00d0953802f5c6835e1de4d0efa169578d6e8 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Mon, 18 Oct 2021 17:09:02 +0800
Subject: [PATCH 09/12] address comments
---
releases/_posts/2021-10-13-spark-release-3-2-0.md | 3 +--
site/releases/spark-release-3-2-0.html | 3 +--
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/releases/_posts/2021-10-13-spark-release-3-2-0.md b/releases/_posts/2021-10-13-spark-release-3-2-0.md
index 3038932d05d..644843a8f00 100644
--- a/releases/_posts/2021-10-13-spark-release-3-2-0.md
+++ b/releases/_posts/2021-10-13-spark-release-3-2-0.md
@@ -74,7 +74,6 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Add code-gen for all join types of sort-merge join ([SPARK-34705](https://issues.apache.org/jira/browse/SPARK-34705))
* Whole plan exchange and subquery reuse ([SPARK-29375](https://issues.apache.org/jira/browse/SPARK-29375))
* Broadcast nested loop join improvement ([SPARK-34706](https://issues.apache.org/jira/browse/SPARK-34706))
- * Support two levels of hash maps for final hash aggregation ([SPARK-35141](https://issues.apache.org/jira/browse/SPARK-35141))
* Allow concurrent writers for writing dynamic partitions and bucket table ([SPARK-26164](https://issues.apache.org/jira/browse/SPARK-26164))
* Improve performance of processing FETCH_PRIOR in Spark Thrift server ([SPARK-33655](https://issues.apache.org/jira/browse/SPARK-33655))
@@ -106,7 +105,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* CSV
* Upgrade univocity-parsers to 2.9.1 ([SPARK-33940](https://issues.apache.org/jira/browse/SPARK-33940))
* JDBC
- * Represent JDBC Time type as Integer in milliseconds ([SPARK-33888](https://issues.apache.org/jira/browse/SPARK-33888))
+ * Map JDBC SQL TIME type to TimestampType with time portion fixed regardless of timezone ([SPARK-34357](https://issues.apache.org/jira/browse/SPARK-34357))
* Calculate more precise partition stride in JDBCRelation ([SPARK-34843](https://issues.apache.org/jira/browse/SPARK-34843))
* Support refreshKrb5Config option in JDBC data sources ([SPARK-35226](https://issues.apache.org/jira/browse/SPARK-35226))
* Hive Metastore support filter by NOT IN ([SPARK-34538](https://issues.apache.org/jira/browse/SPARK-34538))
diff --git a/site/releases/spark-release-3-2-0.html b/site/releases/spark-release-3-2-0.html
index f4bfe71e87e..d0cede5eeeb 100644
--- a/site/releases/spark-release-3-2-0.html
+++ b/site/releases/spark-release-3-2-0.html
@@ -234,7 +234,6 @@
Core and Spark SQL
Add code-gen for all join types of sort-merge join (SPARK-34705)
Whole plan exchange and subquery reuse (SPARK-29375)
Make shuffle service name configurable on client-side and allow for classpath-based config override on the server side (SPARK-34828)
ExecutorMetricsPoller should keep stage entry in stageTCMP until a heartbeat occurs (SPARK-34779)
Replace if with filter clause in RewriteDistinctAggregates (SPARK-34882)
@@ -542,6 +543,7 @@
Known Issues
Can not insert into hive bucket table if table is created with an uppercase schema (SPARK-35531)
Reading Hive view without explicit column names fails in Spark (SPARK-36905)
Job cancellation causes py4j errors on Jupyter due to pinned thread mode (SPARK-37004)
+
Driver mistakenly reports removed executors as alive in UI (SPARK-35011)
Credits
From e79d8c6ebfcaa14d2f722585d7394f1ed5fa0efd Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Tue, 19 Oct 2021 00:19:59 +0800
Subject: [PATCH 12/12] address comments
---
.../_posts/2021-10-13-spark-release-3-2-0.md | 8 +++---
site/releases/spark-release-3-2-0.html | 26 +++++++++----------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/releases/_posts/2021-10-13-spark-release-3-2-0.md b/releases/_posts/2021-10-13-spark-release-3-2-0.md
index 874610e7998..e13d3000791 100644
--- a/releases/_posts/2021-10-13-spark-release-3-2-0.md
+++ b/releases/_posts/2021-10-13-spark-release-3-2-0.md
@@ -23,12 +23,12 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
### Highlights
* Support Pandas API layer on PySpark ([SPARK-34849](https://issues.apache.org/jira/browse/SPARK-34849))
+* Enable adaptive query execution by default ([SPARK-33679](https://issues.apache.org/jira/browse/SPARK-33679))
* Support push-based shuffle to improve shuffle efficiency ([SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602))
* Add RocksDB StateStore implementation ([SPARK-34198](https://issues.apache.org/jira/browse/SPARK-34198))
* EventTime based sessionization (session window) ([SPARK-10816](https://issues.apache.org/jira/browse/SPARK-10816))
* ANSI SQL mode GA ([SPARK-35030](https://issues.apache.org/jira/browse/SPARK-35030))
* Support for ANSI SQL INTERVAL types ([SPARK-27790](https://issues.apache.org/jira/browse/SPARK-27790))
-* Enable adaptive query execution by default ([SPARK-33679](https://issues.apache.org/jira/browse/SPARK-33679))
* Query compilation latency reduction ([SPARK-35042](https://issues.apache.org/jira/browse/SPARK-35042), [SPARK-35103](https://issues.apache.org/jira/browse/SPARK-35103), [SPARK-34989](https://issues.apache.org/jira/browse/SPARK-34989))
* Support Scala 2.13 ([SPARK-34218](https://issues.apache.org/jira/browse/SPARK-34218))
@@ -43,6 +43,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* ANSI mode: IntegralDivide throws an exception on overflow ([SPARK-35152](https://issues.apache.org/jira/browse/SPARK-35152))
* ANSI mode: Check for overflow in Average ([SPARK-35955](https://issues.apache.org/jira/browse/SPARK-35955))
* Block count(table.*) to follow ANSI standard and other SQL engines ([SPARK-34199](https://issues.apache.org/jira/browse/SPARK-34199))
+* Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE ([SPARK-30789](https://issues.apache.org/jira/browse/SPARK-30789))
**Performance**
@@ -169,11 +170,9 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Extend the function of decode so that it is consistent with mainstream databases ([SPARK-33527](https://issues.apache.org/jira/browse/SPARK-33527))
* Apply spark.sql.hive.metastorePartitionPruning for non-Hive tables that use Hive metastore for partition management ([SPARK-36128](https://issues.apache.org/jira/browse/SPARK-36128))
* Support creating tables with null column ([SPARK-36241](https://issues.apache.org/jira/browse/SPARK-36241))
-* Propagate reason for exec loss to Web UI ([SPARK-34764](https://issues.apache.org/jira/browse/SPARK-34764))
* Avoid inlining non-deterministic With-CTEs ([SPARK-36447](https://issues.apache.org/jira/browse/SPARK-36447))
* Support analyzing all tables in a specific database ([SPARK-33687](https://issues.apache.org/jira/browse/SPARK-33687))
* Standardize exception messages in Spark ([SPARK-33539](https://issues.apache.org/jira/browse/SPARK-33539))
-* Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE ([SPARK-30789](https://issues.apache.org/jira/browse/SPARK-30789))
**Other Notable Changes**
@@ -182,6 +181,7 @@ To download Apache Spark 3.2.0, visit the [downloads](https://spark.apache.org/d
* Add new stage-level REST APIs and parameters ([SPARK-26399](https://issues.apache.org/jira/browse/SPARK-26399))
* Support task and executor Metrics Distributions in the REST API ([SPARK-34488](https://issues.apache.org/jira/browse/SPARK-34488))
* Add fallback metrics for hash aggregate ([SPARK-35529](https://issues.apache.org/jira/browse/SPARK-35529), [SPARK-34237](https://issues.apache.org/jira/browse/SPARK-34237))
+ * Propagate reason for exec loss to Web UI ([SPARK-34764](https://issues.apache.org/jira/browse/SPARK-34764))
* Add count_distinct as an option to Dataset#summary ([SPARK-34165](https://issues.apache.org/jira/browse/SPARK-34165))
* Introduce SQLSTATE and ERRORCODE to SQL Exception ([SPARK-34920](https://issues.apache.org/jira/browse/SPARK-34920))
* Implement ScriptTransform in sql/core ([SPARK-31936](https://issues.apache.org/jira/browse/SPARK-31936))
@@ -307,7 +307,7 @@ _Programming guide: [GraphX Programming Guide](https://spark.apache.org/docs/3.2
### Known Issues
* Support fetching shuffle blocks in batch with i/o encryption ([SPARK-34827](https://issues.apache.org/jira/browse/SPARK-34827))
-* Fail to load Snappy codec ([SPARK-36681](https://issues.apache.org/jira/browse/SPARK-36681))
+* Fail to load Snappy codec for sequence file I/O ([SPARK-36681](https://issues.apache.org/jira/browse/SPARK-36681))
* Cannot insert into Hive bucket table if table is created with an uppercase schema ([SPARK-35531](https://issues.apache.org/jira/browse/SPARK-35531))
* Reading Hive view without explicit column names fails in Spark ([SPARK-36905](https://issues.apache.org/jira/browse/SPARK-36905))
* Job cancellation causes py4j errors on Jupyter due to pinned thread mode ([SPARK-37004](https://issues.apache.org/jira/browse/SPARK-37004))
diff --git a/site/releases/spark-release-3-2-0.html b/site/releases/spark-release-3-2-0.html
index 004b5c9fe51..62be9789357 100644
--- a/site/releases/spark-release-3-2-0.html
+++ b/site/releases/spark-release-3-2-0.html
@@ -170,12 +170,12 @@