From 341c5d46a579d0e83e9e9567cda3ba891bf53065 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Sat, 8 Jun 2024 16:39:09 -0600
Subject: [PATCH] Add changelog generator script

---
 .gitignore                        |   1 +
 dev/changelog/0.1.0.md            | 344 ++++++++++++++++++++++++++++++
 dev/release/README.md             |  24 +++
 dev/release/generate-changelog.py | 164 ++++++++++++++
 dev/release/requirements.in       |   1 +
 5 files changed, 534 insertions(+)
 create mode 100644 dev/changelog/0.1.0.md
 create mode 100755 dev/release/generate-changelog.py
 create mode 100644 dev/release/requirements.in

diff --git a/.gitignore b/.gitignore
index 0818ada9b..1c247dd9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ rat.txt
 filtered_rat.txt
 dev/dist
 apache-rat-*.jar
+venv
diff --git a/dev/changelog/0.1.0.md b/dev/changelog/0.1.0.md
new file mode 100644
index 000000000..96fedc89c
--- /dev/null
+++ b/dev/changelog/0.1.0.md
@@ -0,0 +1,344 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# DataFusion Comet 0.1.0 Changelog
+
+This release consists of 261 commits from 40 contributors. See credits at the end of this changelog for more information.
+
+**Implemented enhancements:**
+
+- feat: Add native shuffle and columnar shuffle [#30](https://github.com/apache/datafusion-comet/pull/30) (viirya)
+- feat: Support Emit::First for SumDecimalGroupsAccumulator [#47](https://github.com/apache/datafusion-comet/pull/47) (viirya)
+- feat: Nested map support for columnar shuffle [#51](https://github.com/apache/datafusion-comet/pull/51) (viirya)
+- feat: Support Count(Distinct) and similar aggregation functions [#42](https://github.com/apache/datafusion-comet/pull/42) (huaxingao)
+- feat: Upgrade to `jni-rs` 0.21 [#50](https://github.com/apache/datafusion-comet/pull/50) (sunchao)
+- feat: Handle exception thrown from native side [#61](https://github.com/apache/datafusion-comet/pull/61) (sunchao)
+- feat: Support InSet expression in Comet [#59](https://github.com/apache/datafusion-comet/pull/59) (viirya)
+- feat: Add `CometNativeException` for exceptions thrown from the native side [#62](https://github.com/apache/datafusion-comet/pull/62) (sunchao)
+- feat: Add cause to native exception [#63](https://github.com/apache/datafusion-comet/pull/63) (viirya)
+- feat: Pull based native execution [#69](https://github.com/apache/datafusion-comet/pull/69) (viirya)
+- feat: Add executeColumnarCollectIterator to CometExec to collect Comet operator result [#71](https://github.com/apache/datafusion-comet/pull/71) (viirya)
+- feat: Add CometBroadcastExchangeExec to support broadcasting the result of Comet native operator [#80](https://github.com/apache/datafusion-comet/pull/80) (viirya)
+- feat: Reduce memory consumption when writing sorted shuffle files [#82](https://github.com/apache/datafusion-comet/pull/82) (sunchao)
+- feat: Add struct/map as unsupported map key/value for columnar shuffle [#84](https://github.com/apache/datafusion-comet/pull/84) (viirya)
+- feat: Support multiple input sources for CometNativeExec [#87](https://github.com/apache/datafusion-comet/pull/87) (viirya)
+- feat: Date and timestamp trunc with format array [#94](https://github.com/apache/datafusion-comet/pull/94) (parthchandra)
+- feat: Support `First`/`Last` aggregate functions [#97](https://github.com/apache/datafusion-comet/pull/97) (huaxingao)
+- feat: Add support of TakeOrderedAndProjectExec in Comet [#88](https://github.com/apache/datafusion-comet/pull/88) (viirya)
+- feat: Support Binary in shuffle writer [#106](https://github.com/apache/datafusion-comet/pull/106) (advancedxy)
+- feat: Add license header by spotless:apply automatically [#110](https://github.com/apache/datafusion-comet/pull/110) (advancedxy)
+- feat: Add dictionary binary to shuffle writer [#111](https://github.com/apache/datafusion-comet/pull/111) (viirya)
+- feat: Minimize number of connections used by parallel reader [#126](https://github.com/apache/datafusion-comet/pull/126) (parthchandra)
+- feat: Support CollectLimit operator [#100](https://github.com/apache/datafusion-comet/pull/100) (advancedxy)
+- feat: Enable min/max for boolean type [#165](https://github.com/apache/datafusion-comet/pull/165) (huaxingao)
+- feat: Introduce `CometTaskMemoryManager` and native side memory pool [#83](https://github.com/apache/datafusion-comet/pull/83) (sunchao)
+- feat: Fix old style names [#201](https://github.com/apache/datafusion-comet/pull/201) (comphead)
+- feat: enable comet shuffle manager for comet shell [#204](https://github.com/apache/datafusion-comet/pull/204) (zuston)
+- feat: Support bitwise aggregate functions [#197](https://github.com/apache/datafusion-comet/pull/197) (huaxingao)
+- feat: Support BloomFilterMightContain expr [#179](https://github.com/apache/datafusion-comet/pull/179) (advancedxy)
+- feat: Support sort merge join [#178](https://github.com/apache/datafusion-comet/pull/178) (viirya)
+- feat: Support HashJoin operator [#194](https://github.com/apache/datafusion-comet/pull/194) (viirya)
+- feat: Remove use of nightly int_roundings feature [#228](https://github.com/apache/datafusion-comet/pull/228) (psvri)
+- feat: Support Broadcast HashJoin [#211](https://github.com/apache/datafusion-comet/pull/211) (viirya)
+- feat: Enable Comet broadcast by default [#213](https://github.com/apache/datafusion-comet/pull/213) (viirya)
+- feat: Add CometRowToColumnar operator [#206](https://github.com/apache/datafusion-comet/pull/206) (advancedxy)
+- feat: Document the class path / classloader issue with the shuffle manager [#256](https://github.com/apache/datafusion-comet/pull/256) (holdenk)
+- feat: Port DataFusion Covariance to Comet [#234](https://github.com/apache/datafusion-comet/pull/234) (huaxingao)
+- feat: Add manual test to calculate spark builtin functions coverage [#263](https://github.com/apache/datafusion-comet/pull/263) (comphead)
+- feat: Support ANSI mode in CAST from String to Bool [#290](https://github.com/apache/datafusion-comet/pull/290) (andygrove)
+- feat: Add extended explain info to Comet plan [#255](https://github.com/apache/datafusion-comet/pull/255) (parthchandra)
+- feat: Improve CometSortMergeJoin statistics [#304](https://github.com/apache/datafusion-comet/pull/304) (planga82)
+- feat: Add compatibility guide [#316](https://github.com/apache/datafusion-comet/pull/316) (andygrove)
+- feat: Improve CometHashJoin statistics [#309](https://github.com/apache/datafusion-comet/pull/309) (planga82)
+- feat: Support Variance [#297](https://github.com/apache/datafusion-comet/pull/297) (huaxingao)
+- feat: Support murmur3_hash and sha2 family hash functions [#226](https://github.com/apache/datafusion-comet/pull/226) (advancedxy)
+- feat: Disable cast string to timestamp by default [#337](https://github.com/apache/datafusion-comet/pull/337) (andygrove)
+- feat: Improve CometBroadcastHashJoin statistics [#339](https://github.com/apache/datafusion-comet/pull/339) (planga82)
+- feat: Implement Spark-compatible CAST from string to integral types [#307](https://github.com/apache/datafusion-comet/pull/307) (andygrove)
+- feat: Implement Spark-compatible CAST from string to timestamp types [#335](https://github.com/apache/datafusion-comet/pull/335) (vaibhawvipul)
+- feat: Implement Spark-compatible CAST float/double to string [#346](https://github.com/apache/datafusion-comet/pull/346) (mattharder91)
+- feat: Only allow incompatible cast expressions to run in comet if a config is enabled [#362](https://github.com/apache/datafusion-comet/pull/362) (andygrove)
+- feat: Implement Spark-compatible CAST between integer types [#340](https://github.com/apache/datafusion-comet/pull/340) (ganeshkumar269)
+- feat: Supports Stddev [#348](https://github.com/apache/datafusion-comet/pull/348) (huaxingao)
+- feat: Improve cast compatibility tests and docs [#379](https://github.com/apache/datafusion-comet/pull/379) (andygrove)
+- feat: Implement Spark-compatible CAST from non-integral numeric types to integral types [#399](https://github.com/apache/datafusion-comet/pull/399) (rohitrastogi)
+- feat: Implement Spark unhex [#342](https://github.com/apache/datafusion-comet/pull/342) (tshauck)
+- feat: Enable columnar shuffle by default [#250](https://github.com/apache/datafusion-comet/pull/250) (viirya)
+- feat: Implement Spark-compatible CAST from floating-point/double to decimal [#384](https://github.com/apache/datafusion-comet/pull/384) (vaibhawvipul)
+- feat: Add logging to explain reasons for Comet not being able to run a query stage natively [#397](https://github.com/apache/datafusion-comet/pull/397) (andygrove)
+- feat: Add support for TryCast expression in Spark 3.2 and 3.3 [#416](https://github.com/apache/datafusion-comet/pull/416) (vaibhawvipul)
+- feat: Supports UUID column [#395](https://github.com/apache/datafusion-comet/pull/395) (huaxingao)
+- feat: correlation support [#456](https://github.com/apache/datafusion-comet/pull/456) (huaxingao)
+- feat: Implement Spark-compatible CAST from String to Date [#383](https://github.com/apache/datafusion-comet/pull/383) (vidyasankarv)
+- feat: Add COMET_SHUFFLE_MODE config to control Comet shuffle mode [#460](https://github.com/apache/datafusion-comet/pull/460) (viirya)
+- feat: Add random row generator in data generator [#451](https://github.com/apache/datafusion-comet/pull/451) (advancedxy)
+- feat: Add xxhash64 function support [#424](https://github.com/apache/datafusion-comet/pull/424) (advancedxy)
+- feat: add hex scalar function [#449](https://github.com/apache/datafusion-comet/pull/449) (tshauck)
+- feat: Add "Comet Fuzz" fuzz-testing utility [#472](https://github.com/apache/datafusion-comet/pull/472) (andygrove)
+- feat: Use enum to represent CAST eval_mode in expr.proto [#415](https://github.com/apache/datafusion-comet/pull/415) (prashantksharma)
+- feat: Implement ANSI support for UnaryMinus [#471](https://github.com/apache/datafusion-comet/pull/471) (vaibhawvipul)
+- feat: Add specific fuzz tests for cast and try_cast and fix NPE found during fuzz testing [#514](https://github.com/apache/datafusion-comet/pull/514) (andygrove)
+- feat: Add fuzz testing for arithmetic expressions [#519](https://github.com/apache/datafusion-comet/pull/519) (andygrove)
+- feat: Add HashJoin support for BuildRight [#437](https://github.com/apache/datafusion-comet/pull/437) (viirya)
+
+**Fixed bugs:**
+
+- fix: Comet sink operator should not have children operators [#26](https://github.com/apache/datafusion-comet/pull/26) (viirya)
+- fix: Fix the UnionExec match branches in CometExecRule [#68](https://github.com/apache/datafusion-comet/pull/68) (wankunde)
+- fix: Appending null values to element array builders of StructBuilder for null row in a StructArray [#78](https://github.com/apache/datafusion-comet/pull/78) (viirya)
+- fix: Fix compilation error for CometBroadcastExchangeExec [#86](https://github.com/apache/datafusion-comet/pull/86) (viirya)
+- fix: Avoid exception caused by broadcasting empty result [#92](https://github.com/apache/datafusion-comet/pull/92) (wForget)
+- fix: Add num_rows when building RecordBatch [#103](https://github.com/apache/datafusion-comet/pull/103) (advancedxy)
+- fix: Cast string to boolean not compatible with Spark [#107](https://github.com/apache/datafusion-comet/pull/107) (erenavsarogullari)
+- fix: Another attempt to fix libcrypto.dylib loading issue [#112](https://github.com/apache/datafusion-comet/pull/112) (advancedxy)
+- fix: Fix compilation error for Spark 3.2 & 3.3 [#117](https://github.com/apache/datafusion-comet/pull/117) (sunchao)
+- fix: Fix corrupted AggregateMode when transforming plan parameters [#118](https://github.com/apache/datafusion-comet/pull/118) (viirya)
+- fix: bitwise shift with different left/right types [#135](https://github.com/apache/datafusion-comet/pull/135) (viirya)
+- fix: Avoid null exception in removeSubquery [#147](https://github.com/apache/datafusion-comet/pull/147) (viirya)
+- fix: rat check error in vscode ide [#161](https://github.com/apache/datafusion-comet/pull/161) (thexiay)
+- fix: Final aggregation should not bind to the input of partial aggregation [#155](https://github.com/apache/datafusion-comet/pull/155) (viirya)
+- fix: coalesce should return correct datatype [#168](https://github.com/apache/datafusion-comet/pull/168) (viirya)
+- fix: attempt to divide by zero error on decimal division [#172](https://github.com/apache/datafusion-comet/pull/172) (viirya)
+- fix: Aggregation without aggregation expressions should use correct result expressions [#175](https://github.com/apache/datafusion-comet/pull/175) (viirya)
+- fix: Comet native operator can be executed after ReusedExchange [#187](https://github.com/apache/datafusion-comet/pull/187) (viirya)
+- fix: Try to convert a static list into a set in Rust [#184](https://github.com/apache/datafusion-comet/pull/184) (advancedxy)
+- fix: Include active spiller when computing peak shuffle memory [#196](https://github.com/apache/datafusion-comet/pull/196) (sunchao)
+- fix: CometExecRule should handle ShuffleQueryStage and ReusedExchange [#186](https://github.com/apache/datafusion-comet/pull/186) (viirya)
+- fix: Use `makeCopy` to change relation in `FileSourceScanExec` [#207](https://github.com/apache/datafusion-comet/pull/207) (viirya)
+- fix: Remove duplicate byte array allocation for CometDictionary [#224](https://github.com/apache/datafusion-comet/pull/224) (viirya)
+- fix: Remove redundant data copy in columnar shuffle [#233](https://github.com/apache/datafusion-comet/pull/233) (viirya)
+- fix: Only maps FIXED_LEN_BYTE_ARRAY to String for uuid type [#238](https://github.com/apache/datafusion-comet/pull/238) (huaxingao)
+- fix: Reduce RowPartition memory allocation [#244](https://github.com/apache/datafusion-comet/pull/244) (viirya)
+- fix: Remove wrong calculation for Murmur3Hash for float with null input [#245](https://github.com/apache/datafusion-comet/pull/245) (advancedxy)
+- fix: Deallocate row addresses and size arrays after exporting [#246](https://github.com/apache/datafusion-comet/pull/246) (viirya)
+- fix: Fix wrong children expression order in IfExpr [#249](https://github.com/apache/datafusion-comet/pull/249) (viirya)
+- fix: Average expression in Comet Final should handle all null inputs from partial Spark aggregation [#261](https://github.com/apache/datafusion-comet/pull/261) (viirya)
+- fix: Only trigger Comet Final aggregation on Comet partial aggregation [#264](https://github.com/apache/datafusion-comet/pull/264) (viirya)
+- fix: incorrect result on Comet multiple column distinct count [#268](https://github.com/apache/datafusion-comet/pull/268) (viirya)
+- fix: Avoid using CometConf [#266](https://github.com/apache/datafusion-comet/pull/266) (snmvaughan)
+- fix: Fix arrow error when sorting on empty batch [#271](https://github.com/apache/datafusion-comet/pull/271) (viirya)
+- fix: Include license using `#` instead of using XML comment [#274](https://github.com/apache/datafusion-comet/pull/274) (snmvaughan)
+- fix: Comet should not translate try_sum to native sum expression [#277](https://github.com/apache/datafusion-comet/pull/277) (viirya)
+- fix: incorrect result with aggregate expression with filter [#284](https://github.com/apache/datafusion-comet/pull/284) (viirya)
+- fix: Comet should not fail on negative limit parameter [#288](https://github.com/apache/datafusion-comet/pull/288) (viirya)
+- fix: Comet columnar shuffle should not be on top of another Comet shuffle operator [#296](https://github.com/apache/datafusion-comet/pull/296) (viirya)
+- fix: Iceberg scan transition should be in front of other data source v2 [#302](https://github.com/apache/datafusion-comet/pull/302) (viirya)
+- fix: CometExec's outputPartitioning might not be same as Spark expects after AQE interferes [#299](https://github.com/apache/datafusion-comet/pull/299) (viirya)
+- fix: CometShuffleExchangeExec logical link should be correct [#324](https://github.com/apache/datafusion-comet/pull/324) (viirya)
+- fix: SortMergeJoin with unsupported key type should fall back to Spark [#355](https://github.com/apache/datafusion-comet/pull/355) (viirya)
+- fix: limit with offset should return correct results [#359](https://github.com/apache/datafusion-comet/pull/359) (viirya)
+- fix: Disable Comet shuffle with AQE coalesce partitions enabled [#380](https://github.com/apache/datafusion-comet/pull/380) (viirya)
+- fix: Unknown operator id when explain with formatted mode [#410](https://github.com/apache/datafusion-comet/pull/410) (leoluan2009)
+- fix: Reuse CometBroadcastExchangeExec with Spark ReuseExchangeAndSubquery rule [#441](https://github.com/apache/datafusion-comet/pull/441) (viirya)
+- fix: newFileScanRDD should not take constructor from custom Spark versions [#412](https://github.com/apache/datafusion-comet/pull/412) (ceppelli)
+- fix: fix CometNativeExec.doCanonicalize for ReusedExchangeExec [#447](https://github.com/apache/datafusion-comet/pull/447) (viirya)
+- fix: Enable cast string to int tests and fix compatibility issue [#453](https://github.com/apache/datafusion-comet/pull/453) (andygrove)
+- fix: Compute murmur3 hash with dictionary input correctly [#433](https://github.com/apache/datafusion-comet/pull/433) (advancedxy)
+- fix: Only delegate to DataFusion cast when we know that it is compatible with Spark [#461](https://github.com/apache/datafusion-comet/pull/461) (andygrove)
+- fix: `ColumnReader.loadVector` should initiate `CometDictionary` after re-import arrays [#473](https://github.com/apache/datafusion-comet/pull/473) (viirya)
+- fix: substring with negative indices should produce correct result [#470](https://github.com/apache/datafusion-comet/pull/470) (sonhmai)
+- fix: CometReader.loadVector should not overwrite dictionary ids [#476](https://github.com/apache/datafusion-comet/pull/476) (viirya)
+- fix: Reuse previous CometDictionary Java arrays [#489](https://github.com/apache/datafusion-comet/pull/489) (viirya)
+- fix: Fallback to Spark for LIKE with custom escape character [#478](https://github.com/apache/datafusion-comet/pull/478) (sujithjay)
+- fix: Incorrect input schema when preparing result expressions for HashAggregation [#501](https://github.com/apache/datafusion-comet/pull/501) (viirya)
+- fix: Input batch to ShuffleRepartitioner.insert_batch should not be larger than configured batch size [#523](https://github.com/apache/datafusion-comet/pull/523) (viirya)
+- fix: Fix integer overflow in date_parser [#529](https://github.com/apache/datafusion-comet/pull/529) (eejbyfeldt)
+
+**Documentation updates:**
+
+- docs: Move existing documentation into new Contributor Guide and add Getting Started section [#334](https://github.com/apache/datafusion-comet/pull/334) (andygrove)
+- docs: Add more content to the user guide [#347](https://github.com/apache/datafusion-comet/pull/347) (andygrove)
+- docs: Generate configuration guide in mvn build [#349](https://github.com/apache/datafusion-comet/pull/349) (andygrove)
+- docs: Add a plugin overview page to the contributors guide [#345](https://github.com/apache/datafusion-comet/pull/345) (andygrove)
+- docs: fix the docs url of installation instructions [#393](https://github.com/apache/datafusion-comet/pull/393) (haoxins)
+- docs: Running ScalaTest suites from the CLI [#404](https://github.com/apache/datafusion-comet/pull/404) (edmondop)
+- docs: Remove spark.comet.exec.broadcast.enabled from config docs [#421](https://github.com/apache/datafusion-comet/pull/421) (andygrove)
+- docs: fix various sphinx warnings [#428](https://github.com/apache/datafusion-comet/pull/428) (tshauck)
+- docs: Update Spark shell command to include setting additional class path [#435](https://github.com/apache/datafusion-comet/pull/435) (andygrove)
+- docs: Add benchmarking guide [#444](https://github.com/apache/datafusion-comet/pull/444) (andygrove)
+- docs: add guide to adding a new expression [#422](https://github.com/apache/datafusion-comet/pull/422) (tshauck)
+- docs: changes in documentation [#512](https://github.com/apache/datafusion-comet/pull/512) (SemyonSinchenko)
+- docs: Improve user documentation for supported operators and expressions [#520](https://github.com/apache/datafusion-comet/pull/520) (andygrove)
+
+**Other:**
+
+- Initial PR [#1](https://github.com/apache/datafusion-comet/pull/1) (sunchao)
+- build: Add Maven wrapper to the project [#13](https://github.com/apache/datafusion-comet/pull/13) (sunchao)
+- build: Add basic CI test pipelines [#18](https://github.com/apache/datafusion-comet/pull/18) (sunchao)
+- Bump com.google.protobuf:protobuf-java from 3.17.3 to 3.19.6 [#5](https://github.com/apache/datafusion-comet/pull/5) (dependabot[bot])
+- build: Add PR template [#23](https://github.com/apache/datafusion-comet/pull/23) (sunchao)
+- build: Create ticket templates [#24](https://github.com/apache/datafusion-comet/pull/24) (comphead)
+- build: Re-enable Scala style checker and spotless [#21](https://github.com/apache/datafusion-comet/pull/21) (sunchao)
+- build: Remove license header from pull request template [#28](https://github.com/apache/datafusion-comet/pull/28) (viirya)
+- build: Exclude .github from apache-rat-plugin check [#32](https://github.com/apache/datafusion-comet/pull/32) (viirya)
+- build: Add CI for MacOS (x64 and aarch64) [#35](https://github.com/apache/datafusion-comet/pull/35) (sunchao)
+- fix broken link in README.md [#39](https://github.com/apache/datafusion-comet/pull/39) (nairbv)
+- test: Add some fuzz testing for cast operations [#16](https://github.com/apache/datafusion-comet/pull/16) (andygrove)
+- test: Fix CI failure on libcrypto [#41](https://github.com/apache/datafusion-comet/pull/41) (sunchao)
+- test: Reduce test time spent in `CometShuffleSuite` [#40](https://github.com/apache/datafusion-comet/pull/40) (sunchao)
+- test: Add test for RoundRobinPartitioning [#54](https://github.com/apache/datafusion-comet/pull/54) (viirya)
+- build: Fix potential libcrypto lib loading issue for X86 mac runners [#55](https://github.com/apache/datafusion-comet/pull/55) (advancedxy)
+- refactor: Remove a few duplicated occurrences [#53](https://github.com/apache/datafusion-comet/pull/53) (sunchao)
+- build: Fix mvn cache for containerized runners [#48](https://github.com/apache/datafusion-comet/pull/48) (advancedxy)
+- test: Ensure traversed operators during finding first partial aggregation are all native [#58](https://github.com/apache/datafusion-comet/pull/58) (viirya)
+- build: Upgrade arrow-rs to 50.0.0 and DataFusion to 35.0.0 [#65](https://github.com/apache/datafusion-comet/pull/65) (viirya)
+- build: Support building with Java 1.8 [#45](https://github.com/apache/datafusion-comet/pull/45) (advancedxy)
+- test: Add golden files for TPCDSPlanStabilitySuite [#73](https://github.com/apache/datafusion-comet/pull/73) (sunchao)
+- test: Add TPC-DS test results [#77](https://github.com/apache/datafusion-comet/pull/77) (sunchao)
+- build: Upgrade spotless version to 2.43.0 [#85](https://github.com/apache/datafusion-comet/pull/85) (viirya)
+- test: Expose thrown exception when executing query in CometTPCHQuerySuite [#96](https://github.com/apache/datafusion-comet/pull/96) (viirya)
+- test: Enable TPCDS q41 in CometTPCDSQuerySuite [#98](https://github.com/apache/datafusion-comet/pull/98) (viirya)
+- build: Add CI for TPCDS queries [#99](https://github.com/apache/datafusion-comet/pull/99) (viirya)
+- build: Add tpcds-sf-1 to license header excluded list [#108](https://github.com/apache/datafusion-comet/pull/108) (viirya)
+- build: Show time duration for scala test [#116](https://github.com/apache/datafusion-comet/pull/116) (advancedxy)
+- test: Move MacOS (x86) pipelines to post-commit [#122](https://github.com/apache/datafusion-comet/pull/122) (sunchao)
+- doc: Add Quickstart Comet doc section [#125](https://github.com/apache/datafusion-comet/pull/125) (comphead)
+- build: Upgrade DF to 36.0.0 and arrow-rs 50.0.0 [#66](https://github.com/apache/datafusion-comet/pull/66) (comphead)
+- doc: Minor fix Getting started reformatting [#128](https://github.com/apache/datafusion-comet/pull/128) (comphead)
+- test: Reduce end-to-end test time [#109](https://github.com/apache/datafusion-comet/pull/109) (sunchao)
+- build: Separate and speedup TPC-DS benchmark [#130](https://github.com/apache/datafusion-comet/pull/130) (advancedxy)
+- build: Re-enable TPCDS queries q34 and q64 in `CometTPCDSQuerySuite` [#133](https://github.com/apache/datafusion-comet/pull/133) (viirya)
+- build: Refine names in benchmark.yml [#132](https://github.com/apache/datafusion-comet/pull/132) (advancedxy)
+- build: Make the build system work out of box [#136](https://github.com/apache/datafusion-comet/pull/136) (advancedxy)
+- minor: Update README.md with system diagram [#148](https://github.com/apache/datafusion-comet/pull/148) (alamb)
+- test: Add golden files for test [#150](https://github.com/apache/datafusion-comet/pull/150) (snmvaughan)
+- build: Add checker for PR title [#151](https://github.com/apache/datafusion-comet/pull/151) (sunchao)
+- build: Support CI pipelines for Spark 3.2, 3.3 and 3.4 [#153](https://github.com/apache/datafusion-comet/pull/153) (advancedxy)
+- minor: Only trigger PR title checker on pull requests [#154](https://github.com/apache/datafusion-comet/pull/154) (sunchao)
+- chore: Fix warnings in both compiler and test environments [#164](https://github.com/apache/datafusion-comet/pull/164) (advancedxy)
+- build: Upload test reports and coverage [#163](https://github.com/apache/datafusion-comet/pull/163) (advancedxy)
+- minor: Remove unnecessary logic [#169](https://github.com/apache/datafusion-comet/pull/169) (sunchao)
+- doc: Add initial doc how to expand Comet exceptions [#170](https://github.com/apache/datafusion-comet/pull/170) (comphead)
+- minor: Make `QueryPlanSerde` warning log less confusing [#181](https://github.com/apache/datafusion-comet/pull/181) (viirya)
+- refactor: Skipping slicing on shuffle arrays in shuffle reader [#189](https://github.com/apache/datafusion-comet/pull/189) (viirya)
+- build: Run Spark SQL tests for 3.4 [#166](https://github.com/apache/datafusion-comet/pull/166) (sunchao)
+- build: Enforce scalafix check in CI [#203](https://github.com/apache/datafusion-comet/pull/203) (advancedxy)
+- doc: Update README.md with shuffle configs [#208](https://github.com/apache/datafusion-comet/pull/208) (viirya)
+- test: Follow up on Spark 3.4 diff [#209](https://github.com/apache/datafusion-comet/pull/209) (sunchao)
+- build: Avoid confusion by using profile with clean [#215](https://github.com/apache/datafusion-comet/pull/215) (snmvaughan)
+- test: Add TPC-H test results [#218](https://github.com/apache/datafusion-comet/pull/218) (viirya)
+- build: Add CI for TPC-H queries [#220](https://github.com/apache/datafusion-comet/pull/220) (viirya)
+- test: Enable Comet shuffle in Spark SQL tests [#210](https://github.com/apache/datafusion-comet/pull/210) (sunchao)
+- test: Disable spark ui in unit test by default [#235](https://github.com/apache/datafusion-comet/pull/235) (beryllw)
+- chore: Replace deprecated temporal methods [#229](https://github.com/apache/datafusion-comet/pull/229) (snmvaughan)
+- doc: Update supported expressions [#237](https://github.com/apache/datafusion-comet/pull/237) (viirya)
+- build: Use specified branch of arrow-rs with workaround to invalid offset buffers from Java Arrow [#239](https://github.com/apache/datafusion-comet/pull/239) (viirya)
+- test: Enable string-to-bool cast test [#251](https://github.com/apache/datafusion-comet/pull/251) (andygrove)
+- test: Restore tests in CometTPCDSQuerySuite [#252](https://github.com/apache/datafusion-comet/pull/252) (viirya)
+- test: Enable all remaining TPCDS queries [#254](https://github.com/apache/datafusion-comet/pull/254) (viirya)
+- test: Enable all remaining TPCH queries [#257](https://github.com/apache/datafusion-comet/pull/257) (viirya)
+- chore: Remove some calls to unwrap when calling create_expr in planner.rs [#269](https://github.com/apache/datafusion-comet/pull/269) (andygrove)
+- doc: Fix a small typo in README.md [#272](https://github.com/apache/datafusion-comet/pull/272) (rz-vastdata)
+- chore: Fix typo in info message [#279](https://github.com/apache/datafusion-comet/pull/279) (andygrove)
+- chore: Fix NPE when running CometTPCHQueriesList directly [#285](https://github.com/apache/datafusion-comet/pull/285) (advancedxy)
+- chore: Update Comet repo description [#291](https://github.com/apache/datafusion-comet/pull/291) (viirya)
+- Chore: Cleanup how datafusion session config is created [#289](https://github.com/apache/datafusion-comet/pull/289) (psvri)
+- build: Update asf.yaml to use `@datafusion.apache.org` [#294](https://github.com/apache/datafusion-comet/pull/294) (sunchao)
+- doc: Update DataFusion project name and url [#300](https://github.com/apache/datafusion-comet/pull/300) (viirya)
+- chore: Remove unused functions [#301](https://github.com/apache/datafusion-comet/pull/301) (kazuyukitanimura)
+- chore: Ignore unused variables [#306](https://github.com/apache/datafusion-comet/pull/306) (snmvaughan)
+- chore: Update documentation publishing domain and path [#310](https://github.com/apache/datafusion-comet/pull/310) (andygrove)
+- chore: Add documentation publishing infrastructure [#314](https://github.com/apache/datafusion-comet/pull/314) (andygrove)
+- build: Move shim directories [#318](https://github.com/apache/datafusion-comet/pull/318) (kazuyukitanimura)
+- test: Suppress decimal random number tests for 3.2 and 3.3 [#319](https://github.com/apache/datafusion-comet/pull/319) (kazuyukitanimura)
+- chore: Add allocation source to StreamReader [#332](https://github.com/apache/datafusion-comet/pull/332) (viirya)
+- chore: Add more cast tests and improve test framework [#351](https://github.com/apache/datafusion-comet/pull/351) (andygrove)
+- chore: Implement remaining CAST tests [#356](https://github.com/apache/datafusion-comet/pull/356) (andygrove)
+- doc: Fix target typo in development.md [#364](https://github.com/apache/datafusion-comet/pull/364) (jc4x4)
+- doc: Clean up supported JDKs in README [#366](https://github.com/apache/datafusion-comet/pull/366) (edmondop)
+- build: Add Spark SQL test pipeline with ANSI mode enabled [#321](https://github.com/apache/datafusion-comet/pull/321) (parthchandra)
+- doc: add contributing in README.md [#382](https://github.com/apache/datafusion-comet/pull/382) (caicancai)
+- chore: Store EXTENSION_INFO as Set[String] instead of newline-delimited String [#386](https://github.com/apache/datafusion-comet/pull/386) (andygrove)
+- build: Add scala-version to matrix [#396](https://github.com/apache/datafusion-comet/pull/396) (snmvaughan)
+- chore: Add criterion benchmarks for casting between integer types [#401](https://github.com/apache/datafusion-comet/pull/401) (andygrove)
+- chore: Make COMET_EXEC_BROADCAST_FORCE_ENABLED internal config [#413](https://github.com/apache/datafusion-comet/pull/413) (viirya)
+- chore: Rename some columnar shuffle configs for code consistency [#418](https://github.com/apache/datafusion-comet/pull/418) (leoluan2009)
+- chore: Remove an unused config [#430](https://github.com/apache/datafusion-comet/pull/430) (andygrove)
+- doc: Add Plan Stability Testing to development guide [#432](https://github.com/apache/datafusion-comet/pull/432) (viirya)
+- tests: Move random data generation methods from CometCastSuite to new DataGenerator class [#426](https://github.com/apache/datafusion-comet/pull/426) (andygrove)
+- test: Fix explain with exteded info comet test [#436](https://github.com/apache/datafusion-comet/pull/436) (kazuyukitanimura)
+- chore: Add cargo bench for shuffle writer [#438](https://github.com/apache/datafusion-comet/pull/438) (andygrove)
+- doc: Add Tuning Guide with shuffle configs [#443](https://github.com/apache/datafusion-comet/pull/443) (viirya)
+- chore: improve fallback message when comet native shuffle is not enabled [#445](https://github.com/apache/datafusion-comet/pull/445) (andygrove)
+- Coverage: Add a manual test to show what Spark built in expression the DF can support directly [#331](https://github.com/apache/datafusion-comet/pull/331) (comphead)
+- build: Add spark-4.0 profile and shims [#407](https://github.com/apache/datafusion-comet/pull/407) (kazuyukitanimura)
+- build: bump spark version to 3.4.3 [#292](https://github.com/apache/datafusion-comet/pull/292) (huaxingao)
+- chore: Removing copying data from dictionary values into CometDictionary [#490](https://github.com/apache/datafusion-comet/pull/490) (viirya)
+- chore: Update README to highlight Comet benefits [#497](https://github.com/apache/datafusion-comet/pull/497) (andygrove)
+- test: fix ClassNotFoundException for Hive tests [#499](https://github.com/apache/datafusion-comet/pull/499) (kazuyukitanimura)
+- build: Enable comet tests with spark-4.0 profile [#493](https://github.com/apache/datafusion-comet/pull/493) (kazuyukitanimura)
+- chore: Switch to stable Rust [#505](https://github.com/apache/datafusion-comet/pull/505) (andygrove)
+- Minor: Generate the supported Spark builtin expression list into MD file [#455](https://github.com/apache/datafusion-comet/pull/455) (comphead)
+- chore: Simplify code in CometExecIterator and avoid some small overhead [#522](https://github.com/apache/datafusion-comet/pull/522) (andygrove)
+- chore: Upgrade spark to 4.0.0-preview1 [#526](https://github.com/apache/datafusion-comet/pull/526) (advancedxy)
+- chore: Add UnboundColumn to carry datatype for unbound reference [#518](https://github.com/apache/datafusion-comet/pull/518) (viirya)
+- chore: Remove 3.4.2.diff [#528](https://github.com/apache/datafusion-comet/pull/528) (kazuyukitanimura)
+- build: Switch back to official DataFusion repo and arrow-rs after Arrow Java 16 is released [#403](https://github.com/apache/datafusion-comet/pull/403) (viirya)
+- chore: Add CometEvalMode enum to replace string literals [#539](https://github.com/apache/datafusion-comet/pull/539) (andygrove)
+- chore: Create initial release process scripts for official ASF source release [#429](https://github.com/apache/datafusion-comet/pull/429) (andygrove)
+
+## Credits
+
+Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor.
+
+```
+    88	Liang-Chi Hsieh
+    40	Andy Grove
+    27	Chao Sun
+    26	advancedxy
+    11	Huaxin Gao
+     8	KAZUYUKI TANIMURA
+     7	Steve Vaughan
+     7	comphead
+     4	Parth Chandra
+     4	Trent Hauck
+     4	Vipul Vaibhaw
+     3	Pablo Langa
+     2	Edmondo Porcu
+     2	Oleks V
+     2	Vrishabh
+     2	Xuedong Luan
+     1	Andrew Lamb
+     1	Brian Vaughan
+     1	Cancai Cai
+     1	Emil Ejbyfeldt
+     1	Eren Avsarogullari
+     1	Holden Karau
+     1	JC
+     1	Junbo wang
+     1	Junfan Zhang
+     1	Prashant K. Sharma
+     1	RickestCode
+     1	Rohit Rastogi
+     1	Roman Zeyde
+     1	Semyon
+     1	Son
+     1	Sujith Jay Nair
+     1	Xin Hao
+     1	Zhen Wang
+     1	ceppelli
+     1	dependabot[bot]
+     1	thexia
+     1	vidyasankarv
+     1	wankun
+     1	గణేష్
+```
+
+Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release.
+
diff --git a/dev/release/README.md b/dev/release/README.md
index b20f2d48e..2ca2f608e 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -34,6 +34,30 @@ This part of the process can be performed by any committer.
 - Create and merge a PR to update the version number & update the changelog
 - Push a release candidate tag (e.g. 0.1.0-rc1) to the Apache repository
 
+### Generating the Changelog
+
+We haven't yet defined how tagging and branching will work for the source releases. This project is more complex
+than DataFusion core because it consists of a Maven project and a Cargo project. However, a changelog covering
+the changes between any two commits or tags can be generated by running the provided `generate-changelog.py`
+script.
+
+It is recommended that you set up a virtual Python environment and then install the dependencies:
+
+```shell
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.in
+```
+
+To generate the changelog, set the `GITHUB_TOKEN` environment variable to a valid token and then run the script,
+providing two commit IDs or tags followed by the version number of the release being created. The following
+example generates a changelog of all changes between the first commit and the current HEAD revision.
+
+```shell
+export GITHUB_TOKEN=<your-token-here>
+python3 generate-changelog.py 52241f44315fd1b2fd6cd9031bb05f046fe3a5a3 HEAD 0.1.0 > ../changelog/0.1.0.md
+```
+
 ## Publishing the Release Candidate
 
 This part of the process can mostly only be performed by a PMC member.
diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py
new file mode 100755
index 000000000..6793d7177
--- /dev/null
+++ b/dev/release/generate-changelog.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import sys
+from github import Github
+import os
+import re
+import subprocess
+
+def print_pulls(repo_name, title, pulls):
+    """Print a Markdown section listing each (pull, commit) pair; print nothing if `pulls` is empty."""
+    if pulls:
+        print(f"**{title}:**\n")
+        for pull, commit in pulls:
+            login = commit.author.login if commit.author else pull.user.login  # author is None for unlinked commits
+            print(f"- {pull.title} [#{pull.number}](https://github.com/{repo_name}/pull/{pull.number}) ({login})")
+        print()
+
+
+def generate_changelog(repo, repo_name, tag1, tag2, version):
+    """Print a Markdown changelog for the commits in `tag1..tag2` to stdout.
+
+    `repo` is the PyGithub repository used to look up pull requests and `repo_name`
+    is its "owner/name", used to build PR links; `version` goes in the title. Counts
+    and credits come from `git` run in the current directory, so a failing git
+    command raises subprocess.CalledProcessError. Progress messages go to stderr.
+    """
+    # get a list of commits between two tags
+    print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr)
+    comparison = repo.compare(tag1, tag2)
+
+    # get the pull requests for these commits
+    print("Fetching pull requests", file=sys.stderr)
+    seen_pulls = set()
+    all_pulls = []
+    for commit in comparison.commits:
+        for pull in commit.get_pulls():
+            # there can be multiple commits per PR if squash merge is not being used and
+            # in this case we should get all the author names, but for now just pick one
+            if pull.number not in seen_pulls:
+                seen_pulls.add(pull.number)
+                all_pulls.append((pull, commit))
+
+    # we split the pulls into categories
+    breaking = []
+    bugs = []
+    docs = []
+    enhancements = []
+    performance = []
+    other = []
+
+    # categorize the pull requests based on GitHub labels and Conventional Commits titles
+    print("Categorizing pull requests", file=sys.stderr)
+    for (pull, commit) in all_pulls:
+        # see if PR title uses Conventional Commits: type, optional "(scope)", optional "!"
+        cc_type = ''
+        cc_breaking = False
+        match = re.match(r'^([a-z]+)(\([a-z]+\))?(!)?:', pull.title)
+        if match:
+            cc_type = match.group(1)  # fix, feat, docs, chore
+            cc_breaking = match.group(3) == '!'
+
+        labels = [label.name for label in pull.labels]
+        if 'api change' in labels or cc_breaking:
+            breaking.append((pull, commit))
+        elif 'bug' in labels or cc_type == 'fix':
+            bugs.append((pull, commit))
+        elif 'performance' in labels or cc_type == 'perf':
+            performance.append((pull, commit))
+        elif 'enhancement' in labels or cc_type == 'feat':
+            enhancements.append((pull, commit))
+        elif 'documentation' in labels or cc_type in ('doc', 'docs'):  # PR titles here also use "doc:"
+            docs.append((pull, commit))
+        else:
+            other.append((pull, commit))
+
+    # produce the changelog content
+    print("Generating changelog content", file=sys.stderr)
+
+    # ASF header
+    print("""<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->\n""")
+
+    print(f"# DataFusion Comet {version} Changelog\n")
+
+    # run git without a shell so that the refs are passed as literal arguments
+    rev_range = f"{tag1}..{tag2}"
+    commit_count = subprocess.check_output(["git", "rev-list", "--count", rev_range], text=True).strip()
+
+    # one line per contributor; rstrip() (not strip()) keeps the first line's alignment padding
+    shortlog = subprocess.check_output(["git", "shortlog", "-sn", rev_range], text=True).rstrip()
+    contributor_count = len(shortlog.splitlines())
+
+    print(f"This release consists of {commit_count} commits from {contributor_count} contributors. "
+          f"See credits at the end of this changelog for more information.\n")
+
+    print_pulls(repo_name, "Breaking changes", breaking)
+    print_pulls(repo_name, "Performance related", performance)
+    print_pulls(repo_name, "Implemented enhancements", enhancements)
+    print_pulls(repo_name, "Fixed bugs", bugs)
+    print_pulls(repo_name, "Documentation updates", docs)
+    print_pulls(repo_name, "Other", other)
+
+    print("## Credits\n")
+    print("Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) "
+          "per contributor.\n")
+    print("```")
+    print(shortlog)
+    print("```\n")
+
+    print("Thank you also to everyone who contributed in other ways such as filing issues, reviewing "
+          "PRs, and providing feedback on this release.\n")
+
+def cli(args=None):
+    """Parse the command line and print the changelog to stdout.
+
+    `args` is the argument list to parse; None or empty means sys.argv[1:].
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("tag1", help="The previous commit or tag (e.g. 0.1.0)")
+    parser.add_argument("tag2", help="The current commit or tag (e.g. HEAD)")
+    parser.add_argument("version", help="The version number to include in the changelog")
+    # forward `args` to argparse; `or None` makes an empty list fall back to sys.argv[1:] as well
+    args = parser.parse_args(args or None)
+
+    token = os.getenv("GITHUB_TOKEN")
+    project = "apache/datafusion-comet"
+
+    repo = Github(token).get_repo(project)
+    generate_changelog(repo, project, args.tag1, args.tag2, args.version)
+
+if __name__ == "__main__":
+    cli()  # no arguments passed: the command line is read from sys.argv[1:]
\ No newline at end of file
diff --git a/dev/release/requirements.in b/dev/release/requirements.in
new file mode 100644
index 000000000..ff2bdfd42
--- /dev/null
+++ b/dev/release/requirements.in
@@ -0,0 +1 @@
+PyGitHub
\ No newline at end of file