diff --git a/Dockerfile.multiplatform b/Dockerfile.multiplatform
new file mode 100644
index 000000000..40550644b
--- /dev/null
+++ b/Dockerfile.multiplatform
@@ -0,0 +1,79 @@
+FROM alpine:3.19
+
+# Install runtime dependencies
+RUN apk add --no-cache \
+ curl \
+ git \
+ nodejs \
+ npm \
+ bash \
+ ca-certificates \
+ libc6-compat
+
+# Install Go 1.22.2 (required by go.mod)
+ARG TARGETARCH
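+# TARGETARCH is supplied automatically by BuildKit (e.g. "amd64" or "arm64") when the
+# image is built with --platform; any value other than arm64 falls back to the amd64 tarball.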
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+ GO_ARCH="arm64"; \
+ else \
+ GO_ARCH="amd64"; \
+ fi && \
+ curl -L "https://go.dev/dl/go1.22.2.linux-${GO_ARCH}.tar.gz" -o go.tar.gz && \
+ tar -C /usr/local -xzf go.tar.gz && \
+ rm go.tar.gz
+
+# Set Go environment variables
+ENV GOPATH="/go"
+ENV PATH="/usr/local/go/bin:${PATH}"
+ENV PATH="${GOPATH}/bin:${PATH}"
+
+# Download the prebuilt Hugo binary (much more space-efficient than compiling from source)
+ARG TARGETARCH
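+# (re-declared for readability; the value already persists from the earlier ARG in this stage)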
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+ HUGO_ARCH="arm64"; \
+ else \
+ HUGO_ARCH="amd64"; \
+ fi && \
+ echo "Downloading Hugo for architecture: ${HUGO_ARCH}" && \
+ curl -L "https://github.com/gohugoio/hugo/releases/download/v0.123.7/hugo_extended_0.123.7_linux-${HUGO_ARCH}.tar.gz" -o hugo.tar.gz && \
+ echo "Extracting Hugo..." && \
+ tar -xzf hugo.tar.gz && \
+ echo "Contents after extraction:" && \
+ ls -la && \
+ echo "Hugo binary details:" && \
+ ls -la hugo && \
+ echo "Moving Hugo binary..." && \
+ cp hugo /usr/local/bin/hugo && \
+ chmod +x /usr/local/bin/hugo && \
+ echo "Hugo binary location and permissions:" && \
+ ls -la /usr/local/bin/hugo && \
+ echo "Testing Hugo binary:" && \
+ ldd /usr/local/bin/hugo && \
+ /usr/local/bin/hugo version && \
+ rm hugo.tar.gz hugo
+
+# Install global dependencies
+RUN npm install -g postcss postcss-cli autoprefixer
+
+# Copy entrypoint script
+COPY scripts/entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+# Create working directory
+WORKDIR /src
+
+# Configure Git to trust the working directory
+RUN git config --global --add safe.directory /src
+
+# Verify installations
+RUN node --version && \
+ npm --version && \
+ npx --version && \
+ hugo version && \
+ go version
+
+EXPOSE 1313
+
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
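One way to sanity-check the new Dockerfile before the multi-platform push is a single-platform build loaded into the local daemon; the hugo-base:dev tag and linux/amd64 platform below are illustrative, not part of this change:

    # Build for the host platform only so --load works with the docker-container driver
    docker buildx build \
      --platform linux/amd64 \
      --file Dockerfile.multiplatform \
      --tag hugo-base:dev \
      --load .

    # The image sets entrypoint.sh as ENTRYPOINT, so override it to probe the toolchain
    docker run --rm --entrypoint hugo hugo-base:dev version
    docker run --rm --entrypoint go hugo-base:dev version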
diff --git a/Makefile b/Makefile
index 577fb7a40..226040c47 100644
--- a/Makefile
+++ b/Makefile
@@ -1,20 +1,31 @@
# Hugo configuration
OUTPUT_DIR := output
-DOCKER_IMAGE := hvishwanath/hugo:v0.123.7-ext
+HUGO_BASE_IMAGE := hvishwanath/hugo:v0.123.7-ext-multiplatform
+DOCKER_IMAGE := $(HUGO_BASE_IMAGE)
#PROD_IMAGE := hvishwanath/kafka-site-md:1.2.0
PROD_IMAGE := us-west1-docker.pkg.dev/play-394201/kafka-site-md/kafka-site-md:1.6.0
-.PHONY: build serve clean docker-image prod-image prod-run buildx-setup
+.PHONY: build serve clean docker-image hugo-base-multi-platform prod-image prod-run buildx-setup ghcr-prod-image
# Setup buildx for multi-arch builds
buildx-setup:
docker buildx create --name multiarch --driver docker-container --use || true
docker buildx inspect multiarch --bootstrap
-# Build the Docker image
+# Build the Docker image (single platform)
docker-image:
docker build -t $(DOCKER_IMAGE) . --push
+# Build and push multi-platform Hugo base image
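+# --push is required here: multi-arch manifests built with the docker-container driver cannot be loaded into the local daemon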
+hugo-base-multi-platform: buildx-setup
+ docker buildx build \
+ --platform linux/amd64,linux/arm64 \
+ --tag $(HUGO_BASE_IMAGE) \
+ --file Dockerfile.multiplatform \
+ --build-arg BUILDKIT_INLINE_CACHE=1 \
+ --push \
+ .
+
# Build the static site using Docker
build:
docker pull $(DOCKER_IMAGE)
@@ -48,8 +59,19 @@ prod-run: prod-image
docker pull $(PROD_IMAGE)
docker run --rm -p 8080:80 $(PROD_IMAGE)
+# Build and push production image to GHCR
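+# Assumes a prior "docker login ghcr.io"; note that the image namespace below comes from the
+# local checkout's directory name, which needs to match the intended GHCR owner/path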
+ghcr-prod-image: build buildx-setup
+ docker buildx build \
+ --platform linux/amd64,linux/arm64 \
+ --tag ghcr.io/$(shell basename $(shell git rev-parse --show-toplevel))/kafka-site-md:prod-$(shell git rev-parse --abbrev-ref HEAD) \
+ --tag ghcr.io/$(shell basename $(shell git rev-parse --show-toplevel))/kafka-site-md:prod-$(shell git rev-parse --short HEAD) \
+ --tag ghcr.io/$(shell basename $(shell git rev-parse --show-toplevel))/kafka-site-md:prod-$(shell date +%Y%m%d-%H%M%S) \
+ --file Dockerfile.prod \
+ --push \
+ .
+
# Clean the output directory and remove Docker images
clean:
rm -rf $(OUTPUT_DIR)
- docker rmi $(DOCKER_IMAGE) $(PROD_IMAGE)
+ docker rmi $(DOCKER_IMAGE) $(HUGO_BASE_IMAGE) $(PROD_IMAGE)
docker buildx rm multiarch || true
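With these targets in place, the intended publish flow looks roughly like the sketch below; it assumes push access to the hvishwanath Docker Hub namespace and a prior docker login ghcr.io:

    # Build and push the multi-arch Hugo base image (buildx-setup runs as a prerequisite)
    make hugo-base-multi-platform

    # Confirm that both architectures are present in the pushed manifest
    docker buildx imagetools inspect hvishwanath/hugo:v0.123.7-ext-multiplatform

    # Build the static site, then build and push the production image to GHCR
    make ghcr-prod-image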
diff --git a/README.md b/README.md
index 1613c113f..f5d2c19f7 100644
--- a/README.md
+++ b/README.md
@@ -167,4 +167,4 @@ make clean
4. Test locally using `make serve`
5. Submit a pull request
-For more details about the migration to Markdown and the overall architecture, see [KIP-1133](https://cwiki.apache.org/confluence/display/KAFKA/KIP-1133%3A+AK+Documentation+and+Website+in+Markdown).
\ No newline at end of file
+For more details about the migration to Markdown and the overall architecture, see [KIP-1133](https://cwiki.apache.org/confluence/display/KAFKA/KIP-1133%3A+AK+Documentation+and+Website+in+Markdown).
diff --git a/content/en/0110/streams/core-concepts.md b/content/en/0110/streams/core-concepts.md
index 2a30d9cf7..9774e4175 100644
--- a/content/en/0110/streams/core-concepts.md
+++ b/content/en/0110/streams/core-concepts.md
@@ -80,7 +80,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/0110/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/0110/documentation#streamsconfigs) section.
[Previous](/0110/streams/developer-guide) [Next](/0110/streams/architecture)
diff --git a/content/en/10/streams/core-concepts.md b/content/en/10/streams/core-concepts.md
index b55f4c90c..5f9bb3ee6 100644
--- a/content/en/10/streams/core-concepts.md
+++ b/content/en/10/streams/core-concepts.md
@@ -82,7 +82,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/10/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/10/documentation#streamsconfigs) section.
[Previous](/10/streams/tutorial) [Next](/10/streams/architecture)
diff --git a/content/en/11/streams/core-concepts.md b/content/en/11/streams/core-concepts.md
index 6037d9a66..884f54306 100644
--- a/content/en/11/streams/core-concepts.md
+++ b/content/en/11/streams/core-concepts.md
@@ -82,7 +82,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/11/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/11/documentation#streamsconfigs) section.
[Previous](/11/streams/tutorial) [Next](/11/streams/architecture)
diff --git a/content/en/20/streams/core-concepts.md b/content/en/20/streams/core-concepts.md
index 7e6b505ab..db682d74b 100644
--- a/content/en/20/streams/core-concepts.md
+++ b/content/en/20/streams/core-concepts.md
@@ -106,7 +106,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/20/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/20/documentation#streamsconfigs) section.
[Previous](/20/streams/tutorial) [Next](/20/streams/architecture)
diff --git a/content/en/21/streams/core-concepts.md b/content/en/21/streams/core-concepts.md
index 02d07f772..ff0fef824 100644
--- a/content/en/21/streams/core-concepts.md
+++ b/content/en/21/streams/core-concepts.md
@@ -106,7 +106,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/21/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/21/documentation#streamsconfigs) section.
# Out-of-Order Handling
diff --git a/content/en/22/streams/core-concepts.md b/content/en/22/streams/core-concepts.md
index e1f2b384c..585b73ac0 100644
--- a/content/en/22/streams/core-concepts.md
+++ b/content/en/22/streams/core-concepts.md
@@ -106,7 +106,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/22/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/22/documentation#streamsconfigs) section.
# Out-of-Order Handling
diff --git a/content/en/23/streams/core-concepts.md b/content/en/23/streams/core-concepts.md
index 9e7e368db..b3ce4b9df 100644
--- a/content/en/23/streams/core-concepts.md
+++ b/content/en/23/streams/core-concepts.md
@@ -106,7 +106,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/23/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/23/documentation#streamsconfigs) section.
# Out-of-Order Handling
diff --git a/content/en/24/streams/core-concepts.md b/content/en/24/streams/core-concepts.md
index d80ff38cd..f3b856c76 100644
--- a/content/en/24/streams/core-concepts.md
+++ b/content/en/24/streams/core-concepts.md
@@ -106,7 +106,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/24/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/24/documentation#streamsconfigs) section.
# Out-of-Order Handling
diff --git a/content/en/25/streams/core-concepts.md b/content/en/25/streams/core-concepts.md
index 51375444e..b313344de 100644
--- a/content/en/25/streams/core-concepts.md
+++ b/content/en/25/streams/core-concepts.md
@@ -106,7 +106,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. To read more details on how this is done inside Kafka Streams, readers are recommended to read [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). In order to achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/25/documentation#streamsconfigs) section.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline. Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](/#semantics), and Kafka Streams has hence added end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note that the key difference between the Kafka Streams end-to-end exactly-once guarantee and other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensures that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more details on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics). To achieve exactly-once semantics when running Kafka Streams applications, users can simply set the `processing.guarantee` config value to **exactly_once** (default value is **at_least_once**). More details can be found in the [**Kafka Streams Configs**](/25/documentation#streamsconfigs) section.
# Out-of-Order Handling
diff --git a/content/en/26/streams/core-concepts.md b/content/en/26/streams/core-concepts.md
index a2a372170..7c1a7da7a 100644
--- a/content/en/26/streams/core-concepts.md
+++ b/content/en/26/streams/core-concepts.md
@@ -115,7 +115,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplication, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, an approach known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once beta", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **exactly_once** for EOS version 1 (requires brokers version 0.11.0 or newer) or **exactly_once_beta** for EOS version 2 (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/26/streams/developer-guide/config-streams.html) section.
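To make the setting above concrete, a minimal sketch of a 2.6-era Streams configuration with exactly-once enabled might look like the following; the application id and bootstrap servers are placeholder values, not anything taken from the page:

```java
import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;

public class EosConfigSketch {
    public static Properties eosProperties() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-streams-app");     // placeholder
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // placeholder
        // EOS version 1 (brokers 0.11.0+); use StreamsConfig.EXACTLY_ONCE_BETA for EOS version 2 (brokers 2.5+)
        props.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, StreamsConfig.EXACTLY_ONCE);
        return props;
    }
}
```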
diff --git a/content/en/27/streams/core-concepts.md b/content/en/27/streams/core-concepts.md
index b777a5df3..505df170e 100644
--- a/content/en/27/streams/core-concepts.md
+++ b/content/en/27/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once beta", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **exactly_once** (requires brokers version 0.11.0 or newer) or **exactly_once_beta** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/27/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/28/streams/core-concepts.md b/content/en/28/streams/core-concepts.md
index 42f6dab3f..c0f8cfc47 100644
--- a/content/en/28/streams/core-concepts.md
+++ b/content/en/28/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once beta", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **exactly_once** for EOS version 1 (requires brokers version 0.11.0 or newer) or **exactly_once_beta** for EOS version 2 (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/28/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/30/streams/core-concepts.md b/content/en/30/streams/core-concepts.md
index 4f1f52885..fcf8cf55b 100644
--- a/content/en/30/streams/core-concepts.md
+++ b/content/en/30/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/30/streams/developer-guide/config-streams.html) section.
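The corresponding configuration for exactly-once v2, sketched here with placeholder application id and bootstrap servers, differs only in the constant used:

```java
import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;

public class EosV2ConfigSketch {
    public static Properties eosV2Properties() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-streams-app");     // placeholder
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // placeholder
        // Exactly-once v2 requires brokers on version 2.5 or newer
        props.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, StreamsConfig.EXACTLY_ONCE_V2);
        return props;
    }
}
```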
diff --git a/content/en/30/streams/developer-guide/datatypes.md b/content/en/30/streams/developer-guide/datatypes.md
index 737227480..6916481a8 100644
--- a/content/en/30/streams/developer-guide/datatypes.md
+++ b/content/en/30/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.0.0
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.0/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
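As a brief illustration of the "defining default serializers in your Streams configuration" use case mentioned above, the built-in serdes are typically wired in roughly as follows; the String/Long types chosen here are arbitrary examples rather than anything prescribed by the page:

```java
import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsConfig;

public class DefaultSerdeSketch {
    public static Properties defaultSerdes() {
        final Properties props = new Properties();
        // Used whenever an operation does not provide its own serdes explicitly
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
        return props;
    }
}
```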
diff --git a/content/en/31/streams/core-concepts.md b/content/en/31/streams/core-concepts.md
index 56eecad61..3bf5827aa 100644
--- a/content/en/31/streams/core-concepts.md
+++ b/content/en/31/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/31/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/31/streams/developer-guide/datatypes.md b/content/en/31/streams/developer-guide/datatypes.md
index be584f318..ae64d7310 100644
--- a/content/en/31/streams/developer-guide/datatypes.md
+++ b/content/en/31/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.1.1-SNAPSHOT
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.1/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/32/streams/core-concepts.md b/content/en/32/streams/core-concepts.md
index cd05bb11d..edc59c86b 100644
--- a/content/en/32/streams/core-concepts.md
+++ b/content/en/32/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/32/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/32/streams/developer-guide/datatypes.md b/content/en/32/streams/developer-guide/datatypes.md
index 8d90b8ef6..baea46a18 100644
--- a/content/en/32/streams/developer-guide/datatypes.md
+++ b/content/en/32/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.2.1
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.2/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/33/streams/core-concepts.md b/content/en/33/streams/core-concepts.md
index 020e51a5b..ccffc224d 100644
--- a/content/en/33/streams/core-concepts.md
+++ b/content/en/33/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/33/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/33/streams/developer-guide/datatypes.md b/content/en/33/streams/developer-guide/datatypes.md
index f1eea51d1..37e3fb622 100644
--- a/content/en/33/streams/developer-guide/datatypes.md
+++ b/content/en/33/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.3.1
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.3/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/33/streams/developer-guide/dsl-api.md b/content/en/33/streams/developer-guide/dsl-api.md
index 7adc749d2..d595d2e52 100644
--- a/content/en/33/streams/developer-guide/dsl-api.md
+++ b/content/en/33/streams/developer-guide/dsl-api.md
@@ -2007,6 +2007,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/33/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/33/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, using `KStream#processValues` is not recommended, as it can lead to compatibility issues when upgrading to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
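The full `PopularPageEmailAlert` listing sits beyond this hunk in the upstream page; as a rough, hypothetical skeleton of such a processor against the `org.apache.kafka.streams.processor.api.Processor` interface (the threshold and the alerting action are illustrative assumptions, not the docs' own code):

```java
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.Record;

// Hypothetical skeleton: raises an alert (here just a log line) once a page's view count crosses a threshold.
public class PopularPageEmailAlert implements Processor<String, Long, Void, Void> {

    private static final long THRESHOLD = 1_000L;  // illustrative value

    @Override
    public void process(final Record<String, Long> record) {
        if (record.value() != null && record.value() >= THRESHOLD) {
            // A real implementation would send an email notification here.
            System.out.println("ALERT: page " + record.key() + " reached " + record.value() + " views");
        }
    }
}
```

Such a processor is then attached to a stream via `KStream#process()`, as the surrounding text describes.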
diff --git a/content/en/34/streams/core-concepts.md b/content/en/34/streams/core-concepts.md
index f59c9a6d6..fb84b3467 100644
--- a/content/en/34/streams/core-concepts.md
+++ b/content/en/34/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/34/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/34/streams/developer-guide/datatypes.md b/content/en/34/streams/developer-guide/datatypes.md
index e550b6c32..a52d4636a 100644
--- a/content/en/34/streams/developer-guide/datatypes.md
+++ b/content/en/34/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.4.0
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.4/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/34/streams/developer-guide/dsl-api.md b/content/en/34/streams/developer-guide/dsl-api.md
index 00447e7dc..885a1095b 100644
--- a/content/en/34/streams/developer-guide/dsl-api.md
+++ b/content/en/34/streams/developer-guide/dsl-api.md
@@ -2007,6 +2007,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/34/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/34/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, using `KStream#processValues` is not recommended, as it can lead to compatibility issues when upgrading to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
diff --git a/content/en/35/streams/core-concepts.md b/content/en/35/streams/core-concepts.md
index 75b6b79c2..6e884366c 100644
--- a/content/en/35/streams/core-concepts.md
+++ b/content/en/35/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/35/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/35/streams/developer-guide/datatypes.md b/content/en/35/streams/developer-guide/datatypes.md
index 9d7db62a9..2a426d884 100644
--- a/content/en/35/streams/developer-guide/datatypes.md
+++ b/content/en/35/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.5.2
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.5/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/35/streams/developer-guide/dsl-api.md b/content/en/35/streams/developer-guide/dsl-api.md
index b176b2bd5..29c5a9a7a 100644
--- a/content/en/35/streams/developer-guide/dsl-api.md
+++ b/content/en/35/streams/developer-guide/dsl-api.md
@@ -2015,6 +2015,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/35/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/35/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, using `KStream#processValues` is not recommended, as it can lead to compatibility issues when upgrading to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
diff --git a/content/en/36/streams/core-concepts.md b/content/en/36/streams/core-concepts.md
index c525c1f14..7536f3491 100644
--- a/content/en/36/streams/core-concepts.md
+++ b/content/en/36/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing system that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/36/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/36/streams/developer-guide/datatypes.md b/content/en/36/streams/developer-guide/datatypes.md
index dea361ea1..407a12c19 100644
--- a/content/en/36/streams/developer-guide/datatypes.md
+++ b/content/en/36/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.6.2
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.6/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/36/streams/developer-guide/dsl-api.md b/content/en/36/streams/developer-guide/dsl-api.md
index 3bc97e32c..ab041bded 100644
--- a/content/en/36/streams/developer-guide/dsl-api.md
+++ b/content/en/36/streams/developer-guide/dsl-api.md
@@ -2021,6 +2021,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/36/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/36/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, using `KStream#processValues` is not recommended, as it can lead to compatibility issues when upgrading to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
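For context on the `KStream#processValues` method named in the caution above, a rough sketch of a fixed-key value processor is shown below; the String-to-length transformation and type parameters are illustrative assumptions rather than anything taken from the upstream page:

```java
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
import org.apache.kafka.streams.processor.api.FixedKeyProcessorContext;
import org.apache.kafka.streams.processor.api.FixedKeyRecord;

public class ProcessValuesSketch {

    // Hypothetical: replace each String value with its length while keeping the original key.
    public static KStream<String, Integer> valueLengths(final KStream<String, String> input) {
        return input.processValues(() -> new FixedKeyProcessor<String, String, Integer>() {
            private FixedKeyProcessorContext<String, Integer> context;

            @Override
            public void init(final FixedKeyProcessorContext<String, Integer> context) {
                this.context = context;
            }

            @Override
            public void process(final FixedKeyRecord<String, String> record) {
                // The key is fixed, so no downstream re-partitioning is triggered.
                context.forward(record.withValue(record.value() == null ? 0 : record.value().length()));
            }
        });
    }
}
```

Whether to migrate from `transformValues` to `processValues` depends on the topology optimizations in use, as the caution above notes.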
diff --git a/content/en/37/streams/core-concepts.md b/content/en/37/streams/core-concepts.md
index c731841d7..82527711d 100644
--- a/content/en/37/streams/core-concepts.md
+++ b/content/en/37/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing systems that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/37/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/37/streams/developer-guide/datatypes.md b/content/en/37/streams/developer-guide/datatypes.md
index 715dbbea2..d32c1c9c6 100644
--- a/content/en/37/streams/developer-guide/datatypes.md
+++ b/content/en/37/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.7.2
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.7/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/37/streams/developer-guide/dsl-api.md b/content/en/37/streams/developer-guide/dsl-api.md
index c9e7fb00d..d8ae25cb8 100644
--- a/content/en/37/streams/developer-guide/dsl-api.md
+++ b/content/en/37/streams/developer-guide/dsl-api.md
@@ -2020,6 +2020,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/37/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/37/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, it is not recommended to use `KStream#processValues`, in order to avoid compatibility issues with future upgrades to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
diff --git a/content/en/38/streams/core-concepts.md b/content/en/38/streams/core-concepts.md
index c4eb3213e..ac03c1a14 100644
--- a/content/en/38/streams/core-concepts.md
+++ b/content/en/38/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing systems that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/38/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/38/streams/developer-guide/datatypes.md b/content/en/38/streams/developer-guide/datatypes.md
index 90b927c58..784ee5000 100644
--- a/content/en/38/streams/developer-guide/datatypes.md
+++ b/content/en/38/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.8.1
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.8/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/38/streams/developer-guide/dsl-api.md b/content/en/38/streams/developer-guide/dsl-api.md
index bd7b508e2..f192d9bee 100644
--- a/content/en/38/streams/developer-guide/dsl-api.md
+++ b/content/en/38/streams/developer-guide/dsl-api.md
@@ -1660,6 +1660,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/38/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/38/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, it is not recommended to use `KStream#processValues`, in order to avoid compatibility issues with future upgrades to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
diff --git a/content/en/39/streams/core-concepts.md b/content/en/39/streams/core-concepts.md
index bebc022bb..956e67794 100644
--- a/content/en/39/streams/core-concepts.md
+++ b/content/en/39/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing systems that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/39/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/39/streams/developer-guide/datatypes.md b/content/en/39/streams/developer-guide/datatypes.md
index 35be24935..3ceb24df5 100644
--- a/content/en/39/streams/developer-guide/datatypes.md
+++ b/content/en/39/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 3.9.1
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/3.9/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/39/streams/developer-guide/dsl-api.md b/content/en/39/streams/developer-guide/dsl-api.md
index e5f231b28..040e65c5f 100644
--- a/content/en/39/streams/developer-guide/dsl-api.md
+++ b/content/en/39/streams/developer-guide/dsl-api.md
@@ -1660,6 +1660,8 @@ Transformation | Description
| Applies a `ValueTransformer` to each record, while retaining the key of the original record. `transformValues()` allows you to leverage the [Processor API](processor-api.html#streams-developer-guide-processor-api) from the DSL. ([details](/39/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-)) Each input record is transformed into exactly one output record (zero output records or multiple output records are not possible). The `ValueTransformer` may return `null` as the new value for a record. `transformValues` is preferable to `transform` because it will not cause data re-partitioning. `transformValues` is essentially equivalent to adding the `ValueTransformer` via `Topology#addProcessor()` to your [processor topology](../core-concepts.html#streams_topology). An example is available in the [javadocs](/39/javadoc/org/apache/kafka/streams/kstream/KStream.html#transformValues-org.apache.kafka.streams.kstream.ValueTransformerSupplier-java.lang.String...-).
+**CAUTION:** If you are using the "merge repartition topics" optimization, it is not recommended to use `KStream#processValues`, in order to avoid compatibility issues with future upgrades to newer versions of Kafka Streams. For more details, see the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) in the Kafka Streams 4.0 docs.
+
The following example shows how to leverage, via the `KStream#process()` method, a custom `Processor` that sends an email notification whenever a page view count reaches a predefined threshold.
First, we need to implement a custom stream processor, `PopularPageEmailAlert`, that implements the `Processor` interface:
diff --git a/content/en/40/getting-started/upgrade.md b/content/en/40/getting-started/upgrade.md
index 712e3deaa..49ee55337 100644
--- a/content/en/40/getting-started/upgrade.md
+++ b/content/en/40/getting-started/upgrade.md
@@ -116,6 +116,7 @@ Note: Apache Kafka 4.0 only supports KRaft mode - ZooKeeper mode has been remove
* All public API, deprecated in Apache Kafka 3.6 or an earlier release, have been removed, with the exception of `JoinWindows.of()` and `JoinWindows#grace()`. See [KAFKA-17531](https://issues.apache.org/jira/browse/KAFKA-17531) for details.
* The most important changes are highlighted in the [Kafka Streams upgrade guide](/40/streams/upgrade-guide.html#streams_api_changes_400).
* For a full list of changes, see [KAFKA-12822](https://issues.apache.org/jira/browse/KAFKA-12822).
+ * If you are using `KStream#transformValues()`, which was removed in the Apache Kafka 4.0.0 release, and you need to rewrite your program to use `KStream#processValues()` instead, pay close attention to the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors).
* Other changes:
* The minimum Java version required by clients and Kafka Streams applications has been increased from Java 8 to Java 11 while brokers, connect and tools now require Java 17. See [KIP-750](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181308223) and [KIP-1013](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=284789510) for more details.
* Java 23 support has been added in Apache Kafka 4.0
diff --git a/content/en/40/streams/core-concepts.md b/content/en/40/streams/core-concepts.md
index 68a373a02..581d72da6 100644
--- a/content/en/40/streams/core-concepts.md
+++ b/content/en/40/streams/core-concepts.md
@@ -134,7 +134,7 @@ Kafka Streams allows direct read-only queries of the state stores by methods, th
# Processing Guarantees
-In stream processing, one of the most frequently asked question is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data-loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](http://lambda-architecture.net/). Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
+In stream processing, one of the most frequently asked questions is "does my stream processing system guarantee that each record is processed once and only once, even if some failures are encountered in the middle of processing?" Failing to guarantee exactly-once stream processing is a deal-breaker for many applications that cannot tolerate any data loss or data duplicates, and in that case a batch-oriented framework is usually used in addition to the stream processing pipeline, known as the [Lambda Architecture](https://en.wikipedia.org/wiki/Lambda_architecture). Prior to 0.11.0.0, Kafka only provided at-least-once delivery guarantees, and hence any stream processing systems that leveraged it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
Since the 0.11.0.0 release, Kafka has added support to allow its producers to send messages to different topic partitions in a [transactional and idempotent manner](https://kafka.apache.org/#semantics), and Kafka Streams has hence added the end-to-end exactly-once processing semantics by leveraging these features. More specifically, it guarantees that for any record read from the source Kafka topics, its processing results will be reflected exactly once in the output Kafka topic as well as in the state stores for stateful operations. Note the key difference between Kafka Streams end-to-end exactly-once guarantee with other stream processing frameworks' claimed guarantees is that Kafka Streams tightly integrates with the underlying Kafka storage system and ensure that commits on the input topic offsets, updates on the state stores, and writes to the output topics will be completed atomically instead of treating Kafka as an external system that may have side-effects. For more information on how this is done inside Kafka Streams, see [KIP-129](https://cwiki.apache.org/confluence/display/KAFKA/KIP-129%3A+Streams+Exactly-Once+Semantics).
As of the 2.6.0 release, Kafka Streams supports an improved implementation of exactly-once processing, named "exactly-once v2", which requires broker version 2.5.0 or newer. This implementation is more efficient, because it reduces client and broker resource utilization, like client threads and used network connections, and it enables higher throughput and improved scalability. As of the 3.0.0 release, the first version of exactly-once has been deprecated. Users are encouraged to use exactly-once v2 for exactly-once processing from now on, and prepare by upgrading their brokers if necessary. For more information on how this is done inside the brokers and Kafka Streams, see [KIP-447](https://cwiki.apache.org/confluence/display/KAFKA/KIP-447%3A+Producer+scalability+for+exactly+once+semantics).
To enable exactly-once semantics when running Kafka Streams applications, set the `processing.guarantee` config value (default value is **at_least_once**) to **StreamsConfig.EXACTLY_ONCE_V2** (requires brokers version 2.5 or newer). For more information, see the [Kafka Streams Configs](/40/streams/developer-guide/config-streams.html) section.
diff --git a/content/en/40/streams/developer-guide/datatypes.md b/content/en/40/streams/developer-guide/datatypes.md
index e58fe7254..7db195272 100644
--- a/content/en/40/streams/developer-guide/datatypes.md
+++ b/content/en/40/streams/developer-guide/datatypes.md
@@ -85,7 +85,7 @@ Apache Kafka includes several built-in serde implementations for Java primitives
org.apache.kafkakafka-clients
- 2.8.0
+ 4.0.0
This artifact provides the following serde implementations under the package [org.apache.kafka.common.serialization](https://github.com/apache/kafka/blob/4.0/clients/src/main/java/org/apache/kafka/common/serialization), which you can leverage when e.g., defining default serializers in your Streams configuration.
diff --git a/content/en/40/streams/developer-guide/dsl-api.md b/content/en/40/streams/developer-guide/dsl-api.md
index 1cacb087a..06858cab6 100644
--- a/content/en/40/streams/developer-guide/dsl-api.md
+++ b/content/en/40/streams/developer-guide/dsl-api.md
@@ -1644,11 +1644,11 @@ Beyond the aforementioned stateless and stateful transformations, you may also
## Operations and concepts
* `KStream#process`: Process all records in a stream, one record at a time, by applying a `Processor` (provided by a given `ProcessorSupplier`);
- * `KStream#processValues`: Process all records in a stream, one record at a time, by applying a `FixedKeyProcessor` (provided by a given `FixedKeyProcessorSupplier`);
+ * `KStream#processValues`: Process all records in a stream, one record at a time, by applying a `FixedKeyProcessor` (provided by a given `FixedKeyProcessorSupplier`) [**CAUTION:** If you are deploying a new Kafka Streams application, and you are using the "merge repartition topics" optimization, you should enable the fix for [KAFKA-19668](https://issues.apache.org/jira/browse/KAFKA-19668) to avoid compatibility issues with future upgrades to newer versions of Kafka Streams; for more details, see the migration guide below];
* `Processor`: A processor of key-value pair records;
- * `ContextualProcessor`: An abstract implementation of `Processor` that manages the `ProcessorContext` instance.
+ * `ContextualProcessor`: An abstract implementation of `Processor` that manages the `ProcessorContext` instance;
* `FixedKeyProcessor`: A processor of key-value pair records where keys are immutable;
- * `ContextualFixedKeyProcessor`: An abstract implementation of `FixedKeyProcessor` that manages the `FixedKeyProcessorContext` instance.
+ * `ContextualFixedKeyProcessor`: An abstract implementation of `FixedKeyProcessor` that manages the `FixedKeyProcessorContext` instance;
* `ProcessorSupplier`: A processor supplier that can create one or more `Processor` instances; and
* `FixedKeyProcessorSupplier`: A processor supplier that can create one or more `FixedKeyProcessor` instances.
@@ -1920,6 +1920,18 @@ The following deprecated methods are no longer available in Kafka Streams:
The Processor API now serves as a unified replacement for all these methods. It simplifies the API surface while maintaining support for both stateless and stateful operations.
+**CAUTION:** If you are using `KStream.transformValues()` and you have the "merge repartition topics" optimization enabled, rewriting your program to `KStream.processValues()` might not be safe due to [KAFKA-19668](https://issues.apache.org/jira/browse/KAFKA-19668). In this case, you should not upgrade to Kafka Streams 4.0.0 or 4.1.0, but use Kafka Streams 4.0.1 instead, which contains a fix. Note that the fix is not enabled by default for backward compatibility reasons; you need to enable it by setting the config `__enable.process.processValue.fix__ = true` and passing it into the `StreamsBuilder()` constructor.
+
+
+ final Properties properties = new Properties();
+ properties.put(StreamsConfig.APPLICATION_ID_CONFIG, ...);
+ properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, ...);
+ properties.put(TopologyConfig.InternalConfig.ENABLE_PROCESS_PROCESSVALUE_FIX, true);
+
+ final StreamsBuilder builder = new StreamsBuilder(new TopologyConfig(new StreamsConfig(properties)));
+
+It is recommended that you compare the output of `Topology.describe()` for the old and new topology, to verify that the rewrite to `processValues()` is correct and does not introduce any incompatibilities. You should also test the upgrade in a non-production environment.
+
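+For example, a minimal sketch (reusing the `builder` from above) of printing the description of the rewritten topology so it can be diffed against the description recorded from the previous version of the application:
+
+
+    final Topology topology = builder.build();
+    // Compare this output against the Topology.describe() output captured from
+    // the old application that still used transformValues().
+    System.out.println(topology.describe());
+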
## Migration Examples
To migrate from the deprecated `transform`, `transformValues`, `flatTransform`, and `flatTransformValues` methods to the Processor API (PAPI) in Kafka Streams, let's revisit the previous examples. The new `process` and `processValues` methods enable a more flexible and reusable approach by requiring implementations of the `Processor` or `FixedKeyProcessor` interfaces.
diff --git a/content/en/40/streams/upgrade-guide.md b/content/en/40/streams/upgrade-guide.md
index 8ed5b4231..ee6dea2e3 100644
--- a/content/en/40/streams/upgrade-guide.md
+++ b/content/en/40/streams/upgrade-guide.md
@@ -58,6 +58,7 @@ In this release, eos-v1 (Exactly Once Semantics version 1) is no longer supporte
* [Old processor APIs](https://issues.apache.org/jira/browse/KAFKA-12829)
* [KStream#through() in both Java and Scala](https://issues.apache.org/jira/browse/KAFKA-12823)
* ["transformer" methods and classes in both Java and Scala](https://issues.apache.org/jira/browse/KAFKA-16339)
+ * Migrating from `KStream#transformValues()` to `KStream#processValues()` might not be safe due to [KAFKA-19668](https://issues.apache.org/jira/browse/KAFKA-19668). Please refer to the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors) for more details.
* [kstream.KStream#branch in both Java and Scala](https://issues.apache.org/jira/browse/KAFKA-12824)
* [builder methods for Time/Session/Join/SlidingWindows](https://issues.apache.org/jira/browse/KAFKA-16332)
* [KafkaStreams#setUncaughtExceptionHandler()](https://issues.apache.org/jira/browse/KAFKA-12827)
@@ -206,7 +207,7 @@ Kafka Streams does not send a "leave group" request when an instance is closed.
* `KStream KStream.process(ProcessorSupplier, ...)`
* `KStream KStream.processValues(FixedKeyProcessorSupplier, ...)`
-Both new methods have multiple overloads and return a `KStream` instead of `void` as the deprecated `process()` methods did. In addition, `FixedKeyProcessor`, `FixedKeyRecord`, `FixedKeyProcessorContext`, and `ContextualFixedKeyProcessor` are introduced to guard against disallowed key modification inside `processValues()`. Furthermore, `ProcessingContext` is added for a better interface hierarchy.
+Both new methods have multiple overloads and return a `KStream` instead of `void` as the deprecated `process()` methods did. In addition, `FixedKeyProcessor`, `FixedKeyRecord`, `FixedKeyProcessorContext`, and `ContextualFixedKeyProcessor` are introduced to guard against disallowed key modification inside `processValues()`. Furthermore, `ProcessingContext` is added for a better interface hierarchy. **CAUTION:** The newly added `KStream.processValues()` method introduced a regression bug ([KAFKA-19668](https://issues.apache.org/jira/browse/KAFKA-19668)). If you have the "merge repartition topics" optimization enabled, it is not safe to migrate from `transformValues()` to the `processValues()` method introduced in the 3.3.0 release. The bug is only fixed in Kafka Streams 4.0.1, 4.1.1, and 4.2.0. For more details, please refer to the [migration guide](/40/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors).
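+
+As a rough sketch only (subject to the caution above; class name, topic names, and serdes are illustrative), a value-only processor implemented via `ContextualFixedKeyProcessor` and attached with `processValues()` could look like this:
+
+
+    public class UppercaseValueProcessor extends ContextualFixedKeyProcessor<String, String, String> {
+        @Override
+        public void process(final FixedKeyRecord<String, String> record) {
+            // Forward the record with a new value; the key cannot be changed.
+            context().forward(record.withValue(record.value().toUpperCase()));
+        }
+    }
+
+    final KStream<String, String> input =
+        builder.stream("input-topic", Consumed.with(Serdes.String(), Serdes.String()));
+    input.processValues(UppercaseValueProcessor::new).to("output-topic");
+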
Emitting a windowed aggregation result only after a window is closed is currently supported via the `suppress()` operator. However, `suppress()` uses an in-memory implementation and does not support RocksDB. To close this gap, [KIP-825](https://cwiki.apache.org/confluence/display/KAFKA/KIP-825%3A+introduce+a+new+API+to+control+when+aggregated+results+are+produced) introduces "emit strategies", which are built into the aggregation operator directly to use the already existing RocksDB store. `TimeWindowedKStream.emitStrategy(EmitStrategy)` and `SessionWindowedKStream.emitStrategy(EmitStrategy)` allow picking between "emit on window update" (default) and "emit on window close" strategies. Additionally, a few new emit metrics are added, as well as a necessary new method, `SessionStore.findSessions(long, long)`.
diff --git a/content/en/41/_index.md b/content/en/41/_index.md
new file mode 100644
index 000000000..344218095
--- /dev/null
+++ b/content/en/41/_index.md
@@ -0,0 +1,10 @@
+---
+title: AK 4.1.X
+description: Documentation for AK 4.1.X
+weight:
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/apis/_index.md b/content/en/41/apis/_index.md
new file mode 100644
index 000000000..3b0756a52
--- /dev/null
+++ b/content/en/41/apis/_index.md
@@ -0,0 +1,10 @@
+---
+title: APIs
+description:
+weight: 2
+tags: ['kafka', 'docs', 'apis']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/apis/api.md b/content/en/41/apis/api.md
new file mode 100644
index 000000000..b6679647f
--- /dev/null
+++ b/content/en/41/apis/api.md
@@ -0,0 +1,114 @@
+---
+title: API
+description:
+weight: 1
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+Kafka includes five core APIs:
+
+ 1. The Producer API allows applications to send streams of data to topics in the Kafka cluster.
+ 2. The Consumer API allows applications to read streams of data from topics in the Kafka cluster.
+ 3. The Streams API allows transforming streams of data from input topics to output topics.
+ 4. The Connect API allows implementing connectors that continually pull from some source system or application into Kafka or push from Kafka into some sink system or application.
+ 5. The Admin API allows managing and inspecting topics, brokers, and other Kafka objects.
+Kafka exposes all of its functionality over a language-independent protocol which has clients available in many programming languages. However, only the Java clients are maintained as part of the main Kafka project; the others are available as independent open source projects. A list of non-Java clients is available [here](https://cwiki.apache.org/confluence/x/3gDVAQ).
+
+# Producer API
+
+The Producer API allows applications to send streams of data to topics in the Kafka cluster.
+
+Examples of using the producer are shown in the [javadocs](/41/javadoc/index.html?org/apache/kafka/clients/producer/KafkaProducer.html "Kafka 4.1 Javadoc").
+
+To use the producer, add the following Maven dependency to your project:
+
+
+
+    <dependency>
+        <groupId>org.apache.kafka</groupId>
+        <artifactId>kafka-clients</artifactId>
+        <version>4.1.0</version>
+    </dependency>
+
+
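+As a quick orientation, a minimal sketch (broker address and topic name are placeholders) of sending a single record; see the javadocs linked above for complete examples:
+
+
+    Properties props = new Properties();
+    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
+    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+    try (Producer<String, String> producer = new KafkaProducer<>(props)) {
+        producer.send(new ProducerRecord<>("my-topic", "key", "value"));    // placeholder topic
+    }
+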
+# Consumer API
+
+The Consumer API allows applications to read streams of data from topics in the Kafka cluster.
+
+Examples of using the consumer are shown in the [javadocs](/41/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html "Kafka 4.1 Javadoc").
+
+To use the consumer, add the following Maven dependency to your project:
+
+
+
+    <dependency>
+        <groupId>org.apache.kafka</groupId>
+        <artifactId>kafka-clients</artifactId>
+        <version>4.1.0</version>
+    </dependency>
+
+
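+As a quick orientation, a minimal sketch (broker address, group id, and topic name are placeholders) of polling records; see the javadocs linked above for complete examples:
+
+
+    Properties props = new Properties();
+    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
+    props.put(ConsumerConfig.GROUP_ID_CONFIG, "my-group");                  // placeholder
+    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+    try (Consumer<String, String> consumer = new KafkaConsumer<>(props)) {
+        consumer.subscribe(List.of("my-topic"));                            // placeholder topic
+        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
+        records.forEach(r -> System.out.println(r.key() + " -> " + r.value()));
+    }
+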
+# Share Consumer API (Preview)
+
+The Share Consumer API (Preview) enables applications within a share group to cooperatively consume and process data from Kafka topics.
+
+Examples of using the share consumer are shown in the [javadocs](/41/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaShareConsumer.html "Kafka 4.1 Javadoc").
+
+To use the share consumer, add the following Maven dependency to your project:
+
+
+
+    <dependency>
+        <groupId>org.apache.kafka</groupId>
+        <artifactId>kafka-clients</artifactId>
+        <version>4.1.0</version>
+    </dependency>
+
+
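+Since this API is in preview and may still change, the following is only a rough sketch (broker address, share group name, and topic are placeholders) of how consumption looks; consult the javadocs linked above for the authoritative API:
+
+
+    Properties props = new Properties();
+    props.put("bootstrap.servers", "localhost:9092");            // placeholder
+    props.put("group.id", "my-share-group");                     // placeholder share group
+    props.put("key.deserializer", StringDeserializer.class.getName());
+    props.put("value.deserializer", StringDeserializer.class.getName());
+    try (KafkaShareConsumer<String, String> consumer = new KafkaShareConsumer<>(props)) {
+        consumer.subscribe(List.of("my-topic"));                 // placeholder topic
+        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
+        records.forEach(r -> System.out.println(r.value()));
+    }
+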
+# Streams API
+
+The [Streams](/41/streams) API allows transforming streams of data from input topics to output topics.
+
+Examples of using this library are shown in the [javadocs](/41/javadoc/index.html?org/apache/kafka/streams/KafkaStreams.html "Kafka 4.1 Javadoc").
+
+Additional documentation on using the Streams API is available [here](/41/streams).
+
+To use Kafka Streams, add the following Maven dependency to your project:
+
+
+
+    <dependency>
+        <groupId>org.apache.kafka</groupId>
+        <artifactId>kafka-streams</artifactId>
+        <version>4.1.0</version>
+    </dependency>
+
+
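+As a quick orientation, a minimal sketch (topic names are placeholders; `props` is the usual Streams configuration) of a topology that upper-cases values:
+
+
+    StreamsBuilder builder = new StreamsBuilder();
+    builder.stream("input-topic", Consumed.with(Serdes.String(), Serdes.String()))
+           .mapValues(value -> value.toUpperCase())
+           .to("output-topic", Produced.with(Serdes.String(), Serdes.String()));
+    KafkaStreams streams = new KafkaStreams(builder.build(), props);
+    streams.start();
+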
+When using Scala you may optionally include the `kafka-streams-scala` library. Additional documentation on using the Kafka Streams DSL for Scala is available [in the developer guide](/41/streams/developer-guide/dsl-api.html#scala-dsl).
+
+To use Kafka Streams DSL for Scala 2.13, add the following Maven dependency to your project:
+
+
+
+    <dependency>
+        <groupId>org.apache.kafka</groupId>
+        <artifactId>kafka-streams-scala_2.13</artifactId>
+        <version>4.1.0</version>
+    </dependency>
+
+
+# Connect API
+
+The Connect API allows implementing connectors that continually pull from some source data system into Kafka or push from Kafka into some sink data system.
+
+Many users of Connect won't need to use this API directly, though; they can use pre-built connectors without needing to write any code. Additional information on using Connect is available [here](/documentation.html#connect).
+
+Those who want to implement custom connectors can see the [javadoc](/41/javadoc/index.html?org/apache/kafka/connect "Kafka 4.1 Javadoc").
+
+# Admin API
+
+The Admin API supports managing and inspecting topics, brokers, acls, and other Kafka objects.
+
+To use the Admin API, add the following Maven dependency to your project:
+
+
+
+    <dependency>
+        <groupId>org.apache.kafka</groupId>
+        <artifactId>kafka-clients</artifactId>
+        <version>4.1.0</version>
+    </dependency>
+
+
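+As a quick orientation, a minimal sketch (broker address, topic name, partition count, and replication factor are placeholders) of creating a topic:
+
+
+    Properties props = new Properties();
+    props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
+    try (Admin admin = Admin.create(props)) {
+        admin.createTopics(List.of(new NewTopic("my-topic", 3, (short) 1)))
+             .all()
+             .get();   // wait for the topic to be created
+    }
+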
+For more information about the Admin APIs, see the [javadoc](/41/javadoc/index.html?org/apache/kafka/clients/admin/Admin.html "Kafka 4.1 Javadoc").
diff --git a/content/en/41/configuration/_index.md b/content/en/41/configuration/_index.md
new file mode 100644
index 000000000..70cf6016c
--- /dev/null
+++ b/content/en/41/configuration/_index.md
@@ -0,0 +1,10 @@
+---
+title: Configuration
+description:
+weight: 3
+tags: ['kafka', 'docs', 'configuration']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/configuration/configuration.md b/content/en/41/configuration/configuration.md
new file mode 100644
index 000000000..eef8ea503
--- /dev/null
+++ b/content/en/41/configuration/configuration.md
@@ -0,0 +1,448 @@
+---
+title: Configuration
+description:
+weight: 1
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+Kafka uses key-value pairs in the [property file format](https://en.wikipedia.org/wiki/.properties) for configuration. These values can be supplied either from a file or programmatically.
+
+# Broker Configs
+
+The essential configurations are the following:
+
+ * `node.id`
+ * `log.dirs`
+ * `process.roles`
+ * `controller.quorum.bootstrap.servers`
+Topic configurations and defaults are discussed in more detail below. {{< include-html file="/static/41/generated/kafka_config.html" >}}
+
+More details about broker configuration can be found in the Scala class `kafka.server.KafkaConfig`.
+
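+For example, a hypothetical minimal `server.properties` sketch for a combined KRaft broker/controller, showing only the essential configurations listed above (all values are placeholders; a working setup also needs listeners and related settings):
+
+
+    process.roles=broker,controller
+    node.id=1
+    controller.quorum.bootstrap.servers=localhost:9093
+    log.dirs=/tmp/kraft-combined-logs
+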
+## Updating Broker Configs
+
+From Kafka version 1.1 onwards, some of the broker configs can be updated without restarting the broker. See the `Dynamic Update Mode` column in Broker Configs for the update mode of each broker config.
+
+ * `read-only`: Requires a broker restart for update
+ * `per-broker`: May be updated dynamically for each broker
+ * `cluster-wide`: May be updated dynamically as a cluster-wide default. May also be updated as a per-broker value for testing.
+
+To alter the current broker configs for broker id 0 (for example, the number of log cleaner threads):
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type brokers --entity-name 0 --alter --add-config log.cleaner.threads=2
+
+To describe the current dynamic broker configs for broker id 0:
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type brokers --entity-name 0 --describe
+
+To delete a config override and revert to the statically configured or default value for broker id 0 (for example, the number of log cleaner threads):
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type brokers --entity-name 0 --alter --delete-config log.cleaner.threads
+
+Some configs may be configured as a cluster-wide default to maintain consistent values across the whole cluster. All brokers in the cluster will process the cluster default update. For example, to update log cleaner threads on all brokers:
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type brokers --entity-default --alter --add-config log.cleaner.threads=2
+
+To describe the currently configured dynamic cluster-wide default configs:
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type brokers --entity-default --describe
+
+All configs that are configurable at cluster level may also be configured at per-broker level (e.g. for testing). If a config value is defined at different levels, the following order of precedence is used:
+
+ * Dynamic per-broker config stored in the metadata log
+ * Dynamic cluster-wide default config stored in the metadata log
+ * Static broker config from `server.properties`
+ * Kafka default, see broker configs
+
+
+
+### Updating SSL Keystore of an Existing Listener
+
+Brokers may be configured with SSL keystores with short validity periods to reduce the risk of compromised certificates. Keystores may be updated dynamically without restarting the broker. The config name must be prefixed with the listener prefix `listener.name.{listenerName}.` so that only the keystore config of a specific listener is updated. The following configs may be updated in a single alter request at per-broker level:
+
+ * `ssl.keystore.type`
+ * `ssl.keystore.location`
+ * `ssl.keystore.password`
+ * `ssl.key.password`
+
+If the listener is the inter-broker listener, the update is allowed only if the new keystore is trusted by the truststore configured for that listener. For other listeners, no trust validation is performed on the keystore by the broker. Certificates must be signed by the same certificate authority that signed the old certificate to avoid any client authentication failures.
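+
+For example, a hypothetical command (listener name, paths, and passwords are placeholders) that updates the keystore of one listener on broker 0:
+
+
+    $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type brokers --entity-name 0 --alter \
+      --add-config 'listener.name.external.ssl.keystore.location=/path/to/new.keystore.jks,listener.name.external.ssl.keystore.password=keystore-password,listener.name.external.ssl.key.password=key-password'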
+
+### Updating SSL Truststore of an Existing Listener
+
+Broker truststores may be updated dynamically without restarting the broker to add or remove certificates. Updated truststore will be used to authenticate new client connections. The config name must be prefixed with the listener prefix `listener.name.{listenerName}.` so that only the truststore config of a specific listener is updated. The following configs may be updated in a single alter request at per-broker level:
+
+ * `ssl.truststore.type`
+ * `ssl.truststore.location`
+ * `ssl.truststore.password`
+
+If the listener is the inter-broker listener, the update is allowed only if the existing keystore for that listener is trusted by the new truststore. For other listeners, no trust validation is performed by the broker before the update. Removal of CA certificates used to sign client certificates from the new truststore can lead to client authentication failures.
+
+### Updating Default Topic Configuration
+
+Default topic configuration options used by brokers may be updated without broker restart. The configs are applied to topics without a topic config override for the equivalent per-topic config. One or more of these configs may be overridden at cluster-default level used by all brokers.
+
+ * `log.segment.bytes`
+ * `log.roll.ms`
+ * `log.roll.hours`
+ * `log.roll.jitter.ms`
+ * `log.roll.jitter.hours`
+ * `log.index.size.max.bytes`
+ * `log.flush.interval.messages`
+ * `log.flush.interval.ms`
+ * `log.retention.bytes`
+ * `log.retention.ms`
+ * `log.retention.minutes`
+ * `log.retention.hours`
+ * `log.index.interval.bytes`
+ * `log.cleaner.delete.retention.ms`
+ * `log.cleaner.min.compaction.lag.ms`
+ * `log.cleaner.max.compaction.lag.ms`
+ * `log.cleaner.min.cleanable.ratio`
+ * `log.cleanup.policy`
+ * `log.segment.delete.delay.ms`
+ * `unclean.leader.election.enable`
+ * `min.insync.replicas`
+ * `max.message.bytes`
+ * `compression.type`
+ * `log.preallocate`
+ * `log.message.timestamp.type`
+
+
+
+### Updating Log Cleaner Configs
+
+Log cleaner configs may be updated dynamically at cluster-default level used by all brokers. The changes take effect on the next iteration of log cleaning. One or more of these configs may be updated:
+
+ * `log.cleaner.threads`
+ * `log.cleaner.io.max.bytes.per.second`
+ * `log.cleaner.dedupe.buffer.size`
+ * `log.cleaner.io.buffer.size`
+ * `log.cleaner.io.buffer.load.factor`
+ * `log.cleaner.backoff.ms`
+
+
+
+### Updating Thread Configs
+
+The size of various thread pools used by the broker may be updated dynamically at cluster-default level used by all brokers. Updates are restricted to the range `currentSize / 2` to `currentSize * 2` to ensure that config updates are handled gracefully.
+
+ * `num.network.threads`
+ * `num.io.threads`
+ * `num.replica.fetchers`
+ * `num.recovery.threads.per.data.dir`
+ * `log.cleaner.threads`
+ * `background.threads`
+ * `remote.log.reader.threads`
+ * `remote.log.manager.copier.thread.pool.size`
+ * `remote.log.manager.expiration.thread.pool.size`
+
+
+
+### Updating ConnectionQuota Configs
+
+The maximum number of connections allowed for a given IP/host by the broker may be updated dynamically at cluster-default level used by all brokers. The changes will apply to new connections, and the existing connection counts will be taken into account by the new limits.
+
+ * `max.connections.per.ip`
+ * `max.connections.per.ip.overrides`
+
+
+
+### Adding and Removing Listeners
+
+Listeners may be added or removed dynamically. When a new listener is added, security configs of the listener must be provided as listener configs with the listener prefix `listener.name.{listenerName}.`. If the new listener uses SASL, the JAAS configuration of the listener must be provided using the JAAS configuration property `sasl.jaas.config` with the listener and mechanism prefix. See JAAS configuration for Kafka brokers for details.
+
+In Kafka version 1.1.x, the listener used by the inter-broker listener may not be updated dynamically. To update the inter-broker listener to a new listener, the new listener may be added on all brokers without restarting the broker. A rolling restart is then required to update `inter.broker.listener.name`.
+
+In addition to all the security configs of new listeners, the following configs may be updated dynamically at per-broker level:
+
+ * `listeners`
+ * `advertised.listeners`
+ * `listener.security.protocol.map`
+
+Inter-broker listener must be configured using the static broker configuration `inter.broker.listener.name` or `security.inter.broker.protocol`.
+
+# Topic Configs
+
+Configurations pertinent to topics have both a server default as well as an optional per-topic override. If no per-topic configuration is given, the server default is used. The override can be set at topic creation time by giving one or more `--config` options. This example creates a topic named _my-topic_ with a custom max message size and flush rate:
+
+
+ $ bin/kafka-topics.sh --bootstrap-server localhost:9092 --create --topic my-topic --partitions 1 \
+ --replication-factor 1 --config max.message.bytes=64000 --config flush.messages=1
+
+Overrides can also be changed or set later using the alter configs command. This example updates the max message size for _my-topic_ :
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type topics --entity-name my-topic
+ --alter --add-config max.message.bytes=128000
+
+To check overrides set on the topic, you can run:
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type topics --entity-name my-topic --describe
+
+To remove an override, you can run:
+
+
+ $ bin/kafka-configs.sh --bootstrap-server localhost:9092 --entity-type topics --entity-name my-topic
+ --alter --delete-config max.message.bytes
+
+Below is the topic configuration. The server's default configuration for this property is given under the Server Default Property heading. A given server default config value only applies to a topic if it does not have an explicit topic config override. {{< include-html file="/static/41/generated/topic_config.html" >}}
+
+# Group Configs
+
+Below is the group configuration: {{< include-html file="/static/41/generated/group_config.html" >}}
+
+# Producer Configs
+
+Below is the producer configuration: {{< include-html file="/static/41/generated/producer_config.html" >}}
+
+# Consumer Configs
+
+Below is the consumer and share consumer configuration: {{< include-html file="/static/41/generated/consumer_config.html" >}}
+
+# Kafka Connect Configs
+
+Below is the Kafka Connect framework configuration. {{< include-html file="/static/41/generated/connect_config.html" >}}
+
+## Source Connector Configs
+
+Below is the source connector configuration. {{< include-html file="/static/41/generated/source_connector_config.html" >}}
+
+## Sink Connector Configs
+
+Below is the sink connector configuration. {{< include-html file="/static/41/generated/sink_connector_config.html" >}}
+
+# Kafka Streams Configs
+
+Below is the Kafka Streams client library configuration. {{< include-html file="/static/41/generated/streams_config.html" >}}
+
+# Admin Configs
+
+Below is the Kafka Admin client library configuration. {{< include-html file="/static/41/generated/admin_client_config.html" >}}
+
+# MirrorMaker Configs
+
+Below is the configuration of the connectors that make up MirrorMaker 2.
+
+## MirrorMaker Common Configs
+
+Below is the common configuration that applies to all three connectors. {{< include-html file="/static/41/generated/mirror_connector_config.html" >}}
+
+## MirrorMaker Source Configs
+
+Below is the configuration of MirrorMaker 2 source connector for replicating topics. {{< include-html file="/static/41/generated/mirror_source_config.html" >}}
+
+## MirrorMaker Checkpoint Configs
+
+Below is the configuration of MirrorMaker 2 checkpoint connector for emitting consumer offset checkpoints. {{< include-html file="/static/41/generated/mirror_checkpoint_config.html" >}}
+
+## MirrorMaker HeartBeat Configs
+
+Below is the configuration of MirrorMaker 2 heartbeat connector for checking connectivity between connectors and clusters. {{< include-html file="/static/41/generated/mirror_heartbeat_config.html" >}}
+
+# System Properties
+
+Kafka supports some configuration that can be enabled through Java system properties. System properties are usually set by passing the -D flag to the Java virtual machine in which Kafka components are running. Below are the supported system properties.
+
+ * #### org.apache.kafka.sasl.oauthbearer.allowed.files
+
+This system property is used to determine which files, if any, are allowed to be read by the SASL OAUTHBEARER plugin. This property accepts a comma-separated list of files. By default, the value is an empty list.
+
+If users want to allow specific files, they need to explicitly set the system property as shown below.
+
+ -Dorg.apache.kafka.sasl.oauthbearer.allowed.files=/tmp/token,/tmp/private_key.pem
+
+Since:| 4.1.0
+---|---
+Default Value:|
+ * #### org.apache.kafka.sasl.oauthbearer.allowed.urls
+
+This system property is used to set the allowed URLs as SASL OAUTHBEARER token or jwks endpoints. This property accepts a comma-separated list of URLs. By default, the value is an empty list.
+
+If users want to allow specific URLs, they need to explicitly set the system property as shown below.
+
+ -Dorg.apache.kafka.sasl.oauthbearer.allowed.urls=https://www.example.com,file:///tmp/token
+
+Since:| 4.0.0
+---|---
+Default Value:|
+ * #### org.apache.kafka.disallowed.login.modules
+
+This system property is used to disable problematic login module usage in SASL JAAS configuration. This property accepts a comma-separated list of loginModule names. By default, the **com.sun.security.auth.module.JndiLoginModule** loginModule is disabled.
+
+If users want to enable JndiLoginModule, they need to explicitly reset the system property as shown below. We advise users to validate configurations and only allow trusted JNDI configurations. For more details, see [CVE-2023-25194](/community/cve-list/#CVE-2023-25194).
+
+ -Dorg.apache.kafka.disallowed.login.modules=
+
+To disable more loginModules, update the system property with a comma-separated list of loginModule names. Make sure to explicitly add the **JndiLoginModule** module name to the comma-separated list, as shown below.
+
+ -Dorg.apache.kafka.disallowed.login.modules=com.sun.security.auth.module.JndiLoginModule,com.ibm.security.auth.module.LdapLoginModule,com.ibm.security.auth.module.Krb5LoginModule
+
+Since:| 3.4.0
+---|---
+Default Value:| com.sun.security.auth.module.JndiLoginModule
+ * #### org.apache.kafka.automatic.config.providers
+
+This system property controls the automatic loading of ConfigProvider implementations in Apache Kafka. ConfigProviders are used to dynamically supply configuration values from sources such as files, directories, or environment variables. This property accepts a comma-separated list of ConfigProvider names. By default, all built-in ConfigProviders are enabled, including **FileConfigProvider**, **DirectoryConfigProvider**, and **EnvVarConfigProvider**.
+
+If users want to disable all automatic ConfigProviders, they need to explicitly set the system property as shown below. Disabling automatic ConfigProviders is recommended in environments where configuration data comes from untrusted sources or where increased security is required. For more details, see [CVE-2024-31141](/community/cve-list/#CVE-2024-31141).
+
+ -Dorg.apache.kafka.automatic.config.providers=none
+
+To allow specific ConfigProviders, update the system property with a comma-separated list of fully qualified ConfigProvider class names. For example, to enable only the **EnvVarConfigProvider**, set the property as follows:
+
+ -Dorg.apache.kafka.automatic.config.providers=org.apache.kafka.common.config.provider.EnvVarConfigProvider
+
+To use multiple ConfigProviders, include their names in a comma-separated list as shown below:
+
+ -Dorg.apache.kafka.automatic.config.providers=org.apache.kafka.common.config.provider.FileConfigProvider,org.apache.kafka.common.config.provider.EnvVarConfigProvider
+
+Since:| 3.8.0
+---|---
+Default Value:| All built-in ConfigProviders are enabled
+
+
+
+# Tiered Storage Configs
+
+Below is the Tiered Storage configuration. {{< include-html file="/static/41/generated/remote_log_manager_config.html" >}} {{< include-html file="/static/41/generated/remote_log_metadata_manager_config.html" >}}
+
+# Configuration Providers
+
+Use configuration providers to load configuration data from external sources. This might include sensitive information, such as passwords, API keys, or other credentials.
+
+You have the following options:
+
+ * Use a custom provider by creating a class implementing the [`ConfigProvider`](/41/javadoc/org/apache/kafka/common/config/provider/ConfigProvider.html) interface and packaging it into a JAR file.
+ * Use a built-in provider:
+ * [`DirectoryConfigProvider`](/41/javadoc/org/apache/kafka/common/config/provider/DirectoryConfigProvider.html)
+ * [`EnvVarConfigProvider`](/41/javadoc/org/apache/kafka/common/config/provider/EnvVarConfigProvider.html)
+ * [`FileConfigProvider`](/41/javadoc/org/apache/kafka/common/config/provider/FileConfigProvider.html)
+
+
+
+To use a configuration provider, specify it in your configuration using the `config.providers` property.
+
+## Using Configuration Providers
+
+Configuration providers allow you to pass parameters and retrieve configuration data from various sources.
+
+To specify configuration providers, set `config.providers` to a comma-separated list of aliases, then map each alias to the fully-qualified class name that implements the provider:
+
+
+ config.providers=provider1,provider2
+ config.providers.provider1.class=com.example.Provider1
+ config.providers.provider2.class=com.example.Provider2
+
+Each provider can have its own set of parameters, which are passed in a specific format:
+
+
+    config.providers.<provider_alias>.param.<name>=<value>
+
+The `ConfigProvider` interface serves as a base for all configuration providers. Custom implementations of this interface can be created to retrieve configuration data from various sources. You can package the implementation as a JAR file, add the JAR to your classpath, and reference the provider's class in your configuration.
+
+**Example custom provider configuration**
+
+
+ config.providers=customProvider
+ config.providers.customProvider.class=com.example.customProvider
+ config.providers.customProvider.param.param1=value1
+ config.providers.customProvider.param.param2=value2
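+
+As a rough sketch of what a custom provider might look like, the following hypothetical `com.example.CustomProvider` class implements the `ConfigProvider` interface and simply serves back the values supplied through its own `param.` entries. It is illustrative only, not a reference implementation.
+
+    package com.example;
+
+    import java.util.HashMap;
+    import java.util.Map;
+    import java.util.Set;
+
+    import org.apache.kafka.common.config.ConfigData;
+    import org.apache.kafka.common.config.provider.ConfigProvider;
+
+    // Hypothetical provider: serves values passed via config.providers.<alias>.param.* entries.
+    public class CustomProvider implements ConfigProvider {
+
+        private final Map<String, String> values = new HashMap<>();
+
+        @Override
+        public void configure(Map<String, ?> configs) {
+            // Receives the param.* entries declared for this provider alias.
+            configs.forEach((k, v) -> values.put(k, String.valueOf(v)));
+        }
+
+        @Override
+        public ConfigData get(String path) {
+            // Return all known values when no specific keys are requested.
+            return new ConfigData(values);
+        }
+
+        @Override
+        public ConfigData get(String path, Set<String> keys) {
+            Map<String, String> data = new HashMap<>();
+            for (String key : keys) {
+                if (values.containsKey(key)) {
+                    data.put(key, values.get(key));
+                }
+            }
+            return new ConfigData(data);
+        }
+
+        @Override
+        public void close() {
+            // Nothing to clean up in this sketch.
+        }
+    }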
+
+## DirectoryConfigProvider
+
+The `DirectoryConfigProvider` retrieves configuration data from files stored in a specified directory.
+
+Each file represents a key, and its content is the value. This provider is useful for loading multiple configuration files and for organizing configuration data into separate files.
+
+To restrict the files that the `DirectoryConfigProvider` can access, use the `allowed.paths` parameter. This parameter accepts a comma-separated list of paths that the provider is allowed to access. If not set, all paths are allowed.
+
+**Example `DirectoryConfigProvider` configuration**
+
+
+ config.providers=dirProvider
+ config.providers.dirProvider.class=org.apache.kafka.common.config.provider.DirectoryConfigProvider
+ config.providers.dirProvider.param.allowed.paths=/path/to/dir1,/path/to/dir2
+
+To reference a value supplied by the `DirectoryConfigProvider`, use the correct placeholder syntax:
+
+
+    ${dirProvider:<path_to_directory>:<file_name>}
+
+## EnvVarConfigProvider
+
+The `EnvVarConfigProvider` retrieves configuration data from environment variables.
+
+No specific parameters are required, as it reads directly from the specified environment variables.
+
+This provider is useful for configuring applications running in containers, for example, to load certificates or JAAS configuration from environment variables mapped from secrets.
+
+To restrict which environment variables the `EnvVarConfigProvider` can access, use the `allowlist.pattern` parameter. This parameter accepts a regular expression that environment variable names must match to be used by the provider.
+
+**Example `EnvVarConfigProvider` configuration**
+
+
+ config.providers=envVarProvider
+ config.providers.envVarProvider.class=org.apache.kafka.common.config.provider.EnvVarConfigProvider
+ config.providers.envVarProvider.param.allowlist.pattern=^MY_ENVAR1_.*
+
+To reference a value supplied by the `EnvVarConfigProvider`, use the correct placeholder syntax:
+
+
+    ${envVarProvider:<environment_variable_name>}
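+
+For example, a client property could pull a keystore password from a hypothetical `SSL_KEYSTORE_PASSWORD` environment variable (assuming it matches the configured allowlist pattern):
+
+    ssl.keystore.password=${envVarProvider:SSL_KEYSTORE_PASSWORD}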
+
+## FileConfigProvider
+
+The `FileConfigProvider` retrieves configuration data from a single properties file.
+
+This provider is useful for loading configuration data from mounted files.
+
+To restrict the file paths that the `FileConfigProvider` can access, use the `allowed.paths` parameter. This parameter accepts a comma-separated list of paths that the provider is allowed to access. If not set, all paths are allowed.
+
+**Example `FileConfigProvider` configuration**
+
+
+ config.providers=fileProvider
+ config.providers.fileProvider.class=org.apache.kafka.common.config.provider.FileConfigProvider
+ config.providers.fileProvider.param.allowed.paths=/path/to/config1,/path/to/config2
+
+To reference a value supplied by the `FileConfigProvider`, use the correct placeholder syntax:
+
+
+    ${fileProvider:<path_to_file>:<property_name>}
+
+## Example: Referencing files
+
+Here’s an example that uses a file configuration provider with Kafka Connect to provide authentication credentials to a database for a connector.
+
+First, create a `connector-credentials.properties` configuration file with the following credentials:
+
+
+ dbUsername=my-username
+ dbPassword=my-password
+
+Specify a `FileConfigProvider` in the Kafka Connect configuration:
+
+**Example Kafka Connect configuration with a `FileConfigProvider`**
+
+
+ config.providers=fileProvider
+ config.providers.fileProvider.class=org.apache.kafka.common.config.provider.FileConfigProvider
+
+Next, reference the properties from the file in the connector configuration.
+
+**Example connector configuration referencing file properties**
+
+
+ database.user=${fileProvider:/path/to/connector-credentials.properties:dbUsername}
+ database.password=${fileProvider:/path/to/connector-credentials.properties:dbPassword}
+
+At runtime, the configuration provider reads and extracts the values from the properties file.
diff --git a/content/en/41/design/_index.md b/content/en/41/design/_index.md
new file mode 100644
index 000000000..45615eb06
--- /dev/null
+++ b/content/en/41/design/_index.md
@@ -0,0 +1,10 @@
+---
+title: Design
+description:
+weight: 4
+tags: ['kafka', 'docs', 'design']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/design/design.md b/content/en/41/design/design.md
new file mode 100644
index 000000000..988bdb408
--- /dev/null
+++ b/content/en/41/design/design.md
@@ -0,0 +1,488 @@
+---
+title: Design
+description:
+weight: 1
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Motivation
+
+We designed Kafka to be able to act as a unified platform for handling all the real-time data feeds a large company might have. To do this we had to think through a fairly broad set of use cases.
+
+It would have to have high-throughput to support high volume event streams such as real-time log aggregation.
+
+It would need to deal gracefully with large data backlogs to be able to support periodic data loads from offline systems.
+
+It also meant the system would have to handle low-latency delivery to handle more traditional messaging use-cases.
+
+We wanted to support partitioned, distributed, real-time processing of these feeds to create new, derived feeds. This motivated our partitioning and consumer model.
+
+Finally in cases where the stream is fed into other data systems for serving, we knew the system would have to be able to guarantee fault-tolerance in the presence of machine failures.
+
+Supporting these uses led us to a design with a number of unique elements, more akin to a database log than a traditional messaging system. We will outline some elements of the design in the following sections.
+
+# Persistence
+
+## Don't fear the filesystem!
+
+Kafka relies heavily on the filesystem for storing and caching messages. There is a general perception that "disks are slow" which makes people skeptical that a persistent structure can offer competitive performance. In fact disks are both much slower and much faster than people expect depending on how they are used; and a properly designed disk structure can often be as fast as the network.
+
+The key fact about disk performance is that the throughput of hard drives has been diverging from the latency of a disk seek for the last decade. As a result the performance of linear writes on a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) configuration with six 7200rpm SATA RAID-5 array is about 600MB/sec but the performance of random writes is only about 100k/sec--a difference of over 6000X. These linear reads and writes are the most predictable of all usage patterns, and are heavily optimized by the operating system. A modern operating system provides read-ahead and write-behind techniques that prefetch data in large block multiples and group smaller logical writes into large physical writes. A further discussion of this issue can be found in this [ACM Queue article](https://queue.acm.org/detail.cfm?id=1563874); they actually find that [sequential disk access can in some cases be faster than random memory access!](https://deliveryimages.acm.org/10.1145/1570000/1563874/jacobs3.jpg)
+
+To compensate for this performance divergence, modern operating systems have become increasingly aggressive in their use of main memory for disk caching. A modern OS will happily divert _all_ free memory to disk caching with little performance penalty when the memory is reclaimed. All disk reads and writes will go through this unified cache. This feature cannot easily be turned off without using direct I/O, so even if a process maintains an in-process cache of the data, this data will likely be duplicated in OS pagecache, effectively storing everything twice.
+
+Furthermore, we are building on top of the JVM, and anyone who has spent any time with Java memory usage knows two things:
+
+ 1. The memory overhead of objects is very high, often doubling the size of the data stored (or worse).
+ 2. Java garbage collection becomes increasingly fiddly and slow as the in-heap data increases.
+
+
+
+As a result of these factors using the filesystem and relying on pagecache is superior to maintaining an in-memory cache or other structure--we at least double the available cache by having automatic access to all free memory, and likely double again by storing a compact byte structure rather than individual objects. Doing so will result in a cache of up to 28-30GB on a 32GB machine without GC penalties. Furthermore, this cache will stay warm even if the service is restarted, whereas the in-process cache will need to be rebuilt in memory (which for a 10GB cache may take 10 minutes) or else it will need to start with a completely cold cache (which likely means terrible initial performance). This also greatly simplifies the code as all logic for maintaining coherency between the cache and filesystem is now in the OS, which tends to do so more efficiently and more correctly than one-off in-process attempts. If your disk usage favors linear reads then read-ahead is effectively pre-populating this cache with useful data on each disk read.
+
+This suggests a design which is very simple: rather than maintain as much as possible in-memory and flush it all out to the filesystem in a panic when we run out of space, we invert that. All data is immediately written to a persistent log on the filesystem without necessarily flushing to disk. In effect this just means that it is transferred into the kernel's pagecache.
+
+This style of pagecache-centric design is described in an [article](https://varnish-cache.org/wiki/ArchitectNotes) on the design of Varnish here (along with a healthy dose of arrogance).
+
+## Constant Time Suffices
+
+The persistent data structures used in messaging systems are often a per-consumer queue with an associated BTree or other general-purpose random access data structures to maintain metadata about messages. BTrees are the most versatile data structure available, and make it possible to support a wide variety of transactional and non-transactional semantics in the messaging system. They do come with a fairly high cost, though: BTree operations are O(log N). Normally O(log N) is considered essentially equivalent to constant time, but this is not true for disk operations. Disk seeks come at 10 ms a pop, and each disk can do only one seek at a time so parallelism is limited. Hence even a handful of disk seeks leads to very high overhead. Since storage systems mix very fast cached operations with very slow physical disk operations, the observed performance of tree structures is often superlinear as data increases with fixed cache--i.e. doubling your data makes things much worse than twice as slow.
+
+Intuitively a persistent queue could be built on simple reads and appends to files as is commonly the case with logging solutions. This structure has the advantage that all operations are O(1) and reads do not block writes or each other. This has obvious performance advantages since the performance is completely decoupled from the data size--one server can now take full advantage of a number of cheap, low-rotational speed 1+TB SATA drives. Though they have poor seek performance, these drives have acceptable performance for large reads and writes and come at 1/3 the price and 3x the capacity.
+
+Having access to virtually unlimited disk space without any performance penalty means that we can provide some features not usually found in a messaging system. For example, in Kafka, instead of attempting to delete messages as soon as they are consumed, we can retain messages for a relatively long period (say a week). This leads to a great deal of flexibility for consumers, as we will describe.
+
+# Efficiency
+
+We have put significant effort into efficiency. One of our primary use cases is handling web activity data, which is very high volume: each page view may generate dozens of writes. Furthermore, we assume each message published is read by at least one consumer (often many), hence we strive to make consumption as cheap as possible.
+
+We have also found, from experience building and running a number of similar systems, that efficiency is a key to effective multi-tenant operations. If the downstream infrastructure service can easily become a bottleneck due to a small bump in usage by the application, such small changes will often create problems. By being very fast we help ensure that the application will tip-over under load before the infrastructure. This is particularly important when trying to run a centralized service that supports dozens or hundreds of applications on a centralized cluster as changes in usage patterns are a near-daily occurrence.
+
+We discussed disk efficiency in the previous section. Once poor disk access patterns have been eliminated, there are two common causes of inefficiency in this type of system: too many small I/O operations, and excessive byte copying.
+
+The small I/O problem happens both between the client and the server and in the server's own persistent operations.
+
+To avoid this, our protocol is built around a "message set" abstraction that naturally groups messages together. This allows network requests to group messages together and amortize the overhead of the network roundtrip rather than sending a single message at a time. The server in turn appends chunks of messages to its log in one go, and the consumer fetches large linear chunks at a time.
+
+This simple optimization produces orders of magnitude speed up. Batching leads to larger network packets, larger sequential disk operations, contiguous memory blocks, and so on, all of which allows Kafka to turn a bursty stream of random message writes into linear writes that flow to the consumers.
+
+The other inefficiency is in byte copying. At low message rates this is not an issue, but under load the impact is significant. To avoid this we employ a standardized binary message format that is shared by the producer, the broker, and the consumer (so data chunks can be transferred without modification between them).
+
+The message log maintained by the broker is itself just a directory of files, each populated by a sequence of message sets that have been written to disk in the same format used by the producer and consumer. Maintaining this common format allows optimization of the most important operation: network transfer of persistent log chunks. Modern unix operating systems offer a highly optimized code path for transferring data out of pagecache to a socket; in Linux this is done with the [sendfile system call](https://man7.org/linux/man-pages/man2/sendfile.2.html).
+
+To understand the impact of sendfile, it is important to understand the common data path for transfer of data from file to socket:
+
+ 1. The operating system reads data from the disk into pagecache in kernel space
+ 2. The application reads the data from kernel space into a user-space buffer
+ 3. The application writes the data back into kernel space into a socket buffer
+ 4. The operating system copies the data from the socket buffer to the NIC buffer where it is sent over the network
+
+
+
+This is clearly inefficient, there are four copies and two system calls. Using sendfile, this re-copying is avoided by allowing the OS to send the data from pagecache to the network directly. So in this optimized path, only the final copy to the NIC buffer is needed.
+
+We expect a common use case to be multiple consumers on a topic. Using the zero-copy optimization above, data is copied into pagecache exactly once and reused on each consumption instead of being stored in memory and copied out to user-space every time it is read. This allows messages to be consumed at a rate that approaches the limit of the network connection.
+
+This combination of pagecache and sendfile means that on a Kafka cluster where the consumers are mostly caught up you will see no read activity on the disks whatsoever as they will be serving data entirely from cache.
+
+TLS/SSL libraries operate in user space (in-kernel `SSL_sendfile` is currently not supported by Kafka). Due to this restriction, `sendfile` is not used when SSL is enabled. To enable SSL, refer to the `security.protocol` and `security.inter.broker.protocol` configurations.
+
+For more background on the sendfile and zero-copy support in Java, see this [article](https://developer.ibm.com/articles/j-zerocopy/).
+
+## End-to-end Batch Compression
+
+In some cases the bottleneck is actually not CPU or disk but network bandwidth. This is particularly true for a data pipeline that needs to send messages between data centers over a wide-area network. Of course, the user can always compress its messages one at a time without any support needed from Kafka, but this can lead to very poor compression ratios as much of the redundancy is due to repetition between messages of the same type (e.g. field names in JSON or user agents in web logs or common string values). Efficient compression requires compressing multiple messages together rather than compressing each message individually.
+
+Kafka supports this with an efficient batching format. A batch of messages can be grouped together, compressed, and sent to the server in this form. The broker decompresses the batch in order to validate it. For example, it validates that the number of records in the batch is the same as what the batch header states. This batch of messages is then written to disk in compressed form. The batch will remain compressed in the log and it will also be transmitted to the consumer in compressed form. The consumer decompresses any compressed data that it receives.
+
+Kafka supports GZIP, Snappy, LZ4 and ZStandard compression protocols. More details on compression can be found [here](https://cwiki.apache.org/confluence/x/S5qoAQ).
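+
+In practice, batch compression is enabled on the producer through the `compression.type` setting (a topic-level `compression.type` also exists); for example, to compress record batches with ZStandard:
+
+    compression.type=zstd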
+
+# The Producer
+
+## Load balancing
+
+The producer sends data directly to the broker that is the leader for the partition without any intervening routing tier. To help the producer do this all Kafka nodes can answer a request for metadata about which servers are alive and where the leaders for the partitions of a topic are at any given time to allow the producer to appropriately direct its requests.
+
+The client controls which partition it publishes messages to. This can be done at random, implementing a kind of random load balancing, or it can be done by some semantic partitioning function. We expose the interface for semantic partitioning by allowing the user to specify a key to partition by and using this to hash to a partition (there is also an option to override the partition function if need be). For example if the key chosen was a user id then all data for a given user would be sent to the same partition. This in turn will allow consumers to make locality assumptions about their consumption. This style of partitioning is explicitly designed to allow locality-sensitive processing in consumers.
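+
+As a small illustration of semantic partitioning with the Java producer, the sketch below keys records by a user id so that all events for that user land in the same partition (topic name, key, and values are made up for the example):
+
+    import java.util.Properties;
+
+    import org.apache.kafka.clients.producer.KafkaProducer;
+    import org.apache.kafka.clients.producer.Producer;
+    import org.apache.kafka.clients.producer.ProducerRecord;
+
+    public class KeyedSendExample {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put("bootstrap.servers", "localhost:9092");
+            props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
+            props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
+
+            try (Producer<String, String> producer = new KafkaProducer<>(props)) {
+                // Records with the same key hash to the same partition,
+                // so downstream consumers can make per-user locality assumptions.
+                producer.send(new ProducerRecord<>("user-events", "user-42", "page_view:/home"));
+                producer.send(new ProducerRecord<>("user-events", "user-42", "page_view:/pricing"));
+            }
+        }
+    }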
+
+## Asynchronous send
+
+Batching is one of the big drivers of efficiency, and to enable batching the Kafka producer will attempt to accumulate data in memory and to send out larger batches in a single request. The batching can be configured to accumulate no more than a fixed number of messages and to wait no longer than some fixed latency bound (say 64k or 10 ms). This allows the accumulation of more bytes to send, and a few larger I/O operations on the servers. This buffering is configurable and gives a mechanism to trade off a small amount of additional latency for better throughput.
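+
+The size and latency bounds mentioned above correspond to the producer settings `batch.size` and `linger.ms`; the values below are illustrative only:
+
+    # Accumulate up to 64 KB per partition, or wait at most 10 ms, before sending
+    batch.size=65536
+    linger.ms=10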
+
+Details on configuration and the API for the producer can be found elsewhere in the documentation.
+
+# The Consumer
+
+The Kafka consumer works by issuing "fetch" requests to the brokers leading the partitions it wants to consume. The consumer specifies its offset in the log with each request and receives back a chunk of log beginning from that position. The consumer thus has significant control over this position and can rewind it to re-consume data if need be.
+
+## Push vs. pull
+
+An initial question we considered is whether consumers should pull data from brokers or brokers should push data to the consumer. In this respect Kafka follows a more traditional design, shared by most messaging systems, where data is pushed to the broker from the producer and pulled from the broker by the consumer. Some logging-centric systems, such as [Scribe](https://github.com/facebook/scribe) and [Apache Flume](https://flume.apache.org/), follow a very different push-based path where data is pushed downstream. There are pros and cons to both approaches. However, a push-based system has difficulty dealing with diverse consumers as the broker controls the rate at which data is transferred. The goal is generally for the consumer to be able to consume at the maximum possible rate; unfortunately, in a push system this means the consumer tends to be overwhelmed when its rate of consumption falls below the rate of production (a denial of service attack, in essence). A pull-based system has the nicer property that the consumer simply falls behind and catches up when it can. This can be mitigated with some kind of backoff protocol by which the consumer can indicate it is overwhelmed, but getting the rate of transfer to fully utilize (but never over-utilize) the consumer is trickier than it seems. Previous attempts at building systems in this fashion led us to go with a more traditional pull model.
+
+Another advantage of a pull-based system is that it lends itself to aggressive batching of data sent to the consumer. A push-based system must choose to either send a request immediately or accumulate more data and then send it later without knowledge of whether the downstream consumer will be able to immediately process it. If tuned for low latency, this will result in sending a single message at a time only for the transfer to end up being buffered anyway, which is wasteful. A pull-based design fixes this as the consumer always pulls all available messages after its current position in the log (or up to some configurable max size). So one gets optimal batching without introducing unnecessary latency.
+
+The deficiency of a naive pull-based system is that if the broker has no data the consumer may end up polling in a tight loop, effectively busy-waiting for data to arrive. To avoid this we have parameters in our pull request that allow the consumer request to block in a "long poll" waiting until data arrives (and optionally waiting until a given number of bytes is available to ensure large transfer sizes).
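+
+The long-poll behavior described above is controlled by the consumer fetch settings `fetch.min.bytes` and `fetch.max.wait.ms`; the values below are illustrative only:
+
+    # Wait for at least 64 KB of data, but no longer than 500 ms, per fetch request
+    fetch.min.bytes=65536
+    fetch.max.wait.ms=500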
+
+You could imagine other possible designs which would be only pull, end-to-end. The producer would locally write to a local log, and brokers would pull from that with consumers pulling from them. A similar type of "store-and-forward" producer is often proposed. This is intriguing but we felt not very suitable for our target use cases which have thousands of producers. Our experience running persistent data systems at scale led us to feel that involving thousands of disks in the system across many applications would not actually make things more reliable and would be a nightmare to operate. And in practice we have found that we can run a pipeline with strong SLAs at large scale without a need for producer persistence.
+
+## Consumer Position
+
+Keeping track of _what_ has been consumed is, surprisingly, one of the key performance points of a messaging system.
+
+Most messaging systems keep metadata about what messages have been consumed on the broker. That is, as a message is handed out to a consumer, the broker either records that fact locally immediately or it may wait for acknowledgement from the consumer. This is a fairly intuitive choice, and indeed for a single machine server it is not clear where else this state could go. Since the data structures used for storage in many messaging systems scale poorly, this is also a pragmatic choice--since the broker knows what is consumed it can immediately delete it, keeping the data size small.
+
+What is perhaps not obvious is that getting the broker and consumer to come into agreement about what has been consumed is not a trivial problem. If the broker records a message as **consumed** immediately every time it is handed out over the network, then if the consumer fails to process the message (say because it crashes or the request times out or whatever) that message will be lost. To solve this problem, many messaging systems add an acknowledgement feature which means that messages are only marked as **sent** not **consumed** when they are sent; the broker waits for a specific acknowledgement from the consumer to record the message as **consumed**. This strategy fixes the problem of losing messages, but creates new problems. First of all, if the consumer processes the message but fails before it can send an acknowledgement then the message will be consumed twice. The second problem is around performance, now the broker must keep multiple states about every single message (first to lock it so it is not given out a second time, and then to mark it as permanently consumed so that it can be removed). Tricky problems must be dealt with, like what to do with messages that are sent but never acknowledged.
+
+Kafka handles this differently. Our topic is divided into a set of totally ordered partitions, each of which is consumed by exactly one consumer within each subscribing consumer group at any given time. This means that the position of a consumer in each partition is just a single integer, the offset of the next message to consume. This makes the state about what has been consumed very small, just one number for each partition. This state can be periodically checkpointed. This makes the equivalent of message acknowledgements very cheap.
+
+There is a side benefit of this decision. A consumer can deliberately _rewind_ back to an old offset and re-consume data. This violates the common contract of a queue, but turns out to be an essential feature for many consumers. For example, if the consumer code has a bug and is discovered after some messages are consumed, the consumer can re-consume those messages once the bug is fixed.
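+
+With the Java consumer, such a rewind is a `seek` back to an earlier offset on an assigned partition; the topic, partition, and offset below are illustrative only:
+
+    import java.time.Duration;
+    import java.util.Collections;
+    import java.util.Properties;
+
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaConsumer;
+    import org.apache.kafka.common.TopicPartition;
+
+    public class RewindExample {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put("bootstrap.servers", "localhost:9092");
+            props.put("group.id", "rewind-demo");
+            props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+            props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+
+            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
+                TopicPartition tp = new TopicPartition("my-topic", 0);
+                consumer.assign(Collections.singletonList(tp));
+                // Rewind to an older offset and re-consume from there.
+                consumer.seek(tp, 1000L);
+                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
+                records.forEach(r -> System.out.printf("offset=%d value=%s%n", r.offset(), r.value()));
+            }
+        }
+    }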
+
+## Offline Data Load
+
+Scalable persistence allows for the possibility of consumers that only periodically consume such as batch data loads that periodically bulk-load data into an offline system such as Hadoop or a relational data warehouse.
+
+In the case of Hadoop we parallelize the data load by splitting the load over individual map tasks, one for each node/topic/partition combination, allowing full parallelism in the loading. Hadoop provides the task management, and tasks which fail can restart without danger of duplicate data--they simply restart from their original position.
+
+## Static Membership
+
+Static membership aims to improve the availability of stream applications, consumer groups and other applications built on top of the group rebalance protocol. The rebalance protocol relies on the group coordinator to allocate entity ids to group members. These generated ids are ephemeral and will change when members restart and rejoin. For consumer based apps, this "dynamic membership" can cause a large percentage of tasks re-assigned to different instances during administrative operations such as code deploys, configuration updates and periodic restarts. For large state applications, shuffled tasks need a long time to recover their local states before processing and cause applications to be partially or entirely unavailable. Motivated by this observation, Kafka’s group management protocol allows group members to provide persistent entity ids. Group membership remains unchanged based on those ids, thus no rebalance will be triggered.
+
+If you want to use static membership,
+
+ * Upgrade both broker cluster and client apps to 2.3 or beyond, and also make sure the upgraded brokers are using `inter.broker.protocol.version` of 2.3 or beyond as well.
+ * Set the config `ConsumerConfig#GROUP_INSTANCE_ID_CONFIG` to a unique value for each consumer instance under one group.
+ * For Kafka Streams applications, it is sufficient to set a unique `ConsumerConfig#GROUP_INSTANCE_ID_CONFIG` per KafkaStreams instance, independent of the number of used threads for an instance.
+
+If your broker is on an older version than 2.3, but you choose to set `ConsumerConfig#GROUP_INSTANCE_ID_CONFIG` on the client side, the application will detect the broker version and then throw an UnsupportedException. If you accidentally configure duplicate ids for different instances, a fencing mechanism on the broker side will tell your duplicate client to shut down immediately by triggering an `org.apache.kafka.common.errors.FencedInstanceIdException`. For more details, see [KIP-345](https://cwiki.apache.org/confluence/x/kRg0BQ).
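+
+For example, each consumer or Kafka Streams instance might set a stable, per-instance id (the name below is illustrative):
+
+    group.instance.id=payment-processor-1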
+
+# Message Delivery Semantics
+
+Now that we understand a little about how producers and consumers work, let's discuss the semantic guarantees Kafka provides between producer and consumer. Clearly there are multiple possible message delivery guarantees that could be provided:
+
+ * _At most once_ --Messages may be lost but are never redelivered.
+ * _At least once_ --Messages are never lost but may be redelivered.
+ * _Exactly once_ --Each message is processed once and only once.
+
+It's worth noting that this breaks down into two problems: the durability guarantees for publishing a message and the guarantees when consuming a message.
+
+Many systems claim to provide "exactly-once" delivery semantics, but it is important to read the fine print, because sometimes these claims are misleading (i.e. they don't translate to the case where consumers or producers can fail, cases where there are multiple consumer processes, or cases where data written to disk can be lost).
+
+Kafka's semantics are straightforward. When publishing a message we have a notion of the message being "committed" to the log. A message is considered committed only when all replicas in the in-sync replicas (ISR) for that partition have applied it to their log. Once a published message is committed, it will not be lost as long as one broker that replicates the partition to which this message was written remains "alive". The definition of committed message and alive partition as well as a description of which types of failures we attempt to handle will be described in more detail in the next section. For now let's assume a perfect, lossless broker and try to understand the guarantees to the producer and consumer. If a producer attempts to publish a message and experiences a network error, it cannot be sure if this error happened before or after the message was committed. This is similar to the semantics of inserting into a database table with an autogenerated key.
+
+Prior to 0.11.0.0, if a producer failed to receive a response indicating that a message was committed, it had little choice but to resend the message. This provides at-least-once delivery semantics since the message may be written to the log again during resending if the original request had in fact succeeded. Since 0.11.0.0, the Kafka producer also supports an idempotent delivery option which guarantees that resending will not result in duplicate entries in the log. To achieve this, the broker assigns each producer an ID and deduplicates messages using a sequence number that is sent by the producer along with every message. Also beginning with 0.11.0.0, the producer supports the ability to send messages atomically to multiple topic partitions using transactions, so that either all messages are successfully written or none of them are.
+
+Not all use cases require such strong guarantees. For use cases which are latency-sensitive, we allow the producer to specify the durability level it desires. If the producer specifies that it wants to wait on the message being committed, this can take on the order of 10 ms. However the producer can also specify that it wants to perform the send completely asynchronously or that it wants to wait only until the leader (but not necessarily the followers) have the message.
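+
+These durability levels map to the producer `acks` setting; for example:
+
+    # Wait for the full set of in-sync replicas (strongest durability)
+    acks=all
+    # Or wait only for the leader (acks=1), or do not wait at all (acks=0)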
+
+Now let's describe the semantics from the point of view of the consumer. All replicas have the exact same log with the same offsets. The consumer controls its position in this log. If the consumer never crashed it could just store this position in memory, but if the consumer fails and we want this topic partition to be taken over by another process, the new process will need to choose an appropriate position from which to start processing. Let's say the consumer reads some messages -- it has several options for processing the messages and updating its position.
+
+ 1. It can read the messages, then save its position in the log, and finally process the messages. In this case there is a possibility that the consumer process crashes after saving its position but before saving the output of its message processing. In this case the process that took over processing would start at the saved position even though a few messages prior to that position had not been processed. This corresponds to "at-most-once" semantics as in the case of a consumer failure messages may not be processed.
+ 2. It can read the messages, process the messages, and finally save its position. In this case there is a possibility that the consumer process crashes after processing messages but before saving its position. In this case when the new process takes over the first few messages it receives will already have been processed. This corresponds to the "at-least-once" semantics in the case of consumer failure. In many cases messages have a primary key and so the updates are idempotent (receiving the same message twice just overwrites a record with another copy of itself).
+
+
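+A minimal sketch of the second ordering above (process first, then save the position), using the Java consumer with auto-commit disabled; topic and group names are illustrative:
+
+    import java.time.Duration;
+    import java.util.Collections;
+    import java.util.Properties;
+
+    import org.apache.kafka.clients.consumer.ConsumerRecord;
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaConsumer;
+
+    public class AtLeastOnceLoop {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put("bootstrap.servers", "localhost:9092");
+            props.put("group.id", "at-least-once-demo");
+            props.put("enable.auto.commit", "false");
+            props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+            props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+
+            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
+                consumer.subscribe(Collections.singletonList("my-topic"));
+                while (true) {
+                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
+                    for (ConsumerRecord<String, String> record : records) {
+                        process(record);      // process first ...
+                    }
+                    consumer.commitSync();    // ... then save the position
+                }
+            }
+        }
+
+        private static void process(ConsumerRecord<String, String> record) {
+            System.out.printf("offset=%d value=%s%n", record.offset(), record.value());
+        }
+    }
+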
+So what about exactly-once semantics? When consuming from a Kafka topic and producing to another topic (as in a [Kafka Streams](https://kafka.apache.org/streams) application), we can leverage the new transactional producer capabilities in 0.11.0.0 that were mentioned above. The consumer's position is stored as a message in an internal topic, so we can write the offset to Kafka in the same transaction as the output topics receiving the processed data. If the transaction is aborted, the consumer's stored position will revert to its old value (although the consumer has to refetch the committed offset because it does not automatically rewind) and the produced data on the output topics will not be visible to other consumers, depending on their "isolation level". In the default "read_uncommitted" isolation level, all messages are visible to consumers even if they were part of an aborted transaction, but in "read_committed" isolation level, the consumer will only return messages from transactions which were committed (and any messages which were not part of a transaction).
+
+When writing to an external system, the limitation is in the need to coordinate the consumer's position with what is actually stored as output. The classic way of achieving this would be to introduce a two-phase commit between the storage of the consumer position and the storage of the consumers output. This can be handled more simply and generally by letting the consumer store its offset in the same place as its output. This is better because many of the output systems a consumer might want to write to will not support a two-phase commit. As an example of this, consider a [Kafka Connect](https://kafka.apache.org/#connect) connector which populates data in HDFS along with the offsets of the data it reads so that it is guaranteed that either data and offsets are both updated or neither is. We follow similar patterns for many other data systems which require these stronger semantics and for which the messages do not have a primary key to allow for deduplication.
+
+As a result, Kafka supports exactly-once delivery in [Kafka Streams](https://kafka.apache.org/streams), and the transactional producer and the consumer using read-committed isolation level can be used generally to provide exactly-once delivery when reading, processing and writing data on Kafka topics. Exactly-once delivery for other destination systems generally requires cooperation with such systems, but Kafka provides the primitives which makes implementing this feasible (see also [Kafka Connect](https://kafka.apache.org/#connect)). Otherwise, Kafka guarantees at-least-once delivery by default, and allows the user to implement at-most-once delivery by disabling retries on the producer and committing offsets in the consumer prior to processing a batch of messages.
+
+# Using Transactions
+
+As mentioned above, the simplest way to get exactly-once semantics from Kafka is to use [Kafka Streams](https://kafka.apache.org/streams). However, it is also possible to achieve the same transactional guarantees using the Kafka producer and consumer directly by using them in the same way as Kafka Streams does.
+
+Kafka transactions are a bit different from transactions in other messaging systems. In Kafka, the consumer and producer are separate, and it is only the producer which is transactional. It is however able to make transactional updates to the consumer's position (confusingly called the "committed offset"), and it is this which gives the overall exactly-once behavior.
+
+There are three key aspects to exactly-once processing using the producer and consumer, which match how Kafka Streams works.
+
+ 1. The consumer uses partition assignment to ensure that it is the only consumer in the consumer group currently processing each partition.
+ 2. The producer uses transactions so that all the records it produces, and any offsets it updates on behalf of the consumer, are performed atomically.
+ 3. In order to handle transactions properly in combination with rebalancing, it is advisable to use one producer instance for each consumer instance. More complicated and efficient schemes are possible, but at the cost of greater complexity.
+
+
+
+In addition, it is generally considered a good practice to use the read-committed isolation level if trying to achieve exactly-once processing. Strictly speaking, the consumer doesn't have to use read-committed isolation level, but if it does not, it will see records from aborted transactions and also open transactions which have not yet completed.
+
+The consumer configuration must include `isolation.level=read_committed` and `enable.auto.commit=false`. The producer configuration must set `transactional.id` to the name of the transactional ID to be used, which configures the producer for transactional delivery and also makes sure that a restarted application causes any in-flight transaction from the previous instance to abort. Only the producer has the `transactional.id` configuration.
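+
+The sketch below shows the consume-process-produce shape under those settings. It is a condensed, illustrative version of what the transactional message copier does; error handling is deliberately simplified, the topic names are made up, and the helper methods exist only for this sketch:
+
+    import java.time.Duration;
+    import java.util.Collections;
+    import java.util.HashMap;
+    import java.util.List;
+    import java.util.Map;
+    import java.util.Properties;
+
+    import org.apache.kafka.clients.consumer.ConsumerRecord;
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaConsumer;
+    import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+    import org.apache.kafka.clients.producer.KafkaProducer;
+    import org.apache.kafka.clients.producer.ProducerRecord;
+    import org.apache.kafka.common.KafkaException;
+    import org.apache.kafka.common.TopicPartition;
+
+    public class TransactionalCopySketch {
+        public static void main(String[] args) {
+            Properties cprops = new Properties();
+            cprops.put("bootstrap.servers", "localhost:9092");
+            cprops.put("group.id", "copy-group");
+            cprops.put("isolation.level", "read_committed");
+            cprops.put("enable.auto.commit", "false");
+            cprops.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+            cprops.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+
+            Properties pprops = new Properties();
+            pprops.put("bootstrap.servers", "localhost:9092");
+            pprops.put("transactional.id", "copy-app-1");
+            pprops.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
+            pprops.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
+
+            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(cprops);
+                 KafkaProducer<String, String> producer = new KafkaProducer<>(pprops)) {
+                producer.initTransactions();
+                consumer.subscribe(Collections.singletonList("input-topic"));
+                while (true) {
+                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
+                    if (records.isEmpty()) {
+                        continue;
+                    }
+                    producer.beginTransaction();
+                    try {
+                        for (ConsumerRecord<String, String> record : records) {
+                            producer.send(new ProducerRecord<>("output-topic", record.key(), record.value()));
+                        }
+                        // Commit the consumed offsets atomically with the produced records.
+                        producer.sendOffsetsToTransaction(nextOffsets(records), consumer.groupMetadata());
+                        producer.commitTransaction();
+                    } catch (KafkaException e) {
+                        // On abort, rewind the consumer so the batch is reprocessed in a new transaction.
+                        producer.abortTransaction();
+                        resetToCommitted(consumer);
+                    }
+                }
+            }
+        }
+
+        // Helper (illustrative): the offset to commit is the last consumed offset + 1 per partition.
+        private static Map<TopicPartition, OffsetAndMetadata> nextOffsets(ConsumerRecords<String, String> records) {
+            Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
+            for (TopicPartition tp : records.partitions()) {
+                List<ConsumerRecord<String, String>> rs = records.records(tp);
+                offsets.put(tp, new OffsetAndMetadata(rs.get(rs.size() - 1).offset() + 1));
+            }
+            return offsets;
+        }
+
+        // Helper (illustrative): seek back to the last committed offsets after an abort.
+        private static void resetToCommitted(KafkaConsumer<String, String> consumer) {
+            Map<TopicPartition, OffsetAndMetadata> committed = consumer.committed(consumer.assignment());
+            for (TopicPartition tp : consumer.assignment()) {
+                OffsetAndMetadata om = committed.get(tp);
+                if (om != null) {
+                    consumer.seek(tp, om.offset());
+                } else {
+                    consumer.seekToBeginning(Collections.singletonList(tp));
+                }
+            }
+        }
+    }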
+
+Here's an example of a [transactional message copier](https://github.com/apache/kafka/blob/trunk/tools/src/main/java/org/apache/kafka/tools/TransactionalMessageCopier.java) which uses these principles. It uses a `KafkaConsumer` to consume records from one topic and a `KafkaProducer` to produce records to another topic. It uses transactions to ensure that there is no duplication or loss of records as they are copied, provided that the `--use-group-metadata` option is set.
+
+It is important to handle exceptions and aborted transactions correctly. Any records written by the transactional producer will be marked as being part of the transactions, and then when the transaction commits or aborts, transaction marker records are written to indicate the outcome of the transaction. This is how the read-committed consumer does not see records from aborted transactions. However, in the event of a transaction abort, the application's state and in particular the current position of the consumer must be reset explicitly so that it can reprocess the records processed by the aborted transaction.
+
+The error handling for the transactional producer has been standardized, ensuring consistent behavior and clearer error-handling patterns. The exception categories are now more precisely defined:
+
+ 1. **RetriableException** : Temporary exceptions that are retried automatically by the client. These are handled internally and don't bubble up to the application.
+ 2. **RefreshRetriableException** : Exceptions requiring metadata refresh before retry. These are handled internally by the client after refreshing metadata and don't bubble up to the application.
+ 3. **AbortableException** : Exceptions that require transaction abort and reprocessing. These bubble up to the application, which must handle them by aborting the transaction and resetting the consumer position.
+ 4. **ApplicationRecoverableException** : Exceptions that bubble up to the application and require application handling. The application must implement its own recovery strategy, which must include restarting the producer.
+ 5. **InvalidConfigurationException** : Configuration-related exceptions that bubble up to the application and require application handling. The producer doesn't need to restart, but the application may choose to restart it.
+ 6. **KafkaException** : General Kafka exceptions that don't fit into the above categories. These bubble up to the application for handling.
+
+
+
+For example template code for handling transaction exceptions, see the [Transactional Client Demo](https://github.com/apache/kafka/blob/trunk/examples/src/main/java/kafka/examples/TransactionalClientDemo.java).
+
+A simple policy for handling exceptions and aborted transactions is to discard and recreate the Kafka producer and consumer objects and start afresh. As part of recreating the consumer, the consumer group will rebalance and fetch the last committed offset, which has the effect of rewinding back to the state before the transaction aborted. Alternatively, a more sophisticated application (such as the transactional message copier) can keep the existing objects and instead use `KafkaConsumer.committed` to retrieve the committed offset from Kafka, and then `KafkaConsumer.seek` to rewind the current position.
+
+# Share groups
+
+Share groups are available as a preview in Apache Kafka 4.1.
+
+Share groups are a new type of group, existing alongside traditional consumer groups. Share groups enable Kafka consumers to cooperatively consume and process records from topics. They offer an alternative to traditional consumer groups, particularly when applications require finer-grained sharing of partitions and records.
+
+The fundamental differences between a share group and a consumer group are:
+
+ * The consumers within a share group cooperatively consume records, and partitions may be assigned to multiple consumers.
+ * The number of consumers in a share group can exceed the number of partitions in a topic.
+ * Records are acknowledged individually, though the system is optimized for batch processing to improve efficiency.
+ * Delivery attempts to consumers in a share group are counted, which enables automated handling of unprocessable records.
+
+
+
+All consumers in the same share group subscribed to the same topic will cooperatively consume the records of that topic. If a topic is accessed by consumers in multiple share groups, each share group consumes from that topic independently of the others.
+
+Each consumer can dynamically set its list of subscribed topics. In practice, all consumers within a share group typically subscribe to the same topic or topics.
+
+When a consumer in a share group fetches records, it receives available records from any of the topic-partitions matching its subscriptions. Records are acquired for delivery to this consumer with a time-limited acquisition lock. While a record is acquired, it is unavailable to other consumers.
+
+By default, the lock duration is 30 seconds, but you can control it using the group configuration parameter `share.record.lock.duration.ms`. The lock is released automatically once its duration elapses, making the record available to another consumer. A consumer holding the lock can handle the record in the following ways:
+
+ * Acknowledge successful processing of the record.
+ * Release the record, making it available for another delivery attempt.
+ * Reject the record, indicating it's unprocessable and preventing further delivery attempts for that record.
+ * Do nothing, in which case the lock is automatically released when its duration expires.
+
+
+
+The Kafka cluster limits the number of records acquired for consumers for each topic-partition within a share group. Once this limit is reached, fetching operations will temporarily yield no further records until the number of acquired records decreases (as locks naturally time out). This limit is controlled by the broker configuration property `group.share.partition.max.record.locks`. By limiting the duration of the acquisition lock and automatically releasing the locks, the broker ensures delivery progresses even in the presence of consumer failures.
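+
+The sketch below shows how a share group consumer might acknowledge records using the Java client for this feature. It assumes the `KafkaShareConsumer` API and `AcknowledgeType` values described in KIP-932; since share groups are a preview in 4.1, treat the exact class and method names as subject to change:
+
+    import java.time.Duration;
+    import java.util.Collections;
+    import java.util.Properties;
+
+    import org.apache.kafka.clients.consumer.AcknowledgeType;
+    import org.apache.kafka.clients.consumer.ConsumerRecord;
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaShareConsumer;
+
+    public class ShareConsumerSketch {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put("bootstrap.servers", "localhost:9092");
+            props.put("group.id", "my-share-group");  // interpreted as the share group id
+            props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+            props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+
+            try (KafkaShareConsumer<String, String> consumer = new KafkaShareConsumer<>(props)) {
+                consumer.subscribe(Collections.singletonList("my-topic"));
+                while (true) {
+                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
+                    for (ConsumerRecord<String, String> record : records) {
+                        try {
+                            process(record);
+                            consumer.acknowledge(record, AcknowledgeType.ACCEPT);   // processed successfully
+                        } catch (RuntimeException e) {
+                            consumer.acknowledge(record, AcknowledgeType.RELEASE);  // make it available again
+                        }
+                    }
+                    consumer.commitSync();  // deliver the acknowledgements to the broker
+                }
+            }
+        }
+
+        private static void process(ConsumerRecord<String, String> record) {
+            System.out.printf("value=%s%n", record.value());
+        }
+    }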
+
+# Replication
+
+Kafka replicates the log for each topic's partitions across a configurable number of servers (you can set this replication factor on a topic-by-topic basis). This allows automatic failover to these replicas when a server in the cluster fails so messages remain available in the presence of failures.
+
+Other messaging systems provide some replication-related features, but, in our (totally biased) opinion, this appears to be a tacked-on thing, not heavily used, and with large downsides: replicas are inactive, throughput is heavily impacted, it requires fiddly manual configuration, etc. Kafka is meant to be used with replication by default--in fact we implement un-replicated topics as replicated topics where the replication factor is one.
+
+The unit of replication is the topic partition. Under non-failure conditions, each partition in Kafka has a single leader and zero or more followers. The total number of replicas including the leader constitute the replication factor. All writes go to the leader of the partition, and reads can go to the leader or the followers of the partition. Typically, there are many more partitions than brokers and the leaders are evenly distributed among brokers. The logs on the followers are identical to the leader's log--all have the same offsets and messages in the same order (though, of course, at any given time the leader may have a few as-yet unreplicated messages at the end of its log).
+
+Followers consume messages from the leader just as a normal Kafka consumer would and apply them to their own log. Having the followers pull from the leader has the nice property of allowing the follower to naturally batch together log entries they are applying to their log.
+
+As with most distributed systems, automatically handling failures requires a precise definition of what it means for a node to be "alive." In Kafka, a special node known as the "controller" is responsible for managing the registration of brokers in the cluster. Broker liveness has two conditions:
+
+ 1. Brokers must maintain an active session with the controller in order to receive regular metadata updates.
+ 2. Brokers acting as followers must replicate the writes from the leader and not fall "too far" behind.
+
+
+
+What is meant by an "active session" depends on the cluster configuration. For KRaft clusters, an active session is maintained by sending periodic heartbeats to the controller. If the controller fails to receive a heartbeat before the timeout configured by `broker.session.timeout.ms` expires, then the node is considered offline.
+
+We refer to nodes satisfying these two conditions as being "in sync" to avoid the vagueness of "alive" or "failed". The leader keeps track of the set of "in sync" replicas, which is known as the ISR. If either of these conditions fail to be satisfied, then the broker will be removed from the ISR. For example, if a follower dies, then the controller will notice the failure through the loss of its session, and will remove the broker from the ISR. On the other hand, if the follower lags too far behind the leader but still has an active session, then the leader can also remove it from the ISR. The determination of lagging replicas is controlled through the `replica.lag.time.max.ms` configuration. Replicas that cannot catch up to the end of the log on the leader within the max time set by this configuration are removed from the ISR.
+
+In distributed systems terminology we only attempt to handle a "fail/recover" model of failures where nodes suddenly cease working and then later recover (perhaps without knowing that they have died). Kafka does not handle so-called "Byzantine" failures in which nodes produce arbitrary or malicious responses (perhaps due to bugs or foul play).
+
+Only committed messages are ever given out to the consumer. This means that the consumer need not worry about potentially seeing a message that could be lost if the leader fails. Producers, on the other hand, have the option of either waiting for the message to be committed or not, depending on their preference for tradeoff between latency and durability. This preference is controlled by the `acks` setting that the producer uses. Note that topics have a setting for the minimum number of in-sync replicas (`min.insync.replicas`) that is checked when the producer requests acknowledgment that a message has been written to the full set of in-sync replicas. If a less stringent acknowledgment is requested by the producer, then the message is committed asynchronously across the set of in-sync replicas if `acks=0`, or synchronously only on the leader if `acks=1`. Regardless of the `acks` setting, the messages will not be visible to the consumers until all the following conditions are met:
+
+ 1. The messages are replicated to all the in-sync replicas.
+ 2. The number of the in-sync replicas is no less than the `min.insync.replicas` setting.
+
+
+
+The guarantee that Kafka offers is that a committed message will not be lost, as long as there is at least one in sync replica alive, at all times.
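+
+A common pairing for this guarantee is a replication factor of 3 with the settings below (values are illustrative):
+
+    # Topic-level: a write needs at least 2 in-sync replicas before it is acknowledged
+    min.insync.replicas=2
+    # Producer-side: wait for the full set of in-sync replicas
+    acks=all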
+
+Kafka will remain available in the presence of node failures after a short fail-over period, but may not remain available in the presence of network partitions.
+
+## Replicated Logs: Quorums, ISRs, and State Machines (Oh my!)
+
+At its heart a Kafka partition is a replicated log. The replicated log is one of the most basic primitives in distributed data systems, and there are many approaches for implementing one. A replicated log can be used by other systems as a primitive for implementing other distributed systems in the [state-machine style](https://en.wikipedia.org/wiki/State_machine_replication).
+
+A replicated log models the process of coming into consensus on the order of a series of values (generally numbering the log entries 0, 1, 2, ...). There are many ways to implement this, but the simplest and fastest is with a leader who chooses the ordering of values provided to it. As long as the leader remains alive, all followers need to only copy the values and ordering the leader chooses.
+
+Of course if leaders didn't fail we wouldn't need followers! When the leader does die we need to choose a new leader from among the followers. But followers themselves may fall behind or crash so we must ensure we choose an up-to-date follower. The fundamental guarantee a log replication algorithm must provide is that if we tell the client a message is committed, and the leader fails, the new leader we elect must also have that message. This yields a tradeoff: if the leader waits for more followers to acknowledge a message before declaring it committed then there will be more potentially electable leaders.
+
+If you choose the number of acknowledgements required and the number of logs that must be compared to elect a leader such that there is guaranteed to be an overlap, then this is called a Quorum.
+
+A common approach to this tradeoff is to use a majority vote for both the commit decision and the leader election. This is not what Kafka does, but let's explore it anyway to understand the tradeoffs. Let's say we have 2 _f_ +1 replicas. If _f_ +1 replicas must receive a message prior to a commit being declared by the leader, and if we elect a new leader by electing the follower with the most complete log from at least _f_ +1 replicas, then, with no more than _f_ failures, the leader is guaranteed to have all committed messages. This is because among any _f_ +1 replicas, there must be at least one replica that contains all committed messages. That replica's log will be the most complete and therefore will be selected as the new leader. There are many remaining details that each algorithm must handle (such as precisely defining what makes a log more complete, ensuring log consistency during leader failure, or changing the set of servers in the replica set) but we will ignore these for now.
+
+This majority vote approach has a very nice property: the latency is dependent on only the fastest servers. That is, if the replication factor is three, the latency is determined by the faster follower, not the slower one.
+
+There are a rich variety of algorithms in this family including ZooKeeper's [Zab](https://web.archive.org/web/20140602093727/https://www.stanford.edu/class/cs347/reading/zab.pdf), [Raft](https://www.usenix.org/system/files/conference/atc14/atc14-paper-ongaro.pdf), and [Viewstamped Replication](https://pmg.csail.mit.edu/papers/vr-revisited.pdf). The most similar academic publication we are aware of to Kafka's actual implementation is [PacificA](https://research.microsoft.com/apps/pubs/default.aspx?id=66814) from Microsoft.
+
+The downside of majority vote is that it doesn't take many failures to leave you with no electable leaders. To tolerate one failure requires three copies of the data, and to tolerate two failures requires five copies of the data. In our experience having only enough redundancy to tolerate a single failure is not enough for a practical system, but doing every write five times, with 5x the disk space requirements and 1/5th the throughput, is not very practical for large volume data problems. This is likely why quorum algorithms more commonly appear for shared cluster configuration such as ZooKeeper but are less common for primary data storage. For example in HDFS the namenode's high-availability feature is built on a [majority-vote-based journal](https://blog.cloudera.com/blog/2012/10/quorum-based-journaling-in-cdh4-1), but this more expensive approach is not used for the data itself.
+
+Kafka takes a slightly different approach to choosing its quorum set. Instead of majority vote, Kafka dynamically maintains a set of in-sync replicas (ISR) that are caught-up to the leader. Only members of this set are eligible for election as leader. A write to a Kafka partition is not considered committed until _all_ in-sync replicas have received the write. This ISR set is persisted in the cluster metadata whenever it changes. Because of this, any replica in the ISR is eligible to be elected leader. This is an important factor for Kafka's usage model where there are many partitions and ensuring leadership balance is important. With this ISR model and _f+1_ replicas, a Kafka topic can tolerate _f_ failures without losing committed messages.
+
+For most use cases we hope to handle, we think this tradeoff is a reasonable one. In practice, to tolerate _f_ failures, both the majority vote and the ISR approach will wait for the same number of replicas to acknowledge before committing a message (e.g. to survive one failure a majority quorum needs three replicas and one acknowledgement and the ISR approach requires two replicas and one acknowledgement). The ability to commit without the slowest servers is an advantage of the majority vote approach. However, we think it is ameliorated by allowing the client to choose whether they block on the message commit or not, and the additional throughput and disk space due to the lower required replication factor is worth it.
+
+Another important design distinction is that Kafka does not require that crashed nodes recover with all their data intact. It is not uncommon for replication algorithms in this space to depend on the existence of "stable storage" that cannot be lost in any failure-recovery scenario without potential consistency violations. There are two primary problems with this assumption. First, disk errors are the most common problem we observe in real operation of persistent data systems and they often do not leave data intact. Secondly, even if this were not a problem, we do not want to require the use of fsync on every write for our consistency guarantees as this can reduce performance by two to three orders of magnitude. Our protocol for allowing a replica to rejoin the ISR ensures that before rejoining, it must fully re-sync again even if it lost unflushed data in its crash.
+
+## Unclean leader election: What if they all die?
+
+Note that Kafka's guarantee with respect to data loss is predicated on at least one replica remaining in sync. If all the nodes replicating a partition die, this guarantee no longer holds.
+
+However a practical system needs to do something reasonable when all the replicas die. If you are unlucky enough to have this occur, it is important to consider what will happen. There are two behaviors that could be implemented:
+
+ 1. Wait for a replica in the ISR to come back to life and choose this replica as the leader (hopefully it still has all its data).
+ 2. Choose the first replica (not necessarily in the ISR) that comes back to life as the leader.
+
+
+This is a simple tradeoff between availability and consistency. If we wait for replicas in the ISR, then we will remain unavailable as long as those replicas are down. If such replicas were destroyed or their data was lost, then we are permanently down. If, on the other hand, a non-in-sync replica comes back to life and we allow it to become leader, then its log becomes the source of truth even though it is not guaranteed to have every committed message. By default from version 0.11.0.0, Kafka chooses the first strategy and favors waiting for a consistent replica. This behavior can be changed using the configuration property `unclean.leader.election.enable`, to support use cases where uptime is preferable to consistency.
+
+This dilemma is not specific to Kafka. It exists in any quorum-based scheme. For example in a majority voting scheme, if a majority of servers suffer a permanent failure, then you must either choose to lose 100% of your data or violate consistency by taking what remains on an existing server as your new source of truth.
+
+## Availability and Durability Guarantees
+
+When writing to Kafka, producers can choose whether they wait for the message to be acknowledged by 0, 1 or all (-1) replicas. Note that "acknowledgement by all replicas" does not guarantee that the full set of assigned replicas have received the message. By default, when acks=all, acknowledgement happens as soon as all the current in-sync replicas have received the message. For example, if a topic is configured with only two replicas and one fails (i.e., only one in-sync replica remains), then writes that specify acks=all will succeed. However, these writes could be lost if the remaining replica also fails. Although this ensures maximum availability of the partition, this behavior may be undesirable to some users who prefer durability over availability. Therefore, we provide two topic configurations that can be used to prefer message durability over availability (a topic-creation sketch after the list below shows both):
+
+ 1. Disable unclean leader election - if all replicas become unavailable, then the partition will remain unavailable until the most recent leader becomes available again. This effectively prefers unavailability over the risk of message loss. See the previous section on Unclean Leader Election for clarification.
+ 2. Specify a minimum ISR size - the partition will only accept writes if the size of the ISR is above a certain minimum, in order to prevent the loss of messages that were written to just a single replica, which subsequently becomes unavailable. This setting only takes effect if the producer uses acks=all and guarantees that the message will be acknowledged by at least this many in-sync replicas. This setting offers a trade-off between consistency and availability. A higher setting for minimum ISR size guarantees better consistency since the message is guaranteed to be written to more replicas which reduces the probability that it will be lost. However, it reduces availability since the partition will be unavailable for writes if the number of in-sync replicas drops below the minimum threshold.
+
+
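+As a sketch of how these two settings might be applied when creating a topic (the topic name, partition count, replication factor, and bootstrap address are assumptions for illustration):
+
+    import java.util.List;
+    import java.util.Map;
+    import java.util.Properties;
+    import org.apache.kafka.clients.admin.Admin;
+    import org.apache.kafka.clients.admin.AdminClientConfig;
+    import org.apache.kafka.clients.admin.NewTopic;
+
+    public class DurableTopicExample {
+        public static void main(String[] args) throws Exception {
+            Properties props = new Properties();
+            props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
+            try (Admin admin = Admin.create(props)) {
+                NewTopic topic = new NewTopic("payments", 3, (short) 3)
+                        .configs(Map.of(
+                                // writes with acks=all must reach at least 2 in-sync replicas
+                                "min.insync.replicas", "2",
+                                // never elect an out-of-sync replica as leader
+                                "unclean.leader.election.enable", "false"));
+                admin.createTopics(List.of(topic)).all().get();
+            }
+        }
+    }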
+
+## Replica Management
+
+The above discussion on replicated logs really covers only a single log, i.e. one topic partition. However a Kafka cluster will manage hundreds or thousands of these partitions. We attempt to balance partitions within a cluster in a round-robin fashion to avoid clustering all partitions for high-volume topics on a small number of nodes. Likewise we try to balance leadership so that each node is the leader for a proportional share of its partitions.
+
+It is also important to optimize the leadership election process as that is the critical window of unavailability. A naive implementation of leader election would end up running an election per partition for all partitions a node hosted when that node failed. As discussed above in the section on replication, Kafka clusters have a special role known as the "controller" which is responsible for managing the registration of brokers. If the controller detects the failure of a broker, it is responsible for electing one of the remaining members of the ISR to serve as the new leader. The result is that we are able to batch together many of the required leadership change notifications which makes the election process far cheaper and faster for a large number of partitions. If the controller itself fails, then another controller will be elected.
+
+# Log Compaction
+
+Log compaction ensures that Kafka will always retain at least the last known value for each message key within the log of data for a single topic partition. It addresses use cases and scenarios such as restoring state after application crashes or system failure, or reloading caches after application restarts during operational maintenance. Let's dive into these use cases in more detail and then describe how compaction works.
+
+So far we have described only the simpler approach to data retention where old log data is discarded after a fixed period of time or when the log reaches some predetermined size. This works well for temporal event data such as logging where each record stands alone. However an important class of data streams are the log of changes to keyed, mutable data (for example, the changes to a database table).
+
+Let's discuss a concrete example of such a stream. Say we have a topic containing user email addresses; every time a user updates their email address we send a message to this topic using their user id as the primary key. Now say we send the following messages over some time period for a user with id 123, each message corresponding to a change in email address (messages for other ids are omitted):
+
+
+ 123 => bill@microsoft.com
+ .
+ .
+ .
+ 123 => bill@gatesfoundation.org
+ .
+ .
+ .
+ 123 => bill@gmail.com
+
+Log compaction gives us a more granular retention mechanism so that we are guaranteed to retain at least the last update for each primary key (e.g. `bill@gmail.com`). By doing this we guarantee that the log contains a full snapshot of the final value for every key not just keys that changed recently. This means downstream consumers can restore their own state off this topic without us having to retain a complete log of all changes.
+
+Let's start by looking at a few use cases where this is useful, then we'll see how it can be used.
+
+ 1. _Database change subscription_. It is often necessary to have a data set in multiple data systems, and often one of these systems is a database of some kind (either an RDBMS or perhaps a new-fangled key-value store). For example you might have a database, a cache, a search cluster, and a Hadoop cluster. Each change to the database will need to be reflected in the cache, the search cluster, and eventually in Hadoop. In the case that one is only handling the real-time updates you only need the recent log. But if you want to be able to reload the cache or restore a failed search node you may need a complete data set.
+ 2. _Event sourcing_. This is a style of application design which co-locates query processing with application design and uses a log of changes as the primary store for the application.
+ 3. _Journaling for high-availability_. A process that does local computation can be made fault-tolerant by logging out changes that it makes to its local state so another process can reload these changes and carry on if it should fail. A concrete example of this is handling counts, aggregations, and other "group by"-like processing in a stream query system. Samza, a real-time stream-processing framework, [uses this feature](https://samza.apache.org/learn/0.7.0/container/state-management.html) for exactly this purpose.
+
+In each of these cases one needs primarily to handle the real-time feed of changes, but occasionally, when a machine crashes or data needs to be re-loaded or re-processed, one needs to do a full load. Log compaction allows feeding both of these use cases off the same backing topic. This style of usage of a log is described in more detail in [this blog post](https://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying).
+
+The general idea is quite simple. If we had infinite log retention, and we logged each change in the above cases, then we would have captured the state of the system at each time from when it first began. Using this complete log, we could restore to any point in time by replaying the first N records in the log. This hypothetical complete log is not very practical for systems that update a single record many times as the log will grow without bound even for a stable dataset. The simple log retention mechanism which throws away old updates will bound space but the log is no longer a way to restore the current state--now restoring from the beginning of the log no longer recreates the current state as old updates may not be captured at all.
+
+Log compaction is a mechanism to give finer-grained per-record retention, rather than the coarser-grained time-based retention. The idea is to selectively remove records where we have a more recent update with the same primary key. This way the log is guaranteed to have at least the last state for each key.
+
+This retention policy can be set per-topic, so a single cluster can have some topics where retention is enforced by size or time and other topics where retention is enforced by compaction.
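+
+For example, a minimal sketch of switching an existing topic to the compaction policy with the Admin client (the topic name and bootstrap address are placeholders; note that the topic-level name of this setting is `cleanup.policy`):
+
+    import java.util.List;
+    import java.util.Map;
+    import java.util.Properties;
+    import org.apache.kafka.clients.admin.Admin;
+    import org.apache.kafka.clients.admin.AdminClientConfig;
+    import org.apache.kafka.clients.admin.AlterConfigOp;
+    import org.apache.kafka.clients.admin.ConfigEntry;
+    import org.apache.kafka.common.config.ConfigResource;
+
+    public class CompactTopicExample {
+        public static void main(String[] args) throws Exception {
+            Properties props = new Properties();
+            props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
+            try (Admin admin = Admin.create(props)) {
+                ConfigResource topic = new ConfigResource(ConfigResource.Type.TOPIC, "user-emails");
+                AlterConfigOp setCompact = new AlterConfigOp(
+                        new ConfigEntry("cleanup.policy", "compact"), AlterConfigOp.OpType.SET);
+                admin.incrementalAlterConfigs(Map.of(topic, List.of(setCompact))).all().get();
+            }
+        }
+    }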
+
+This functionality is inspired by one of LinkedIn's oldest and most successful pieces of infrastructure--a database changelog caching service called [Databus](https://github.com/linkedin/databus). Unlike most log-structured storage systems Kafka is built for subscription and organizes data for fast linear reads and writes. Unlike Databus, Kafka acts as a source-of-truth store so it is useful even in situations where the upstream data source would not otherwise be replayable.
+
+## Log Compaction Basics
+
+Here is a high-level picture that shows the logical structure of a Kafka log with the offset for each message.
+
+
+
+The head of the log is identical to a traditional Kafka log. It has dense, sequential offsets and retains all messages. Log compaction adds an option for handling the tail of the log. The picture above shows a log with a compacted tail. Note that the messages in the tail of the log retain the original offset assigned when they were first written--that never changes. Note also that all offsets remain valid positions in the log, even if the message with that offset has been compacted away; in this case this position is indistinguishable from the next highest offset that does appear in the log. For example, in the picture above the offsets 36, 37, and 38 are all equivalent positions and a read beginning at any of these offsets would return a message set beginning with 38.
+
+Compaction also allows for deletes. A message with a key and a null payload will be treated as a delete from the log. Such a record is sometimes referred to as a _tombstone_. This delete marker will cause any prior message with that key to be removed (as would any new message with that key), but delete markers are special in that they will themselves be cleaned out of the log after a period of time to free up space. The point in time at which deletes are no longer retained is marked as the "delete retention point" in the above diagram.
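+
+A delete is therefore just an ordinary produce of a record whose value is null. A minimal sketch (topic name, key, and bootstrap address are placeholders):
+
+    import java.util.Properties;
+    import org.apache.kafka.clients.producer.KafkaProducer;
+    import org.apache.kafka.clients.producer.ProducerConfig;
+    import org.apache.kafka.clients.producer.ProducerRecord;
+    import org.apache.kafka.common.serialization.StringSerializer;
+
+    public class TombstoneExample {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
+            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
+            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
+            try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
+                // A record with a key and a null value is a tombstone: compaction will eventually
+                // remove earlier records with this key, and later remove the tombstone itself
+                // once delete.retention.ms has elapsed.
+                producer.send(new ProducerRecord<>("user-emails", "123", null));
+                producer.flush();
+            }
+        }
+    }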
+
+The compaction is done in the background by periodically recopying log segments. Cleaning does not block reads and can be throttled to use no more than a configurable amount of I/O throughput to avoid impacting producers and consumers. The actual process of compacting a log segment looks something like this:
+
+
+
+## What guarantees does log compaction provide?
+
+Log compaction guarantees the following:
+
+ 1. Any consumer that stays caught-up to within the head of the log will see every message that is written; these messages will have sequential offsets. The topic's `min.compaction.lag.ms` can be used to guarantee the minimum length of time that must pass after a message is written before it could be compacted. I.e. it provides a lower bound on how long each message will remain in the (uncompacted) head. The topic's `max.compaction.lag.ms` can be used to guarantee the maximum delay between the time a message is written and the time the message becomes eligible for compaction.
+ 2. Ordering of messages is always maintained. Compaction will never re-order messages, just remove some.
+ 3. The offset for a message never changes. It is the permanent identifier for a position in the log.
+ 4. Any consumer progressing from the start of the log will see at least the final state of all records in the order they were written. Additionally, all delete markers for deleted records will be seen, provided the consumer reaches the head of the log in a time period less than the topic's `delete.retention.ms` setting (the default is 24 hours). In other words: since the removal of delete markers happens concurrently with reads, it is possible for a consumer to miss delete markers if it lags by more than `delete.retention.ms`. A sketch of a consumer that restores state this way follows the list below.
+
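+As a hedged sketch of the restore pattern from guarantee 4, the following consumer rebuilds an in-memory key-to-value snapshot by reading a compacted topic from the beginning (topic name, group id, bootstrap address, and the bounded poll loop are assumptions for illustration):
+
+    import java.time.Duration;
+    import java.util.HashMap;
+    import java.util.List;
+    import java.util.Map;
+    import java.util.Properties;
+    import org.apache.kafka.clients.consumer.ConsumerConfig;
+    import org.apache.kafka.clients.consumer.ConsumerRecord;
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaConsumer;
+    import org.apache.kafka.common.serialization.StringDeserializer;
+
+    public class StateRestoreExample {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
+            props.put(ConsumerConfig.GROUP_ID_CONFIG, "restore-example");          // placeholder group
+            props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");        // start from the log start
+            props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+            props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+
+            Map<String, String> state = new HashMap<>();
+            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
+                consumer.subscribe(List.of("user-emails"));
+                for (int i = 0; i < 10; i++) { // bounded loop for the sketch; a real app would track lag
+                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
+                    for (ConsumerRecord<String, String> r : records) {
+                        if (r.value() == null) {
+                            state.remove(r.key());         // tombstone: the key was deleted
+                        } else {
+                            state.put(r.key(), r.value()); // later values overwrite earlier ones
+                        }
+                    }
+                }
+            }
+            System.out.println("Restored " + state.size() + " keys");
+        }
+    }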
+
+## Log Compaction Details
+
+Log compaction is handled by the log cleaner, a pool of background threads that recopy log segment files, removing records whose key appears in the head of the log. Each compactor thread works as follows:
+
+ 1. It chooses the log that has the highest ratio of log head to log tail
+ 2. It creates a succinct summary of the last offset for each key in the head of the log
+ 3. It recopies the log from beginning to end removing keys which have a later occurrence in the log. New, clean segments are swapped into the log immediately so the additional disk space required is just one additional log segment (not a full copy of the log).
+ 4. The summary of the log head is essentially just a space-compact hash table. It uses exactly 24 bytes per entry. As a result with 8GB of cleaner buffer one cleaner iteration can clean around 366GB of log head (assuming 1k messages); the arithmetic is worked through in the sketch after this list.
+
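+The 366GB figure in step 4 follows directly from the entry size; a small sketch of the arithmetic (the 1k average message size is the stated assumption):
+
+    public class CleanerBufferMath {
+        public static void main(String[] args) {
+            long bufferBytes = 8L * 1024 * 1024 * 1024; // 8GB of cleaner dedupe buffer
+            long bytesPerEntry = 24;                    // one hash table entry per key
+            long entries = bufferBytes / bytesPerEntry; // ~358 million keys per cleaning pass
+            long avgMessageBytes = 1024;                // assumed 1k average message size
+            long cleanableBytes = entries * avgMessageBytes;
+            System.out.printf("entries=%d, cleanable head ~= %d GB%n",
+                    entries, cleanableBytes / 1_000_000_000L); // prints ~366 GB
+        }
+    }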
+
+## Configuring The Log Cleaner
+
+The log cleaner is enabled by default. This will start the pool of cleaner threads. To enable log cleaning on a particular topic, add the log-specific property
+
+
+ log.cleanup.policy=compact
+
+The `log.cleanup.policy` property is a broker configuration setting defined in the broker's `server.properties` file; it affects all of the topics in the cluster that do not have a configuration override in place as documented [here](/documentation.html#brokerconfigs). The log cleaner can be configured to retain a minimum amount of the uncompacted "head" of the log. This is enabled by setting the compaction time lag.
+
+
+ log.cleaner.min.compaction.lag.ms
+
+This can be used to prevent messages newer than a minimum message age from being subject to compaction. If not set, all log segments are eligible for compaction except for the last segment, i.e. the one currently being written to. The active segment will not be compacted even if all of its messages are older than the minimum compaction time lag. The log cleaner can be configured to ensure a maximum delay after which the uncompacted "head" of the log becomes eligible for log compaction.
+
+
+ log.cleaner.max.compaction.lag.ms
+
+This can be used to prevent logs with a low produce rate from remaining ineligible for compaction for an unbounded duration. If not set, logs that do not exceed `min.cleanable.dirty.ratio` are not compacted. Note that this compaction deadline is not a hard guarantee since it is still subject to the availability of log cleaner threads and the actual compaction time. You will want to monitor the `uncleanable-partitions-count`, `max-clean-time-secs` and `max-compaction-delay-secs` metrics.
+
+Further cleaner configurations are described [here](/documentation.html#brokerconfigs).
+
+# Quotas
+
+A Kafka cluster has the ability to enforce quotas on requests to control the broker resources used by clients. Two types of client quotas can be enforced by Kafka brokers for each group of clients sharing a quota:
+
+ 1. Network bandwidth quotas define byte-rate thresholds (since 0.9)
+ 2. Request rate quotas define CPU utilization thresholds as a percentage of network and I/O threads (since 0.11)
+
+
+
+## Why are quotas necessary?
+
+It is possible for producers and consumers to produce/consume very high volumes of data or generate requests at a very high rate and thus monopolize broker resources, cause network saturation and generally DoS other clients and the brokers themselves. Having quotas protects against these issues and is all the more important in large multi-tenant clusters where a small set of badly behaved clients can degrade user experience for the well behaved ones. In fact, when running Kafka as a service this even makes it possible to enforce API limits according to an agreed upon contract.
+
+## Client groups
+
+The identity of Kafka clients is the user principal, which represents an authenticated user in a secure cluster. In a cluster that supports unauthenticated clients, the user principal is a grouping of unauthenticated users chosen by the broker using a configurable `PrincipalBuilder`. Client-id is a logical grouping of clients with a meaningful name chosen by the client application. The tuple (user, client-id) defines a secure logical group of clients that share both user principal and client-id.
+
+Quotas can be applied to (user, client-id), user or client-id groups. For a given connection, the most specific quota matching the connection is applied. All connections of a quota group share the quota configured for the group. For example, if (user="test-user", client-id="test-client") has a produce quota of 10MB/sec, this is shared across all producer instances of user "test-user" with the client-id "test-client".
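+
+As a sketch of how the quota in this example might be applied programmatically (the entity names, byte rate, and bootstrap address are assumptions; quotas can equally be managed with the kafka-configs tool):
+
+    import java.util.List;
+    import java.util.Map;
+    import java.util.Properties;
+    import org.apache.kafka.clients.admin.Admin;
+    import org.apache.kafka.clients.admin.AdminClientConfig;
+    import org.apache.kafka.common.quota.ClientQuotaAlteration;
+    import org.apache.kafka.common.quota.ClientQuotaEntity;
+
+    public class QuotaExample {
+        public static void main(String[] args) throws Exception {
+            Properties props = new Properties();
+            props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
+            try (Admin admin = Admin.create(props)) {
+                // The quota group (user="test-user", client-id="test-client")
+                ClientQuotaEntity entity = new ClientQuotaEntity(Map.of(
+                        ClientQuotaEntity.USER, "test-user",
+                        ClientQuotaEntity.CLIENT_ID, "test-client"));
+                // 10MB/sec produce quota, shared by all connections in the group
+                ClientQuotaAlteration alteration = new ClientQuotaAlteration(entity,
+                        List.of(new ClientQuotaAlteration.Op("producer_byte_rate", 10.0 * 1024 * 1024)));
+                admin.alterClientQuotas(List.of(alteration)).all().get();
+            }
+        }
+    }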
+
+## Quota Configuration
+
+Quota configuration may be defined for (user, client-id), user and client-id groups. It is possible to override the default quota at any of the quota levels that need a higher (or even lower) quota. The mechanism is similar to the per-topic log config overrides. User and (user, client-id) quota overrides are written to the metadata log. These overrides are read by all brokers and are effective immediately. This lets us change quotas without having to do a rolling restart of the entire cluster. See here for details. Default quotas for each group may also be updated dynamically using the same mechanism.
+
+The order of precedence for quota configuration is:
+
+ 1. matching user and client-id quotas
+ 2. matching user and default client-id quotas
+ 3. matching user quota
+ 4. default user and matching client-id quotas
+ 5. default user and default client-id quotas
+ 6. default user quota
+ 7. matching client-id quota
+ 8. default client-id quota
+
+
+
+## Network Bandwidth Quotas
+
+Network bandwidth quotas are defined as the byte rate threshold for each group of clients sharing a quota. By default, each unique client group receives a fixed quota in bytes/sec as configured by the cluster. This quota is defined on a per-broker basis. Each group of clients can publish/fetch a maximum of X bytes/sec per broker before clients are throttled.
+
+## Request Rate Quotas
+
+Request rate quotas are defined as the percentage of time a client can utilize on request handler I/O threads and network threads of each broker within a quota window. A quota of `n%` represents `n%` of one thread, so the quota is out of a total capacity of `((num.io.threads + num.network.threads) * 100)%`. Each group of clients may use a total percentage of up to `n%` across all I/O and network threads in a quota window before being throttled. Since the number of threads allocated for I/O and network threads is typically based on the number of cores available on the broker host, request rate quotas represent the total percentage of CPU that may be used by each group of clients sharing the quota.
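+
+For instance, a quick sketch of the capacity arithmetic (the thread counts below match the broker defaults, and the 200% quota is just an example):
+
+    public class RequestQuotaCapacity {
+        public static void main(String[] args) {
+            int ioThreads = 8;       // num.io.threads (example value)
+            int networkThreads = 3;  // num.network.threads (example value)
+            int capacityPercent = (ioThreads + networkThreads) * 100; // total capacity: 1100%
+            double quotaPercent = 200;                                // an example group quota of 200%
+            System.out.printf("the quota is %.1f%% of total request-handling capacity%n",
+                    100.0 * quotaPercent / capacityPercent);          // prints ~18.2%
+        }
+    }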
+
+## Enforcement
+
+By default, each unique client group receives a fixed quota as configured by the cluster. This quota is defined on a per-broker basis. Each client can utilize this quota per broker before it gets throttled. We decided that defining these quotas per broker is much better than having a fixed cluster wide bandwidth per client because that would require a mechanism to share client quota usage among all the brokers. This can be harder to get right than the quota implementation itself!
+
+How does a broker react when it detects a quota violation? In our solution, the broker first computes the amount of delay needed to bring the violating client under its quota and returns a response with the delay immediately. In case of a fetch request, the response will not contain any data. Then, the broker mutes the channel to the client so that it does not process any further requests from the client until the delay is over. Upon receiving a response with a non-zero delay duration, the Kafka client will also refrain from sending further requests to the broker during the delay. Therefore, requests from a throttled client are effectively blocked from both sides. Even with older client implementations that do not respect the delay response from the broker, the back pressure applied by the broker via muting its socket channel can still handle the throttling of badly behaving clients. Clients that send further requests on the throttled channel will receive responses only after the delay is over.
+
+Byte-rate and thread utilization are measured over multiple small windows (e.g. 30 windows of 1 second each) in order to detect and correct quota violations quickly. Typically, having large measurement windows (e.g. 10 windows of 30 seconds each) leads to large bursts of traffic followed by long delays, which is not great in terms of user experience.
diff --git a/content/en/41/design/protocol.md b/content/en/41/design/protocol.md
new file mode 100644
index 000000000..f28db3b64
--- /dev/null
+++ b/content/en/41/design/protocol.md
@@ -0,0 +1,203 @@
+---
+title: Protocol
+description:
+weight: 2
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Kafka protocol guide
+
+This document covers the wire protocol implemented in Kafka. It is meant to give a readable guide to the protocol that covers the available requests, their binary format, and the proper way to make use of them to implement a client. This document assumes you understand the basic design and terminology described [here](https://kafka.apache.org/documentation.html#design).
+
+ * Preliminaries
+ * Network
+ * Partitioning and bootstrapping
+ * Partitioning Strategies
+ * Batching
+ * Versioning and Compatibility
+ * Retrieving Supported API versions
+ * SASL Authentication Sequence
+ * The Protocol
+ * Protocol Primitive Types
+ * Notes on reading the request format grammars
+ * Common Request and Response Structure
+ * Request and Response Headers
+ * Record Batch
+ * Constants
+ * Error Codes
+ * Api Keys
+ * The Messages
+ * Some Common Philosophical Questions
+
+
+
+## Preliminaries
+
+### Network
+
+Kafka uses a binary protocol over TCP. The protocol defines all APIs as request response message pairs. All messages are size delimited and are made up of the following primitive types.
+
+The client initiates a socket connection and then writes a sequence of request messages and reads back the corresponding response message. No handshake is required on connection or disconnection. TCP is happier if you maintain persistent connections used for many requests to amortize the cost of the TCP handshake, but beyond this penalty connecting is pretty cheap.
+
+The client will likely need to maintain a connection to multiple brokers, as data is partitioned and the clients will need to talk to the server that has their data. However it should not generally be necessary to maintain multiple connections to a single broker from a single client instance (i.e. connection pooling).
+
+The server guarantees that on a single TCP connection, requests will be processed in the order they are sent and responses will return in that order as well. The broker's request processing allows only a single in-flight request per connection in order to guarantee this ordering. Note that clients can (and ideally should) use non-blocking IO to implement request pipelining and achieve higher throughput. i.e., clients can send requests even while awaiting responses for preceding requests since the outstanding requests will be buffered in the underlying OS socket buffer. All requests are initiated by the client, and result in a corresponding response message from the server except where noted.
+
+The server has a configurable maximum limit on request size and any request that exceeds this limit will result in the socket being disconnected.
+
+### Partitioning and bootstrapping
+
+Kafka is a partitioned system so not all servers have the complete data set. Instead recall that topics are split into a pre-defined number of partitions, P, and each partition is replicated with some replication factor, N. Topic partitions themselves are just ordered "commit logs" numbered 0, 1, ..., P-1.
+
+All systems of this nature have the question of how a particular piece of data is assigned to a particular partition. Kafka clients directly control this assignment; the brokers themselves enforce no particular semantics of which messages should be published to a particular partition. Rather, to publish messages the client directly addresses messages to a particular partition, and when fetching messages, fetches from a particular partition. If two clients want to use the same partitioning scheme they must use the same method to compute the mapping of key to partition.
+
+These requests to publish or fetch data must be sent to the broker that is currently acting as the leader for a given partition. This condition is enforced by the broker, so a request for a particular partition to the wrong broker will result in the NotLeaderForPartition error code (described below).
+
+How can the client find out which topics exist, what partitions they have, and which brokers currently host those partitions so that it can direct its requests to the right hosts? This information is dynamic, so you can't just configure each client with some static mapping file. Instead all Kafka brokers can answer a metadata request that describes the current state of the cluster: what topics there are, which partitions those topics have, which broker is the leader for those partitions, and the host and port information for these brokers.
+
+In other words, the client needs to somehow find one broker and that broker will tell the client about all the other brokers that exist and what partitions they host. This first broker may itself go down so the best practice for a client implementation is to take a list of two or three URLs to bootstrap from. The user can then choose to use a load balancer or just statically configure two or three of their Kafka hosts in the clients.
+
+The client does not need to keep polling to see if the cluster has changed; it can fetch metadata once when it is instantiated and cache that metadata until it receives an error indicating that the metadata is out of date. This error can come in two forms: (1) a socket error indicating the client cannot communicate with a particular broker, or (2) an error code in the response to a request indicating that this broker no longer hosts the partition for which data was requested.
+
+ 1. Cycle through a list of "bootstrap" Kafka URLs until we find one we can connect to. Fetch cluster metadata.
+ 2. Process fetch or produce requests, directing them to the appropriate broker based on the topic/partitions they send to or fetch from.
+ 3. If we get an appropriate error, refresh the metadata and try again.
+
+
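+A hedged sketch of this bootstrap-and-retry loop (the fetchMetadata helper is a hypothetical placeholder, not a Kafka client API):
+
+    import java.util.List;
+
+    public class BootstrapSketch {
+        private Object metadata; // cached cluster metadata
+
+        void bootstrap(List<String> bootstrapUrls) {
+            for (String url : bootstrapUrls) {        // 1. cycle through the bootstrap URLs
+                try {
+                    metadata = fetchMetadata(url);    //    cache metadata from the first reachable broker
+                    return;
+                } catch (RuntimeException e) {
+                    // broker unreachable: try the next URL
+                }
+            }
+            throw new IllegalStateException("no bootstrap broker reachable");
+        }
+
+        void onStaleMetadataError(List<String> bootstrapUrls) {
+            bootstrap(bootstrapUrls);                 // 3. refresh the metadata and retry the request
+        }
+
+        Object fetchMetadata(String url) {            // placeholder for a real Metadata request
+            throw new UnsupportedOperationException("placeholder");
+        }
+    }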
+
+### Partitioning Strategies
+
+As mentioned above the assignment of messages to partitions is something the producing client controls. That said, how should this functionality be exposed to the end-user?
+
+Partitioning really serves two purposes in Kafka:
+
+ 1. It balances data and request load over brokers
+ 2. It serves as a way to divvy up processing among consumer processes while allowing local state and preserving order within the partition. We call this semantic partitioning.
+
+
+
+For a given use case you may care about only one of these or both.
+
+To accomplish simple load balancing a simple approach would be for the client to just round robin requests over all brokers. Another alternative, in an environment where there are many more producers than brokers, would be to have each client choose a single partition at random and publish to that. This latter strategy will result in far fewer TCP connections.
+
+Semantic partitioning means using some key in the message to assign messages to partitions. For example if you were processing a click message stream you might want to partition the stream by the user id so that all data for a particular user would go to a single consumer. To accomplish this the client can take a key associated with the message and use some hash of this key to choose the partition to which to deliver the message.
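+
+A minimal sketch of such a key-hash mapping (this is a simplification; Kafka's default Java partitioner hashes the serialized key bytes with murmur2 rather than using String.hashCode):
+
+    public class SemanticPartitioner {
+        /** Map a record key to a partition so that all records for one key land in the same partition. */
+        static int partitionFor(String key, int numPartitions) {
+            // Mask off the sign bit so the hash is non-negative, then take the modulus.
+            return (key.hashCode() & 0x7fffffff) % numPartitions;
+        }
+
+        public static void main(String[] args) {
+            int partition = partitionFor("user-123", 12); // every click event for user-123 maps here
+            System.out.println("user-123 -> partition " + partition);
+        }
+    }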
+
+### Batching
+
+Our APIs encourage batching small things together for efficiency. We have found this is a very significant performance win. Both our API to send messages and our API to fetch messages always work with a sequence of messages not a single message to encourage this. A clever client can make use of this and support an "asynchronous" mode in which it batches together messages sent individually and sends them in larger clumps. We go even further with this and allow the batching across multiple topics and partitions, so a produce request may contain data to append to many partitions and a fetch request may pull data from many partitions all at once.
+
+The client implementer can choose to ignore this and send everything one at a time if they like.
+
+### Versioning and Compatibility
+
+Kafka has a "bidirectional" client compatibility policy. In other words, new clients can talk to old servers, and old clients can talk to new servers. This allows users to upgrade either clients or servers without experiencing any downtime.
+
+Since the Kafka protocol has changed over time, clients and servers need to agree on the schema of the message that they are sending over the wire. This is done through API versioning.
+
+Before each request is sent, the client sends the API key and the API version. These two 16-bit numbers, when taken together, uniquely identify the schema of the message to follow.
+
+The intention is that clients will support a range of API versions. When communicating with a particular broker, a given client should use the highest API version supported by both and indicate this version in their requests.
+
+The server will reject requests with a version it does not support, and will always respond to the client with exactly the protocol format it expects based on the version it included in its request. The intended upgrade path is that new features would first be rolled out on the server (with the older clients not making use of them) and then as newer clients are deployed these new features would gradually be taken advantage of. Note there is an exceptional case while retrieving supported API versions where the server can respond with a different version.
+
+Note that [KIP-482 tagged fields](https://cwiki.apache.org/confluence/x/OhMyBw) can be added to a request without incrementing the version number. This offers an additional way of evolving the message schema without breaking compatibility. Tagged fields do not take up any space when the field is not set. Therefore, if a field is rarely used, it is more efficient to make it a tagged field than to put it in the mandatory schema. However, tagged fields are ignored by recipients that don't know about them, which could pose a challenge if this is not the behavior that the sender wants. In such cases, a version bump may be more appropriate.
+
+### Retrieving Supported API versions
+
+In order to work against multiple broker versions, clients need to know what versions of various APIs a broker supports. The broker exposes this information since 0.10.0.0 as described in [KIP-35](https://cwiki.apache.org/confluence/x/KK6nAw). Clients should use the supported API versions information to choose the highest API version supported by both client and broker. If no such version exists, an error should be reported to the user.
+
+The following sequence may be used by a client to obtain supported API versions from a broker.
+
+ 1. Client sends `ApiVersionsRequest` to a broker after connection has been established with the broker. If SSL is enabled, this happens after SSL connection has been established.
+ 2. On receiving `ApiVersionsRequest`, a broker returns its full list of supported ApiKeys and versions regardless of current authentication state (e.g., before SASL authentication on a SASL listener; note that no Kafka protocol requests may take place on an SSL listener before the SSL handshake is finished). If this is considered to leak information about the broker version, a workaround is to use SSL with client authentication, which is performed at an earlier stage of the connection where the `ApiVersionsRequest` is not available. Also, note that broker versions older than 0.10.0.0 do not support this API and will either ignore the request or close the connection in response to the request. Also note that if the client `ApiVersionsRequest` version is unsupported by the broker (client is ahead), and the broker version is 2.4.0 or greater, then the broker will respond with a version 0 ApiVersionsResponse with the error code set to `UNSUPPORTED_VERSION` and the `api_versions` field populated with the supported version of the `ApiVersionsRequest`. It is then up to the client to retry, making another `ApiVersionsRequest` using the highest version supported by the client and broker. See [KIP-511: Collect and Expose Client's Name and Version in the Brokers](https://cwiki.apache.org/confluence/x/qRJ4Bw).
+ 3. If multiple versions of an API are supported by broker and client, clients are recommended to use the latest version supported by the broker and itself.
+ 4. Deprecation of a protocol version is done by marking an API version as deprecated in the protocol documentation.
+ 5. Supported API versions obtained from a broker are only valid for the connection on which that information is obtained. In the event of disconnection, the client should obtain the information from the broker again, as the broker might have been upgraded/downgraded in the meantime.
+
+
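+A minimal sketch of the version-selection rule (the version-range record here is illustrative, not a Kafka class):
+
+    import java.util.Map;
+    import java.util.Optional;
+
+    public class VersionNegotiation {
+        /** Inclusive version range for one API key, e.g. taken from an ApiVersions response. */
+        record VersionRange(short min, short max) {}
+
+        /** Pick the highest version of an API supported by both sides, if any. */
+        static Optional<Short> choose(short apiKey,
+                                      Map<Short, VersionRange> client,
+                                      Map<Short, VersionRange> broker) {
+            VersionRange c = client.get(apiKey);
+            VersionRange b = broker.get(apiKey);
+            if (c == null || b == null) return Optional.empty();   // API unknown to one side
+            short highestCommon = (short) Math.min(c.max(), b.max());
+            if (highestCommon < c.min() || highestCommon < b.min()) {
+                return Optional.empty();                            // the ranges do not overlap
+            }
+            return Optional.of(highestCommon);                      // report an error to the user if empty
+        }
+    }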
+
+### SASL Authentication Sequence
+
+The following sequence is used for SASL authentication:
+
+ 1. Kafka `ApiVersionsRequest` may be sent by the client to obtain the version ranges of requests supported by the broker. This is optional.
+ 2. Kafka `SaslHandshakeRequest` containing the SASL mechanism for authentication is sent by the client. If the requested mechanism is not enabled in the server, the server responds with the list of supported mechanisms and closes the client connection. If the mechanism is enabled in the server, the server sends a successful response and continues with SASL authentication.
+ 3. The actual SASL authentication is now performed. If `SaslHandshakeRequest` version is v0, a series of SASL client and server tokens corresponding to the mechanism are sent as opaque packets without wrapping the messages with Kafka protocol headers. If `SaslHandshakeRequest` version is v1, the `SaslAuthenticate` request/response are used, where the actual SASL tokens are wrapped in the Kafka protocol. The error code in the final message from the broker will indicate if authentication succeeded or failed.
+ 4. If authentication succeeds, subsequent packets are handled as Kafka API requests. Otherwise, the client connection is closed.
+
+
+
+For interoperability with 0.9.0.x clients, the first packet received by the server is handled as a SASL/GSSAPI client token if it is not a valid Kafka request. SASL/GSSAPI authentication is performed starting with this packet, skipping the first two steps above.
+
+## The Protocol
+
+### Protocol Primitive Types
+
+The protocol is built out of the following primitive types.
+
+{{< include-html file="/static/41/generated/protocol_types.html" >}}
+
+### Notes on reading the request format grammars
+
+The [BNF](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form)s below give an exact context free grammar for the request and response binary format. The BNF is intentionally not compact in order to give human-readable names. As always in a BNF a sequence of productions indicates concatenation. When there are multiple possible productions these are separated with '|' and may be enclosed in parentheses for grouping. The top-level definition is always given first and subsequent sub-parts are indented.
+
+### Common Request and Response Structure
+
+All requests and responses originate from the following grammar, which will be incrementally described through the rest of this document:
+
+
+ RequestOrResponse => Size (RequestMessage | ResponseMessage)
+ Size => int32
+
+Field| Description
+---|---
+message_size| The message_size field gives the size of the subsequent request or response message in bytes. The client can read requests by first reading this 4 byte size as an integer N, and then reading and parsing the subsequent N bytes of the request.
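+
+As an illustration, a hedged sketch of reading one size-delimited message from a connection's input stream in Java (the protocol uses network byte order, which DataInputStream reads natively):
+
+    import java.io.DataInputStream;
+    import java.io.IOException;
+    import java.io.InputStream;
+
+    public class FrameReader {
+        /** Read one size-delimited protocol message: a 4-byte size N followed by N bytes. */
+        static byte[] readMessage(InputStream in) throws IOException {
+            DataInputStream data = new DataInputStream(in);
+            int size = data.readInt();          // message_size
+            byte[] payload = new byte[size];
+            data.readFully(payload);            // the request or response message itself
+            return payload;
+        }
+    }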
+
+### Request and Response Headers
+
+Different request and response versions require different versions of the corresponding headers. These header versions are specified below together with API message descriptions.
+
+### Record Batch
+
+A description of the record batch format can be found [here](/#recordbatch).
+
+## Constants
+
+### Error Codes
+
+We use numeric codes to indicate what problem occurred on the server. These can be translated by the client into exceptions or whatever error handling mechanism is appropriate in the client language. Here is a table of the error codes currently in use:
+
+{{< include-html file="/static/41/generated/protocol_errors.html" >}}
+
+### Api Keys
+
+The following are the numeric codes that the stable ApiKey in the request can take for each of the below request types.
+
+{{< include-html file="/static/41/generated/protocol_api_keys.html" >}}
+
+## The Messages
+
+This section gives details on each of the individual API Messages, their usage, their binary format, and the meaning of their fields.
+
+The message consists of the header and body:
+
+
+ Message => RequestOrResponseHeader Body
+
+
+`RequestOrResponseHeader` is the versioned request or response header. `Body` is the message-specific body.
+
+{{< include-html file="/static/41/generated/protocol_messages.html" >}}
+
+## Some Common Philosophical Questions
+
+Some people have asked why we don't use HTTP. There are a number of reasons, the best being that client implementors can make use of some of the more advanced TCP features--the ability to multiplex requests, the ability to simultaneously poll many connections, etc. We have also found HTTP libraries in many languages to be surprisingly shabby.
+
+Others have asked if maybe we shouldn't support many different protocols. Prior experience with this was that it makes it very hard to add and test new features if they have to be ported across many protocol implementations. Our feeling is that most users don't really see multiple protocols as a feature, they just want a good reliable client in the language of their choice.
+
+Another question is why we don't adopt XMPP, STOMP, AMQP or an existing protocol. The answer to this varies by protocol, but in general the problem is that the protocol does determine large parts of the implementation and we couldn't do what we are doing if we didn't have control over the protocol. Our belief is that it is possible to do better than existing messaging systems have in providing a truly distributed messaging system, and to do this we need to build something that works differently.
+
+A final question is why we don't use a system like Protocol Buffers or Thrift to define our request messages. These packages excel at helping you to manage lots and lots of serialized messages. However we have only a few messages. Support across languages is somewhat spotty (depending on the package). Also, the mapping between binary log format and wire protocol is something we manage somewhat carefully and this would not be possible with these systems. Finally, we prefer the style of versioning APIs explicitly and checking this to inferring new values as nulls as it allows more nuanced control of compatibility.
diff --git a/content/en/41/getting-started/_index.md b/content/en/41/getting-started/_index.md
new file mode 100644
index 000000000..c5eccc36a
--- /dev/null
+++ b/content/en/41/getting-started/_index.md
@@ -0,0 +1,10 @@
+---
+title: Getting Started
+description: This section provides an overview of what Kafka is, why it is useful, and how to get started using it.
+weight: 1
+tags: ['kafka', 'docs', 'getting-started']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/getting-started/compatibility.md b/content/en/41/getting-started/compatibility.md
new file mode 100644
index 000000000..4bc1417ad
--- /dev/null
+++ b/content/en/41/getting-started/compatibility.md
@@ -0,0 +1,56 @@
+---
+title: Compatibility
+description:
+weight: 7
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Compatibility
+
+With the release of Kafka 4.0, significant changes have been introduced that impact compatibility across various components. To assist users in planning upgrades and ensuring seamless interoperability, a comprehensive compatibility matrix has been prepared.
+
+# JDK Compatibility Across Kafka Versions
+
+Module | Kafka Version | Java 11 | Java 17 | Java 23
+---|---|---|---|---
+Clients | 4.0.0 | ✅ | ✅ | ✅
+Streams | 4.0.0 | ✅ | ✅ | ✅
+Connect | 4.0.0 | ❌ | ✅ | ✅
+Server | 4.0.0 | ❌ | ✅ | ✅
+
+**Note: Java 8 is removed in Kafka 4.0 and is no longer supported.**
+
+# Server Compatibility
+
+KRaft Cluster Version | Compatibility 4.0 Server (dynamic voter) | Compatibility 4.0 Server (static voter)
+---|---|---
+before 3.2.x | ❌ | ❌
+3.3.x | ❌ | ✅
+3.4.x | ❌ | ✅
+3.5.x | ❌ | ✅
+3.6.x | ❌ | ✅
+3.7.x | ❌ | ✅
+3.8.x | ❌ | ✅
+3.9.x | ✅ | ✅
+4.0.x | ✅ | ✅
+
+**Note: A server cannot be upgraded from static voters to dynamic voters; see [KAFKA-16538](https://issues.apache.org/jira/browse/KAFKA-16538).**
+
+## Client/Broker Forward Compatibility
+
+Kafka Version | Module | Compatibility with Kafka 4.0 | Key Differences/Limitations
+---|---|---|---
+0.x, 1.x, 2.0 | Client | ❌ Not Compatible | Pre-0.10.x protocols are fully removed in Kafka 4.0 ([KIP-896](https://cwiki.apache.org/confluence/x/K5sODg)).
+0.x, 1.x, 2.0 | Streams | ❌ Not Compatible | Pre-0.10.x protocols are fully removed in Kafka 4.0 ([KIP-896](https://cwiki.apache.org/confluence/x/K5sODg)).
+0.x, 1.x, 2.0 | Connect | ❌ Not Compatible | Pre-0.10.x protocols are fully removed in Kafka 4.0 ([KIP-896](https://cwiki.apache.org/confluence/x/K5sODg)).
+2.1 ~ 2.8 | Client | ⚠️ Partially Compatible | More details in the [Consumer](/40/documentation.html#upgrade_400_notable_consumer), [Producer](/40/documentation.html#upgrade_400_notable_producer), and [Admin Client](/40/documentation.html#upgrade_400_notable_admin_client) sections.
+2.1 ~ 2.8 | Streams | ⚠️ Limited Compatibility | More details in the [Kafka Streams](/40/documentation.html#upgrade_400_notable_kafka_streams) section.
+2.1 ~ 2.8 | Connect | ⚠️ Limited Compatibility | More details in the [Connect](/40/documentation.html#upgrade_400_notable_connect) section.
+3.x | Client | ✅ Fully Compatible |
+3.x | Streams | ✅ Fully Compatible |
+3.x | Connect | ✅ Fully Compatible |
+
+Note: Starting with Kafka 4.0, the `--zookeeper` option in AdminClient commands has been removed. Users must use the `--bootstrap-server` option to interact with the Kafka cluster. This change aligns with the transition to KRaft mode.
diff --git a/content/en/41/getting-started/docker.md b/content/en/41/getting-started/docker.md
new file mode 100644
index 000000000..50e96b527
--- /dev/null
+++ b/content/en/41/getting-started/docker.md
@@ -0,0 +1,52 @@
+---
+title: Docker
+description:
+weight: 8
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+## JVM Based Apache Kafka Docker Image
+
+[Docker](https://www.docker.com/) is a popular container runtime. Docker images for the JVM-based Apache Kafka can be found on [Docker Hub](https://hub.docker.com/r/apache/kafka) and are available from version 3.7.0.
+
+The Docker image can be pulled from Docker Hub using the following command:
+
+
+ $ docker pull apache/kafka:4.1.0
+
+If you want to fetch the latest version of the Docker image, use the following command:
+
+
+ $ docker pull apache/kafka:latest
+
+To start the Kafka container using this Docker image with default configs and on default port 9092:
+
+
+ $ docker run -p 9092:9092 apache/kafka:4.1.0
+
+## GraalVM Based Native Apache Kafka Docker Image
+
+Docker images for the GraalVM Based Native Apache Kafka can be found on [Docker Hub](https://hub.docker.com/r/apache/kafka-native) and are available from version 3.8.0.
+NOTE: This image is experimental and intended for local development and testing purposes only; it is not recommended for production use.
+
+The Docker image can be pulled from Docker Hub using the following command:
+
+
+ $ docker pull apache/kafka-native:4.1.0
+
+If you want to fetch the latest version of the Docker image, use the following command:
+
+
+ $ docker pull apache/kafka-native:latest
+
+To start the Kafka container using this Docker image with default configs and on default port 9092:
+
+
+ $ docker run -p 9092:9092 apache/kafka-native:4.1.0
+
+## Usage guide
+
+Detailed instructions for using the Docker image are mentioned [here](https://github.com/apache/kafka/blob/trunk/docker/examples/README.md).
diff --git a/content/en/41/getting-started/ecosystem.md b/content/en/41/getting-started/ecosystem.md
new file mode 100644
index 000000000..5d916e268
--- /dev/null
+++ b/content/en/41/getting-started/ecosystem.md
@@ -0,0 +1,11 @@
+---
+title: Ecosystem
+description:
+weight: 4
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+There are a plethora of tools that integrate with Kafka outside the main distribution. The [ecosystem page](https://cwiki.apache.org/confluence/x/Ri3VAQ) lists many of these, including stream processing systems, Hadoop integration, monitoring, and deployment tools.
diff --git a/content/en/41/getting-started/introduction.md b/content/en/41/getting-started/introduction.md
new file mode 100644
index 000000000..069e9fcb0
--- /dev/null
+++ b/content/en/41/getting-started/introduction.md
@@ -0,0 +1,92 @@
+---
+title: Introduction
+description:
+weight: 1
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+## What is event streaming?
+
+Event streaming is the digital equivalent of the human body's central nervous system. It is the technological foundation for the 'always-on' world where businesses are increasingly software-defined and automated, and where the user of software is more software.
+
+Technically speaking, event streaming is the practice of capturing data in real-time from event sources like databases, sensors, mobile devices, cloud services, and software applications in the form of streams of events; storing these event streams durably for later retrieval; manipulating, processing, and reacting to the event streams in real-time as well as retrospectively; and routing the event streams to different destination technologies as needed. Event streaming thus ensures a continuous flow and interpretation of data so that the right information is at the right place, at the right time.
+
+## What can I use event streaming for?
+
+Event streaming is applied to a [wide variety of use cases](/powered-by) across a plethora of industries and organizations. Its many examples include:
+
+ * To process payments and financial transactions in real-time, such as in stock exchanges, banks, and insurance companies.
+ * To track and monitor cars, trucks, fleets, and shipments in real-time, such as in logistics and the automotive industry.
+ * To continuously capture and analyze sensor data from IoT devices or other equipment, such as in factories and wind parks.
+ * To collect and immediately react to customer interactions and orders, such as in retail, the hotel and travel industry, and mobile applications.
+ * To monitor patients in hospital care and predict changes in condition to ensure timely treatment in emergencies.
+ * To connect, store, and make available data produced by different divisions of a company.
+ * To serve as the foundation for data platforms, event-driven architectures, and microservices.
+
+
+
+## Apache Kafka® is an event streaming platform. What does that mean?
+
+Kafka combines three key capabilities so you can implement [your use cases](/powered-by) for event streaming end-to-end with a single battle-tested solution:
+
+ 1. To **publish** (write) and **subscribe to** (read) streams of events, including continuous import/export of your data from other systems.
+ 2. To **store** streams of events durably and reliably for as long as you want.
+ 3. To **process** streams of events as they occur or retrospectively.
+
+
+
+And all this functionality is provided in a distributed, highly scalable, elastic, fault-tolerant, and secure manner. Kafka can be deployed on bare-metal hardware, virtual machines, and containers, and on-premises as well as in the cloud. You can choose between self-managing your Kafka environments and using fully managed services offered by a variety of vendors.
+
+## How does Kafka work in a nutshell?
+
+Kafka is a distributed system consisting of **servers** and **clients** that communicate via a high-performance [TCP network protocol](/protocol.html). It can be deployed on bare-metal hardware, virtual machines, and containers in on-premise as well as cloud environments.
+
+**Servers** : Kafka is run as a cluster of one or more servers that can span multiple datacenters or cloud regions. Some of these servers form the storage layer, called the brokers. Other servers run [Kafka Connect](/#connect) to continuously import and export data as event streams to integrate Kafka with your existing systems such as relational databases as well as other Kafka clusters. To let you implement mission-critical use cases, a Kafka cluster is highly scalable and fault-tolerant: if any of its servers fails, the other servers will take over their work to ensure continuous operations without any data loss.
+
+**Clients** : They allow you to write distributed applications and microservices that read, write, and process streams of events in parallel, at scale, and in a fault-tolerant manner even in the case of network problems or machine failures. Kafka ships with some such clients included, which are augmented by [dozens of clients](https://cwiki.apache.org/confluence/x/3gDVAQ) provided by the Kafka community: clients are available for Java and Scala including the higher-level [Kafka Streams](/streams/) library, for Go, Python, C/C++, and many other programming languages as well as REST APIs.
+
+## Main Concepts and Terminology
+
+An **event** records the fact that "something happened" in the world or in your business. It is also called record or message in the documentation. When you read or write data to Kafka, you do this in the form of events. Conceptually, an event has a key, value, timestamp, and optional metadata headers. Here's an example event:
+
+ * Event key: "Alice"
+ * Event value: "Made a payment of $200 to Bob"
+ * Event timestamp: "Jun. 25, 2020 at 2:06 p.m."
+
+
+
+**Producers** are those client applications that publish (write) events to Kafka, and **consumers** are those that subscribe to (read and process) these events. In Kafka, producers and consumers are fully decoupled and agnostic of each other, which is a key design element to achieve the high scalability that Kafka is known for. For example, producers never need to wait for consumers. Kafka provides various [guarantees](/#semantics) such as the ability to process events exactly-once.
+
+Events are organized and durably stored in **topics**. Very simplified, a topic is similar to a folder in a filesystem, and the events are the files in that folder. An example topic name could be "payments". Topics in Kafka are always multi-producer and multi-subscriber: a topic can have zero, one, or many producers that write events to it, as well as zero, one, or many consumers that subscribe to these events. Events in a topic can be read as often as needed—unlike traditional messaging systems, events are not deleted after consumption. Instead, you define for how long Kafka should retain your events through a per-topic configuration setting, after which old events will be discarded. Kafka's performance is effectively constant with respect to data size, so storing data for a long time is perfectly fine.
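+
+As an illustration of such a per-topic retention setting, the following hedged sketch uses the Java Admin API to set `retention.ms` on a hypothetical "payments" topic; the broker address and the seven-day value are assumptions made for the example.
+
+ import java.util.List;
+ import java.util.Map;
+ import java.util.Properties;
+ import org.apache.kafka.clients.admin.Admin;
+ import org.apache.kafka.clients.admin.AdminClientConfig;
+ import org.apache.kafka.clients.admin.AlterConfigOp;
+ import org.apache.kafka.clients.admin.ConfigEntry;
+ import org.apache.kafka.common.config.ConfigResource;
+
+ Properties props = new Properties();
+ props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // assumption: local broker
+
+ try (Admin admin = Admin.create(props)) {
+     ConfigResource topic = new ConfigResource(ConfigResource.Type.TOPIC, "payments");
+     // Retain events for seven days (604800000 ms) on this topic only.
+     AlterConfigOp setRetention =
+         new AlterConfigOp(new ConfigEntry("retention.ms", "604800000"), AlterConfigOp.OpType.SET);
+     admin.incrementalAlterConfigs(Map.of(topic, List.of(setRetention))).all().get();
+ }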
+
+Topics are **partitioned**, meaning a topic is spread over a number of "buckets" located on different Kafka brokers. This distributed placement of your data is very important for scalability because it allows client applications to both read and write the data from/to many brokers at the same time. When a new event is published to a topic, it is actually appended to one of the topic's partitions. Events with the same event key (e.g., a customer or vehicle ID) are written to the same partition, and Kafka [guarantees](/#semantics) that any consumer of a given topic-partition will always read that partition's events in exactly the same order as they were written.
+
+ Figure: This example topic has four partitions P1–P4. Two different producer clients are publishing, independently from each other, new events to the topic by writing events over the network to the topic's partitions. Events with the same key (denoted by their color in the figure) are written to the same partition. Note that both producers can write to the same partition if appropriate.
+
+To make your data fault-tolerant and highly available, every topic can be **replicated**, even across geo-regions or datacenters, so that there are always multiple brokers that have a copy of the data in case things go wrong, you want to do maintenance on the brokers, and so on. A common production setting is a replication factor of 3, i.e., there will always be three copies of your data. This replication is performed at the level of topic-partitions.
+
+This primer should be sufficient for an introduction. The [Design](/#design) section of the documentation explains Kafka's various concepts in full detail, if you are interested.
+
+## Kafka APIs
+
+In addition to command line tooling for management and administration tasks, Kafka has five core APIs for Java and Scala:
+
+ * The [Admin API](/documentation.html#adminapi) to manage and inspect topics, brokers, and other Kafka objects.
+ * The [Producer API](/documentation.html#producerapi) to publish (write) a stream of events to one or more Kafka topics.
+ * The [Consumer API](/documentation.html#consumerapi) to subscribe to (read) one or more topics and to process the stream of events produced to them (see the sketch after this list).
+ * The [Kafka Streams API](/streams) to implement stream processing applications and microservices. It provides higher-level functions to process event streams, including transformations, stateful operations like aggregations and joins, windowing, processing based on event-time, and more. Input is read from one or more topics in order to generate output to one or more topics, effectively transforming the input streams to output streams.
+ * The [Kafka Connect API](/documentation.html#connect) to build and run reusable data import/export connectors that consume (read) or produce (write) streams of events from and to external systems and applications so they can integrate with Kafka. For example, a connector to a relational database like PostgreSQL might capture every change to a set of tables. However, in practice, you typically don't need to implement your own connectors because the Kafka community already provides hundreds of ready-to-use connectors.
+
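+To complement the Producer and Consumer API entries above, here is a minimal, hedged sketch of the Consumer API in Java; the group id, topic name, and broker address are assumptions made purely for illustration.
+
+ import java.time.Duration;
+ import java.util.List;
+ import java.util.Properties;
+ import org.apache.kafka.clients.consumer.ConsumerConfig;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.clients.consumer.KafkaConsumer;
+ import org.apache.kafka.common.serialization.StringDeserializer;
+
+ Properties props = new Properties();
+ props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // assumption: local broker
+ props.put(ConsumerConfig.GROUP_ID_CONFIG, "payments-readers");         // assumption: example group id
+ props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+ props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+
+ try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
+     consumer.subscribe(List.of("payments"));
+     for (ConsumerRecord<String, String> record : consumer.poll(Duration.ofSeconds(1))) {
+         System.out.printf("key=%s value=%s%n", record.key(), record.value());
+     }
+ }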
+
+
+## Where to go from here
+
+ * To get hands-on experience with Kafka, follow the [Quickstart](/quickstart).
+ * To understand Kafka in more detail, read the [Documentation](/). You also have your choice of [Kafka books and academic papers](/books-and-papers).
+ * Browse through the [Use Cases](/powered-by) to learn how other users in our world-wide community are getting value out of Kafka.
+ * Join a [local Kafka meetup group](/events) and [watch talks from Kafka Summit](https://kafka-summit.org/past-events/), the main conference of the Kafka community.
+
+
diff --git a/content/en/41/getting-started/quickstart.md b/content/en/41/getting-started/quickstart.md
new file mode 100644
index 000000000..46319932b
--- /dev/null
+++ b/content/en/41/getting-started/quickstart.md
@@ -0,0 +1,210 @@
+---
+title: Quick Start
+description:
+weight: 3
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+## Step 1: Get Kafka
+
+[Download](https://www.apache.org/dyn/closer.cgi?path=/kafka/4.1.0/kafka_2.13-4.1.0.tgz) the latest Kafka release and extract it:
+
+
+ $ tar -xzf kafka_2.13-4.1.0.tgz
+ $ cd kafka_2.13-4.1.0
+
+## Step 2: Start the Kafka environment
+
+NOTE: Your local environment must have Java 17+ installed.
+
+Kafka can be run using local scripts and downloaded files or the docker image.
+
+### Using downloaded files
+
+Generate a Cluster UUID
+
+
+ $ KAFKA_CLUSTER_ID="$(bin/kafka-storage.sh random-uuid)"
+
+Format Log Directories
+
+
+ $ bin/kafka-storage.sh format --standalone -t $KAFKA_CLUSTER_ID -c config/server.properties
+
+Start the Kafka Server
+
+
+ $ bin/kafka-server-start.sh config/server.properties
+
+Once the Kafka server has successfully launched, you will have a basic Kafka environment running and ready to use.
+
+### Using JVM Based Apache Kafka Docker Image
+
+Get the Docker image:
+
+
+ $ docker pull apache/kafka:4.1.0
+
+Start the Kafka Docker container:
+
+
+ $ docker run -p 9092:9092 apache/kafka:4.1.0
+
+### Using GraalVM Based Native Apache Kafka Docker Image
+
+Get the Docker image:
+
+
+ $ docker pull apache/kafka-native:4.1.0
+
+Start the Kafka Docker container:
+
+
+ $ docker run -p 9092:9092 apache/kafka-native:4.1.0
+
+## Step 3: Create a topic to store your events
+
+Kafka is a distributed _event streaming platform_ that lets you read, write, store, and process [_events_](/#messages) (also called _records_ or _messages_ in the documentation) across many machines.
+
+Example events are payment transactions, geolocation updates from mobile phones, shipping orders, sensor measurements from IoT devices or medical equipment, and much more. These events are organized and stored in [_topics_](/#intro_concepts_and_terms). Very simplified, a topic is similar to a folder in a filesystem, and the events are the files in that folder.
+
+So before you can write your first events, you must create a topic. Open another terminal session and run:
+
+
+ $ bin/kafka-topics.sh --create --topic quickstart-events --bootstrap-server localhost:9092
+
+All of Kafka's command line tools have additional options: run the `kafka-topics.sh` command without any arguments to display usage information. For example, it can also show you [details such as the partition count](/#intro_concepts_and_terms) of the new topic:
+
+
+ $ bin/kafka-topics.sh --describe --topic quickstart-events --bootstrap-server localhost:9092
+ Topic: quickstart-events TopicId: NPmZHyhbR9y00wMglMH2sg PartitionCount: 1 ReplicationFactor: 1 Configs:
+ Topic: quickstart-events Partition: 0 Leader: 0 Replicas: 0 Isr: 0
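+
+If you prefer to create the topic programmatically instead of using the shell tool, a minimal, hedged sketch with the Java Admin API could look like the following; the partition count and replication factor mirror the describe output above, while the broker address is an assumption.
+
+ import java.util.List;
+ import java.util.Properties;
+ import org.apache.kafka.clients.admin.Admin;
+ import org.apache.kafka.clients.admin.AdminClientConfig;
+ import org.apache.kafka.clients.admin.NewTopic;
+
+ Properties props = new Properties();
+ props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+
+ try (Admin admin = Admin.create(props)) {
+     // One partition, replication factor 1, matching the describe output shown above.
+     admin.createTopics(List.of(new NewTopic("quickstart-events", 1, (short) 1))).all().get();
+ }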
+
+## Step 4: Write some events into the topic
+
+A Kafka client communicates with the Kafka brokers via the network for writing (or reading) events. Once received, the brokers will store the events in a durable and fault-tolerant manner for as long as you need—even forever.
+
+Run the console producer client to write a few events into your topic. By default, each line you enter will result in a separate event being written to the topic.
+
+
+ $ bin/kafka-console-producer.sh --topic quickstart-events --bootstrap-server localhost:9092
+ >This is my first event
+ >This is my second event
+
+You can stop the producer client with `Ctrl-C` at any time.
+
+## Step 5: Read the events
+
+Open another terminal session and run the console consumer client to read the events you just created:
+
+
+ $ bin/kafka-console-consumer.sh --topic quickstart-events --from-beginning --bootstrap-server localhost:9092
+ This is my first event
+ This is my second event
+
+You can stop the consumer client with `Ctrl-C` at any time.
+
+Feel free to experiment: for example, switch back to your producer terminal (previous step) to write additional events, and see how the events immediately show up in your consumer terminal.
+
+Because events are durably stored in Kafka, they can be read as many times and by as many consumers as you want. You can easily verify this by opening yet another terminal session and re-running the previous command again.
+
+## Step 6: Import/export your data as streams of events with Kafka Connect
+
+You probably have lots of data in existing systems like relational databases or traditional messaging systems, along with many applications that already use these systems. [Kafka Connect](/#connect) allows you to continuously ingest data from external systems into Kafka, and vice versa. It is an extensible tool that runs _connectors_, which implement the custom logic for interacting with an external system. It is thus very easy to integrate existing systems with Kafka. To make this process even easier, there are hundreds of such connectors readily available.
+
+In this quickstart we'll see how to run Kafka Connect with simple connectors that import data from a file to a Kafka topic and export data from a Kafka topic to a file.
+
+First, make sure to add `connect-file-4.1.0.jar` to the `plugin.path` property in the Connect worker's configuration. For the purpose of this quickstart we'll use a relative path and consider the connectors' package as an uber jar, which works when the quickstart commands are run from the installation directory. However, it's worth noting that for production deployments using absolute paths is always preferable. See [plugin.path](/#connectconfigs_plugin.path) for a detailed description of how to set this config.
+
+Edit the `config/connect-standalone.properties` file, add or change the `plugin.path` configuration property to match the following, and save the file:
+
+
+ $ echo "plugin.path=libs/connect-file-4.1.0.jar" >> config/connect-standalone.properties
+
+Then, start by creating some seed data to test with:
+
+
+ $ echo -e "foo
+ bar" > test.txt
+
+Or on Windows:
+
+
+ $ echo foo > test.txt
+ $ echo bar >> test.txt
+
+Next, we'll start two connectors running in _standalone_ mode, which means they run in a single, local, dedicated process. We provide three configuration files as parameters. The first is always the configuration for the Kafka Connect process, containing common configuration such as the Kafka brokers to connect to and the serialization format for data. The remaining configuration files each specify a connector to create. These files include a unique connector name, the connector class to instantiate, and any other configuration required by the connector.
+
+
+ $ bin/connect-standalone.sh config/connect-standalone.properties config/connect-file-source.properties config/connect-file-sink.properties
+
+These sample configuration files, included with Kafka, use the default local cluster configuration you started earlier and create two connectors: the first is a source connector that reads lines from an input file and produces each to a Kafka topic and the second is a sink connector that reads messages from a Kafka topic and produces each as a line in an output file.
+
+During startup you'll see a number of log messages, including some indicating that the connectors are being instantiated. Once the Kafka Connect process has started, the source connector should start reading lines from `test.txt` and producing them to the topic `connect-test`, and the sink connector should start reading messages from the topic `connect-test` and write them to the file `test.sink.txt`. We can verify the data has been delivered through the entire pipeline by examining the contents of the output file:
+
+
+ $ more test.sink.txt
+ foo
+ bar
+
+Note that the data is being stored in the Kafka topic `connect-test`, so we can also run a console consumer to see the data in the topic (or use custom consumer code to process it):
+
+
+ $ bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic connect-test --from-beginning
+ {"schema":{"type":"string","optional":false},"payload":"foo"}
+ {"schema":{"type":"string","optional":false},"payload":"bar"}
+ …
+
+The connectors continue to process data, so we can add data to the file and see it move through the pipeline:
+
+
+ $ echo "Another line" >> test.txt
+
+You should see the line appear in the console consumer output and in the sink file.
+
+## Step 7: Process your events with Kafka Streams
+
+Once your data is stored in Kafka as events, you can process the data with the [Kafka Streams](/streams) client library for Java/Scala. It allows you to implement mission-critical real-time applications and microservices, where the input and/or output data is stored in Kafka topics. Kafka Streams combines the simplicity of writing and deploying standard Java and Scala applications on the client side with the benefits of Kafka's server-side cluster technology to make these applications highly scalable, elastic, fault-tolerant, and distributed. The library supports exactly-once processing, stateful operations and aggregations, windowing, joins, processing based on event-time, and much more.
+
+To give you a first taste, here's how one would implement the popular `WordCount` algorithm:
+
+
+ KStream<String, String> textLines = builder.stream("quickstart-events");
+
+ KTable<String, Long> wordCounts = textLines
+     .flatMapValues(line -> Arrays.asList(line.toLowerCase().split(" ")))
+     .groupBy((keyIgnored, word) -> word)
+     .count();
+
+ wordCounts.toStream().to("output-topic", Produced.with(Serdes.String(), Serdes.Long()));
+
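+For context, the boilerplate around such a topology might look roughly like the sketch below; the application id, bootstrap address, and default serdes are assumptions added for illustration and are not part of the quickstart itself.
+
+ import java.util.Properties;
+ import org.apache.kafka.common.serialization.Serdes;
+ import org.apache.kafka.streams.KafkaStreams;
+ import org.apache.kafka.streams.StreamsBuilder;
+ import org.apache.kafka.streams.StreamsConfig;
+
+ Properties props = new Properties();
+ props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-quickstart");  // assumption
+ props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // assumption
+ props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+ props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+
+ StreamsBuilder builder = new StreamsBuilder();  // the `builder` referenced in the snippet above
+ // ... define the topology as shown above ...
+ KafkaStreams streams = new KafkaStreams(builder.build(), props);
+ streams.start();
+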
+The [Kafka Streams demo](/streams/quickstart) and the [app development tutorial](/41/streams/tutorial) demonstrate how to code and run such a streaming application from start to finish.
+
+## Step 8: Terminate the Kafka environment
+
+Now that you reached the end of the quickstart, feel free to tear down the Kafka environment—or continue playing around.
+
+ 1. Stop the producer and consumer clients with `Ctrl-C`, if you haven't done so already.
+ 2. Stop the Kafka broker with `Ctrl-C`.
+
+
+
+If you also want to delete any data of your local Kafka environment including any events you have created along the way, run the command:
+
+
+ $ rm -rf /tmp/kafka-logs /tmp/kraft-combined-logs
+
+## Congratulations!
+
+You have successfully finished the Apache Kafka quickstart.
+
+To learn more, we suggest the following next steps:
+
+ * Read through the brief [Introduction](/intro) to learn how Kafka works at a high level, its main concepts, and how it compares to other technologies. To understand Kafka in more detail, head over to the [Documentation](/).
+ * Browse through the [Use Cases](/powered-by) to learn how other users in our world-wide community are getting value out of Kafka.
+ * Join a [local Kafka meetup group](/events) and [watch talks from Kafka Summit](https://kafka-summit.org/past-events/), the main conference of the Kafka community.
+
+
diff --git a/content/en/41/getting-started/upgrade.md b/content/en/41/getting-started/upgrade.md
new file mode 100644
index 000000000..ab2281797
--- /dev/null
+++ b/content/en/41/getting-started/upgrade.md
@@ -0,0 +1,173 @@
+---
+title: Upgrading
+description:
+weight: 5
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+## Upgrading to 4.1.0
+
+### Upgrading Servers to 4.1.0 from any version 3.3.x through 4.0.x
+
+### Notable changes in 4.1.0
+
+ * Apache Kafka 4.1 ships with a preview of Queues for Kafka ([KIP-932](https://cwiki.apache.org/confluence/x/4hA0Dw)). This feature introduces a new kind of group called share groups, as an alternative to consumer groups. Consumers in a share group cooperatively consume records from topics, without assigning each partition to just one consumer. Share groups also introduce per-record acknowledgement and counting of delivery attempts. Use share groups in cases where records are processed one at a time, rather than as part of an ordered stream. To enable share groups, use the `kafka-features.sh` tool to upgrade to `share.version=1`. For more information, please read the [release notes](https://cwiki.apache.org/confluence/x/CIq3FQ).
+ * **Common**
+ * The logger class name for LogCleaner has been updated from `kafka.log.LogCleaner` to `org.apache.kafka.storage.internals.log.LogCleaner` in the log4j2.yaml configuration file. Added loggers for `org.apache.kafka.storage.internals.log.LogCleaner$CleanerThread` and `org.apache.kafka.storage.internals.log.Cleaner` classes to CleanerAppender.
+ * The filename for rotated `state-change.log` files has been updated from `stage-change.log.[date]` to `state-change.log.[date]` in the log4j2.yaml configuration file.
+ * **Broker**
+ * The configuration `log.cleaner.enable` is deprecated. Users should no longer set it to `false` to prepare for future removal. After the removal, `log.cleaner.threads` will also have a lower bound of 1. For further details, please refer to [KIP-1148](https://cwiki.apache.org/confluence/x/XAyWF).
+ * KIP-966 part 1: Eligible Leader Replicas (ELR) will be enabled by default on new clusters. Once the ELR feature is enabled, the previously set broker-level `min.insync.replicas` value will be removed; set it at the cluster level if necessary. For further details, please refer to [here](/41/documentation.html#eligible_leader_replicas).
+ * **Producer**
+ * The `flush` method now detects potential deadlocks and prohibits its use inside a callback. This change prevents unintended blocking behavior, which was a known risk in earlier versions.
+ * **Command**
+ * The `force` option of `ConfigCommand` has been removed, as it has been non-operational since version 0.10.1.0.
+ * **Admin**
+ * The `listConsumerGroups()` and `listConsumerGroups(ListConsumerGroupsOptions)` methods in `Admin` are deprecated and will be removed in the next major version. Use `Admin.listGroups(ListGroupsOptions.forConsumerGroups())` instead (see the sketch after this list).
+ * **Kafka Streams**
+ * The `window.size.ms` and `window.inner.serde.class` in `StreamsConfig` are deprecated. Use the corresponding string constants defined in `TimeWindowedSerializer`, `TimeWindowedDeserializer`, `SessionWindowedSerializer` and `SessionWindowedDeserializer` instead.
+
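+As referenced in the Admin item above, a minimal sketch of the replacement call might look like the following. Only `Admin.listGroups(ListGroupsOptions.forConsumerGroups())` is quoted from the item itself; the surrounding result and listing types are assumptions and should be verified against the Javadoc.
+
+ import java.util.Properties;
+ import org.apache.kafka.clients.admin.Admin;
+ import org.apache.kafka.clients.admin.AdminClientConfig;
+ import org.apache.kafka.clients.admin.GroupListing;
+ import org.apache.kafka.clients.admin.ListGroupsOptions;
+
+ Properties props = new Properties();
+ props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // assumption: local broker
+
+ try (Admin admin = Admin.create(props)) {
+     // Replacement for the deprecated listConsumerGroups() methods.
+     for (GroupListing group : admin.listGroups(ListGroupsOptions.forConsumerGroups()).all().get()) {
+         System.out.println(group.groupId());  // GroupListing and groupId() are assumed names
+     }
+ }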
+
+
+## Upgrading to 4.0.1
+
+### Upgrading Clients to 4.0.1
+
+**For a rolling upgrade:**
+
+ 1. Upgrade the clients one at a time: shut down the client, update the code, and restart it.
+ 2. Clients (including Streams and Connect) must be on version 2.1 or higher before upgrading to 4.0. Many deprecated APIs were removed in Kafka 4.0. For more information about the compatibility, please refer to the [compatibility matrix](/41/compatibility.html) or [KIP-1124](https://cwiki.apache.org/confluence/x/y4kgF).
+
+
+
+### Upgrading Servers to 4.0.1 from any version 3.3.x through 3.9.x
+
+Note: Apache Kafka 4.0 only supports KRaft mode - ZooKeeper mode has been removed. As such, **broker upgrades to 4.0.0 (and higher) require KRaft mode and the software and metadata versions must be at least 3.3.x** (the first version when KRaft mode was deemed production ready). For clusters in KRaft mode with versions older than 3.3.x, we recommend upgrading to 3.9.x before upgrading to 4.0.x. Clusters in ZooKeeper mode have to be [migrated to KRaft mode](/41/documentation.html#kraft_zk_migration) before they can be upgraded to 4.0.x.
+
+**For a rolling upgrade:**
+
+ 1. Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the brokers will be running the latest version and you can verify that the cluster's behavior and performance meet expectations.
+ 2. Once the cluster's behavior and performance have been verified, finalize the upgrade by running `bin/kafka-features.sh --bootstrap-server localhost:9092 upgrade --release-version 4.0`
+ 3. Note that cluster metadata downgrade is not supported in this version since it has metadata changes. Every [MetadataVersion](https://github.com/apache/kafka/blob/trunk/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersion.java) has a boolean parameter that indicates if there are metadata changes (i.e. `IBP_4_0_IV1(23, "4.0", "IV1", true)` means this version has metadata changes). Given your current and target versions, a downgrade is only possible if there are no metadata changes in the versions between.
+
+
+
+### Notable changes in 4.0.1
+
+ * The filename for rotated `state-change.log` files has been updated from `stage-change.log.[date]` to `state-change.log.[date]` in the log4j2.yaml configuration file. See [KAFKA-19576](https://issues.apache.org/jira/browse/KAFKA-19576) for details.
+
+
+
+### Notable changes in 4.0.0
+
+ * Old protocol API versions have been removed. Users should ensure brokers are version 2.1 or higher before upgrading Java clients (including Connect and Kafka Streams which use the clients internally) to 4.0. Similarly, users should ensure their Java clients (including Connect and Kafka Streams) version is 2.1 or higher before upgrading brokers to 4.0. Finally, care also needs to be taken when it comes to kafka clients that are not part of Apache Kafka, please see [KIP-896](https://cwiki.apache.org/confluence/x/K5sODg) for the details.
+ * Apache Kafka 4.0 only supports KRaft mode - ZooKeeper mode has been removed. For upgrade instructions, see [Upgrading to 4.0.1 from any version 3.3.x through 3.9.x](/41/documentation.html#upgrade_4_0_1).
+ * Apache Kafka 4.0 ships with a brand-new group coordinator implementation (See [here](https://cwiki.apache.org/confluence/x/HhD1D)). Functionally speaking, it implements all the same APIs. There are reasonable defaults, but the behavior of the new group coordinator can be tuned by setting the configurations with prefix `group.coordinator`.
+ * The Next Generation of the Consumer Rebalance Protocol ([KIP-848](https://cwiki.apache.org/confluence/x/HhD1D)) is now Generally Available (GA) in Apache Kafka 4.0. The protocol is automatically enabled on the server when the upgrade to 4.0 is finalized. Note that once the new protocol is used by consumer groups, the cluster can only be downgraded to version 3.4.1 or newer. For more information check [here](/41/documentation.html#consumer_rebalance_protocol).
+ * Transactions Server-Side Defense ([KIP-890](https://cwiki.apache.org/confluence/x/B40ODg)) brings a strengthened transactional protocol to Apache Kafka 4.0. The new and improved transactional protocol is enabled when the upgrade to 4.0 is finalized. When using 4.0 producer clients, the producer epoch is bumped on every transaction to ensure every transaction includes the intended messages and duplicates are not written as part of the next transaction. Downgrading the protocol is safe. For more information check [here](/41/documentation.html#transaction_protocol).
+ * Eligible Leader Replicas ([KIP-966 Part 1](https://cwiki.apache.org/confluence/x/mpOzDw)) enhances the replication protocol for Apache Kafka 4.0. The KRaft controller now keeps track of the data partition replicas that are not included in the ISR but are safe to be elected as leader without data loss. Such replicas are stored in the partition metadata as the `Eligible Leader Replicas` (ELR). For more information check [here](/41/documentation.html#eligible_leader_replicas).
+ * Since Apache Kafka 4.0.0, we have added a system property (`org.apache.kafka.sasl.oauthbearer.allowed.urls`) to set the allowed URLs as SASL OAUTHBEARER token or jwks endpoints. By default, the value is an empty list. Users should explicitly set the allowed list if necessary.
+ * A number of deprecated classes, methods, configurations and tools have been removed.
+ * **Common**
+ * The `metrics.jmx.blacklist` and `metrics.jmx.whitelist` configurations were removed from the `org.apache.kafka.common.metrics.JmxReporter`. Please use `metrics.jmx.exclude` and `metrics.jmx.include` respectively instead.
+ * The `auto.include.jmx.reporter` configuration was removed. The `metric.reporters` configuration is now set to `org.apache.kafka.common.metrics.JmxReporter` by default.
+ * The constructor `org.apache.kafka.common.metrics.JmxReporter` with string argument was removed. See [KIP-606](https://cwiki.apache.org/confluence/x/SxIRCQ) for details.
+ * The `bufferpool-wait-time-total`, `io-waittime-total`, and `iotime-total` metrics were removed. Please use `bufferpool-wait-time-ns-total`, `io-wait-time-ns-total`, and `io-time-ns-total` metrics as replacements, respectively.
+ * The `kafka.common.requests.DescribeLogDirsResponse.LogDirInfo` class was removed. Please use the `kafka.clients.admin.DescribeLogDirsResult.descriptions()` class and `kafka.clients.admin.DescribeLogDirsResult.allDescriptions()` instead.
+ * The `kafka.common.requests.DescribeLogDirsResponse.ReplicaInfo` class was removed. Please use the `kafka.clients.admin.DescribeLogDirsResult.descriptions()` class and `kafka.clients.admin.DescribeLogDirsResult.allDescriptions()` instead.
+ * The `org.apache.kafka.common.security.oauthbearer.secured.OAuthBearerLoginCallbackHandler` class was removed. Please use the `org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginCallbackHandler` class instead.
+ * The `org.apache.kafka.common.security.oauthbearer.secured.OAuthBearerValidatorCallbackHandler` class was removed. Please use the `org.apache.kafka.common.security.oauthbearer.OAuthBearerValidatorCallbackHandler` class instead.
+ * The `org.apache.kafka.common.errors.NotLeaderForPartitionException` class was removed. The `org.apache.kafka.common.errors.NotLeaderOrFollowerException` is returned if a request could not be processed because the broker is not the leader or follower for a topic partition.
+ * The `org.apache.kafka.clients.producer.internals.DefaultPartitioner` and `org.apache.kafka.clients.producer.UniformStickyPartitioner` classes were removed.
+ * The `log.message.format.version` and `message.format.version` configs were removed.
+ * The function `onNewBatch` in `org.apache.kafka.clients.producer.Partitioner` class was removed.
+ * The default properties files for KRaft mode are no longer stored in the separate `config/kraft` directory since ZooKeeper has been removed. These files have been consolidated with the other configuration files; all configuration files are now in the `config` directory.
+ * The valid format for `--bootstrap-server` only supports comma-separated values, such as `host1:port1,host2:port2,...`. Providing other formats, like space-separated bootstrap servers (e.g., `host1:port1 host2:port2 host3:port3`), will result in an exception, even though this was allowed in Apache Kafka versions prior to 4.0.
+ * **Broker**
+ * The `delegation.token.master.key` configuration was removed. Please use `delegation.token.secret.key` instead.
+ * The `offsets.commit.required.acks` configuration was removed. See [KIP-1041](https://cwiki.apache.org/confluence/x/9YobEg) for details.
+ * The `log.message.timestamp.difference.max.ms` configuration was removed. Please use `log.message.timestamp.before.max.ms` and `log.message.timestamp.after.max.ms` instead. See [KIP-937](https://cwiki.apache.org/confluence/x/thQ0Dw) for details.
+ * The `remote.log.manager.copier.thread.pool.size` configuration default value was changed to 10 from -1. Values of -1 are no longer valid; a minimum value of 1 or higher is required. See [KIP-1030](https://cwiki.apache.org/confluence/x/FAqpEQ)
+ * The `remote.log.manager.expiration.thread.pool.size` configuration default value was changed to 10 from -1. Values of -1 are no longer valid; a minimum value of 1 or higher is required. See [KIP-1030](https://cwiki.apache.org/confluence/x/FAqpEQ)
+ * The `remote.log.manager.thread.pool.size` configuration default value was changed to 2 from 10. See [KIP-1030](https://cwiki.apache.org/confluence/x/FAqpEQ)
+ * The minimum `segment.bytes/log.segment.bytes` has changed from 14 bytes to 1MB. See [KIP-1030](https://cwiki.apache.org/confluence/x/FAqpEQ)
+ * **MirrorMaker**
+ * The original MirrorMaker (MM1) and related classes were removed. Please use the Connect-based MirrorMaker (MM2), as described in the [Geo-Replication section](/41/#georeplication).
+ * The `use.incremental.alter.configs` configuration was removed from `MirrorSourceConnector`. The modified behavior is now identical to the previous `required` configuration, therefore users should ensure that brokers in the target cluster are at least running 2.3.0.
+ * The `add.source.alias.to.metrics` configuration was removed from `MirrorSourceConnector`. The source cluster alias is now always added to the metrics.
+ * The `config.properties.blacklist` was removed from the `org.apache.kafka.connect.mirror.MirrorSourceConfig`. Please use `config.properties.exclude` instead.
+ * The `topics.blacklist` was removed from the `org.apache.kafka.connect.mirror.MirrorSourceConfig`. Please use `topics.exclude` instead.
+ * The `groups.blacklist` was removed from the `org.apache.kafka.connect.mirror.MirrorSourceConfig`. Please use `groups.exclude` instead.
+ * **Tools**
+ * The `kafka.common.MessageReader` class was removed. Please use the [`org.apache.kafka.tools.api.RecordReader`](/41/javadoc/org/apache/kafka/tools/api/RecordReader.html) interface to build custom readers for the `kafka-console-producer` tool.
+ * The `kafka.tools.DefaultMessageFormatter` class was removed. Please use the `org.apache.kafka.tools.consumer.DefaultMessageFormatter` class instead.
+ * The `kafka.tools.LoggingMessageFormatter` class was removed. Please use the `org.apache.kafka.tools.consumer.LoggingMessageFormatter` class instead.
+ * The `kafka.tools.NoOpMessageFormatter` class was removed. Please use the `org.apache.kafka.tools.consumer.NoOpMessageFormatter` class instead.
+ * The `--whitelist` option was removed from the `kafka-console-consumer` command line tool. Please use `--include` instead.
+ * Redirections from the old tools packages have been removed: `kafka.admin.FeatureCommand`, `kafka.tools.ClusterTool`, `kafka.tools.EndToEndLatency`, `kafka.tools.StateChangeLogMerger`, `kafka.tools.StreamsResetter`, `kafka.tools.JmxTool`.
+ * The `--authorizer`, `--authorizer-properties`, and `--zk-tls-config-file` options were removed from the `kafka-acls` command line tool. Please use `--bootstrap-server` or `--bootstrap-controller` instead.
+ * The `kafka.serializer.Decoder` trait was removed, please use the [`org.apache.kafka.tools.api.Decoder`](/41/javadoc/org/apache/kafka/tools/api/Decoder.html) interface to build custom decoders for the `kafka-dump-log` tool.
+ * The `kafka.coordinator.group.OffsetsMessageFormatter` class was removed. Please use the `org.apache.kafka.tools.consumer.OffsetsMessageFormatter` class instead.
+ * The `kafka.coordinator.group.GroupMetadataMessageFormatter` class was removed. Please use the `org.apache.kafka.tools.consumer.GroupMetadataMessageFormatter` class instead.
+ * The `kafka.coordinator.transaction.TransactionLogMessageFormatter` class was removed. Please use the `org.apache.kafka.tools.consumer.TransactionLogMessageFormatter` class instead.
+ * The `--topic-white-list` option was removed from the `kafka-replica-verification` command line tool. Please use `--topics-include` instead.
+ * The `--broker-list` option was removed from the `kafka-verifiable-consumer` command line tool. Please use `--bootstrap-server` instead.
+ * `kafka-configs.sh` now uses the incrementalAlterConfigs API to alter broker configurations instead of the deprecated alterConfigs API, and it will fail directly if the broker doesn't support the incrementalAlterConfigs API (i.e., the broker version is prior to 2.3.x). See [KIP-1011](https://cwiki.apache.org/confluence/x/wIn5E) for more details.
+ * The `kafka.admin.ZkSecurityMigrator` tool was removed.
+ * **Connect**
+ * The `whitelist` and `blacklist` configurations were removed from the `org.apache.kafka.connect.transforms.ReplaceField` transformation. Please use `include` and `exclude` respectively instead.
+ * The `onPartitionsRevoked(Collection)` and `onPartitionsAssigned(Collection)` methods were removed from `SinkTask`.
+ * The `commitRecord(SourceRecord)` method was removed from `SourceTask`.
+ * **Consumer**
+ * The `poll(long)` method was removed from the consumer. Please use `poll(Duration)` instead. Note that there is a difference in behavior between the two methods. The `poll(Duration)` method does not block beyond the timeout awaiting partition assignment, whereas the earlier `poll(long)` method used to wait beyond the timeout.
+ * The `committed(TopicPartition)` and `committed(TopicPartition, Duration)` methods were removed from the consumer. Please use `committed(Set)` and `committed(Set, Duration)` instead.
+ * The `setException(KafkaException)` method was removed from the `org.apache.kafka.clients.consumer.MockConsumer`. Please use `setPollException(KafkaException)` instead.
+ * **Producer**
+ * The `enable.idempotence` configuration will no longer automatically fall back when the `max.in.flight.requests.per.connection` value exceeds 5.
+ * The deprecated `sendOffsetsToTransaction(Map, String)` method has been removed from the Producer API.
+ * The default `linger.ms` changed from 0 to 5 in Apache Kafka 4.0 as the efficiency gains from larger batches typically result in similar or lower producer latency despite the increased linger.
+ * **Admin client**
+ * The `alterConfigs` method was removed from the `org.apache.kafka.clients.admin.Admin`. Please use `incrementalAlterConfigs` instead.
+ * The `org.apache.kafka.common.ConsumerGroupState` enumeration and related methods have been deprecated. Please use `GroupState` instead which applies to all types of group.
+ * The `Admin.describeConsumerGroups` method used to return a `ConsumerGroupDescription` in state `DEAD` if the group ID was not found. In Apache Kafka 4.0, the `GroupIdNotFoundException` is thrown instead as part of the support for new types of group.
+ * The `org.apache.kafka.clients.admin.DeleteTopicsResult.values()` method was removed. Please use `org.apache.kafka.clients.admin.DeleteTopicsResult.topicNameValues()` instead.
+ * The `org.apache.kafka.clients.admin.TopicListing.TopicListing(String, boolean)` method was removed. Please use `org.apache.kafka.clients.admin.TopicListing.TopicListing(String, Uuid, boolean)` instead.
+ * The `org.apache.kafka.clients.admin.ListConsumerGroupOffsetsOptions.topicPartitions(List)` method was removed. Please use `org.apache.kafka.clients.admin.Admin.listConsumerGroupOffsets(Map, ListConsumerGroupOffsetsOptions)` instead.
+ * The deprecated `dryRun` methods were removed from the `org.apache.kafka.clients.admin.UpdateFeaturesOptions`. Please use `validateOnly` instead.
+ * The constructor `org.apache.kafka.clients.admin.FeatureUpdate` with short and boolean arguments was removed. Please use the constructor that accepts short and the specified `UpgradeType` enum instead.
+ * The `allowDowngrade` method was removed from the `org.apache.kafka.clients.admin.FeatureUpdate`.
+ * The `org.apache.kafka.clients.admin.DescribeTopicsResult.DescribeTopicsResult(Map>)` method was removed. Please use `org.apache.kafka.clients.admin.DescribeTopicsResult.DescribeTopicsResult(Map>, Map>)` instead.
+ * The `values()` method was removed from the `org.apache.kafka.clients.admin.DescribeTopicsResult`. Please use `topicNameValues()` instead.
+ * The `all()` method was removed from the `org.apache.kafka.clients.admin.DescribeTopicsResult`. Please use `allTopicNames()` instead.
+ * **Kafka Streams**
+ * All public APIs, deprecated in Apache Kafka 3.6 or an earlier release, have been removed, with the exception of `JoinWindows.of()` and `JoinWindows#grace()`. See [KAFKA-17531](https://issues.apache.org/jira/browse/KAFKA-17531) for details.
+ * The most important changes are highlighted in the [Kafka Streams upgrade guide](/41/streams/upgrade-guide.html#streams_api_changes_400).
+ * For a full list of changes, see [KAFKA-12822](https://issues.apache.org/jira/browse/KAFKA-12822).
+ * If you are using `KStream#transformValues()` which was removed with Apache Kafka 4.0.0 release, and you need to rewrite your program to use `KStreams#processValues()` instead, pay close attention to the [migration guide](/41/streams/developer-guide/dsl-api.html#transformers-removal-and-migration-to-processors).
+ * Other changes:
+ * The minimum Java version required by clients and Kafka Streams applications has been increased from Java 8 to Java 11 while brokers, connect and tools now require Java 17. See [KIP-750](https://cwiki.apache.org/confluence/x/P4vOCg) and [KIP-1013](https://cwiki.apache.org/confluence/x/Bov5E) for more details.
+ * Java 23 support has been added in Apache Kafka 4.0.
+ * Scala 2.12 support has been removed in Apache Kafka 4.0. See [KIP-751](https://cwiki.apache.org/confluence/x/OovOCg) for more details
+ * The logging framework has been migrated from Log4j to Log4j2. Users can use the log4j-transform-cli tool to automatically convert their existing Log4j configuration files to Log4j2 format. See [log4j-transform-cli](https://logging.staged.apache.org/log4j/transform/cli.html#log4j-transform-cli) for more details. Log4j2 provides limited compatibility for Log4j configurations. See [Use Log4j 1 to Log4j 2 bridge](https://logging.apache.org/log4j/2.x/migrate-from-log4j1.html#ConfigurationCompatibility) for more information.
+ * KafkaLog4jAppender has been removed; users should migrate to the Log4j2 appender. See [KafkaAppender](https://logging.apache.org/log4j/2.x/manual/appenders.html#KafkaAppender) for more details.
+ * The `--delete-config` option in the `kafka-topics` command line tool has been deprecated.
+ * For implementors of RemoteLogMetadataManager (RLMM), a new API `nextSegmentWithTxnIndex` is introduced in RLMM to allow the implementation to return the next segment metadata with a transaction index. This API is used when the consumers are enabled with isolation level as READ_COMMITTED. See [KIP-1058](https://cwiki.apache.org/confluence/x/BwuTEg) for more details.
+ * The criteria for identifying internal topics in ReplicationPolicy and DefaultReplicationPolicy have been updated to enable the replication of topics that appear to be internal but aren't truly internal to Kafka and Mirror Maker 2. See [KIP-1074](https://cwiki.apache.org/confluence/x/jA3OEg) for more details.
+ * [KIP-714](https://cwiki.apache.org/confluence/x/2xRRCg) is now enabled for Kafka Streams via [KIP-1076](https://cwiki.apache.org/confluence/x/XA-OEg). This allows to not only collect the metric of the internally used clients of a Kafka Streams application via a broker-side plugin, but also to collect the [metrics](/41/#kafka_streams_monitoring) of the Kafka Streams runtime itself.
+ * The default value of `num.recovery.threads.per.data.dir` has been changed from 1 to 2. The impact of this is faster recovery post unclean shutdown at the expense of extra IO cycles. See [KIP-1030](https://cwiki.apache.org/confluence/x/FAqpEQ)
+ * The default value of `message.timestamp.after.max.ms` has been changed from Long.MAX_VALUE to 1 hour. As a result, messages with a timestamp more than 1 hour in the future will be rejected when `message.timestamp.type=CreateTime` is set. See [KIP-1030](https://cwiki.apache.org/confluence/x/FAqpEQ).
+ * Introduced in [KIP-890](https://cwiki.apache.org/confluence/x/B40ODg), the `TransactionAbortableException` enhances error handling within transactional operations by clearly indicating scenarios where transactions should be aborted due to errors. It is important for applications to properly manage both `TimeoutException` and `TransactionAbortableException` when working with transaction producers.
+ * **TimeoutException:** This exception indicates that a transactional operation has timed out. Given the risk of message duplication that can arise from retrying operations after a timeout (potentially violating exactly-once semantics), applications should treat timeouts as reasons to abort the ongoing transaction.
+ * **TransactionAbortableException:** Specifically introduced to signal errors that should lead to transaction abortion, ensuring this exception is properly handled is critical for maintaining the integrity of transactional processing.
+ * To ensure seamless operation and compatibility with future Kafka versions, developers are encouraged to update their error-handling logic to treat both exceptions as triggers for aborting transactions, as shown in the sketch after this list. This approach is pivotal for preserving exactly-once semantics.
+ * See [KIP-890](https://cwiki.apache.org/confluence/x/B40ODg) and [KIP-1050](https://cwiki.apache.org/confluence/x/8ItyEg) for more details
+ * The filename for rotated `state-change.log` files incorrectly rotates to `stage-change.log.[date]` (changing state to stage). This issue is corrected in 4.0.1. See [KAFKA-19576](https://issues.apache.org/jira/browse/KAFKA-19576) for details.
+
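+As referenced in the KIP-890 guidance above, here is a minimal, hedged sketch of error handling around a transactional producer; the producer setup (including `transactional.id`) is assumed to exist elsewhere, and the topic and record contents are illustrative.
+
+ import org.apache.kafka.clients.producer.ProducerRecord;
+ import org.apache.kafka.common.errors.TimeoutException;
+ import org.apache.kafka.common.errors.TransactionAbortableException;
+
+ producer.initTransactions();
+ try {
+     producer.beginTransaction();
+     producer.send(new ProducerRecord<>("payments", "Alice", "Made a payment of $200 to Bob"));
+     producer.commitTransaction();
+ } catch (TimeoutException | TransactionAbortableException e) {
+     // Both exceptions are treated as triggers for aborting the ongoing transaction.
+     producer.abortTransaction();
+ }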
+
+
+## Upgrading to 3.9.0 and older versions
+
+See [Upgrading From Previous Versions](/39/#upgrade) in the 3.9 documentation.
diff --git a/content/en/41/getting-started/uses.md b/content/en/41/getting-started/uses.md
new file mode 100644
index 000000000..e3715df79
--- /dev/null
+++ b/content/en/41/getting-started/uses.md
@@ -0,0 +1,45 @@
+---
+title: Use Cases
+description:
+weight: 2
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+Here is a description of a few of the popular use cases for Apache Kafka®. For an overview of a number of these areas in action, see [this blog post](https://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying/).
+
+## Messaging
+
+Kafka works well as a replacement for a more traditional message broker. Message brokers are used for a variety of reasons (to decouple processing from data producers, to buffer unprocessed messages, etc). In comparison to most messaging systems Kafka has better throughput, built-in partitioning, replication, and fault-tolerance which makes it a good solution for large scale message processing applications.
+
+In our experience messaging uses are often comparatively low-throughput, but may require low end-to-end latency and often depend on the strong durability guarantees Kafka provides.
+
+In this domain Kafka is comparable to traditional messaging systems such as [ActiveMQ](https://activemq.apache.org) or [RabbitMQ](https://www.rabbitmq.com).
+
+## Website Activity Tracking
+
+The original use case for Kafka was to be able to rebuild a user activity tracking pipeline as a set of real-time publish-subscribe feeds. This means site activity (page views, searches, or other actions users may take) is published to central topics with one topic per activity type. These feeds are available for subscription for a range of use cases including real-time processing, real-time monitoring, and loading into Hadoop or offline data warehousing systems for offline processing and reporting.
+
+Activity tracking is often very high volume as many activity messages are generated for each user page view.
+
+## Metrics
+
+Kafka is often used for operational monitoring data. This involves aggregating statistics from distributed applications to produce centralized feeds of operational data.
+
+## Log Aggregation
+
+Many people use Kafka as a replacement for a log aggregation solution. Log aggregation typically collects physical log files off servers and puts them in a central place (a file server or HDFS perhaps) for processing. Kafka abstracts away the details of files and gives a cleaner abstraction of log or event data as a stream of messages. This allows for lower-latency processing and easier support for multiple data sources and distributed data consumption. In comparison to log-centric systems like Scribe or Flume, Kafka offers equally good performance, stronger durability guarantees due to replication, and much lower end-to-end latency.
+
+## Stream Processing
+
+Many users of Kafka process data in processing pipelines consisting of multiple stages, where raw input data is consumed from Kafka topics and then aggregated, enriched, or otherwise transformed into new topics for further consumption or follow-up processing. For example, a processing pipeline for recommending news articles might crawl article content from RSS feeds and publish it to an "articles" topic; further processing might normalize or deduplicate this content and publish the cleansed article content to a new topic; a final processing stage might attempt to recommend this content to users. Such processing pipelines create graphs of real-time data flows based on the individual topics. Starting in 0.10.0.0, a light-weight but powerful stream processing library called [Kafka Streams](/streams) is available in Apache Kafka to perform such data processing as described above. Apart from Kafka Streams, alternative open source stream processing tools include [Apache Storm](https://storm.apache.org/) and [Apache Samza](https://samza.apache.org/).
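+
+To make such a multi-stage pipeline concrete, here is a minimal, hedged sketch of one intermediate stage using Kafka Streams; the topic names and the normalization step are illustrative assumptions rather than a prescribed design.
+
+ import org.apache.kafka.streams.StreamsBuilder;
+ import org.apache.kafka.streams.kstream.KStream;
+
+ StreamsBuilder builder = new StreamsBuilder();
+ KStream<String, String> articles = builder.stream("articles");   // assumption: raw crawled content
+ articles
+     .mapValues(content -> content.trim().toLowerCase())          // stand-in for real normalization/deduplication
+     .to("cleansed-articles");                                    // assumption: downstream topic name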
+
+## Event Sourcing
+
+[Event sourcing](https://martinfowler.com/eaaDev/EventSourcing.html) is a style of application design where state changes are logged as a time-ordered sequence of records. Kafka's support for very large stored log data makes it an excellent backend for an application built in this style.
+
+## Commit Log
+
+Kafka can serve as a kind of external commit-log for a distributed system. The log helps replicate data between nodes and acts as a re-syncing mechanism for failed nodes to restore their data. The [log compaction](/documentation.html#compaction) feature in Kafka helps support this usage. In this usage Kafka is similar to [Apache BookKeeper](https://bookkeeper.apache.org/) project.
diff --git a/content/en/41/getting-started/zk2kraft.md b/content/en/41/getting-started/zk2kraft.md
new file mode 100644
index 000000000..fb4776f8c
--- /dev/null
+++ b/content/en/41/getting-started/zk2kraft.md
@@ -0,0 +1,219 @@
+---
+title: KRaft vs ZooKeeper
+description:
+weight: 6
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Differences Between KRaft mode and ZooKeeper mode
+
+# Removed ZooKeeper Features
+
+This section documents differences in behavior between KRaft mode and ZooKeeper mode. Specifically, several configurations, metrics and features have changed or are no longer required in KRaft mode. To migrate an existing cluster from ZooKeeper mode to KRaft mode, please refer to the [ZooKeeper to KRaft Migration](/39/documentation.html#kraft_zk_migration) section.
+
+# Configurations
+
+ * Removed password encoder-related configurations. These configurations were used in ZooKeeper mode to define the key and backup key for encrypting sensitive data (e.g., passwords), specify the algorithm and key generation method for password encryption (e.g., AES, RSA), and control the key length and encryption strength.
+
+ * `password.encoder.secret`
+ * `password.encoder.old.secret`
+ * `password.encoder.keyfactory.algorithm`
+ * `password.encoder.cipher.algorithm`
+ * `password.encoder.key.length`
+ * `password.encoder.iterations`
+
+In KRaft mode, Kafka stores sensitive data in records, and the data is not encrypted in Kafka.
+
+ * Removed `control.plane.listener.name`. In ZooKeeper mode, Kafka relied on ZooKeeper to manage metadata, but some internal operations (e.g., communication between the controller (a.k.a. broker controller) and brokers) still required Kafka's internal control plane for coordination.
+
+In KRaft mode, Kafka eliminates its dependency on ZooKeeper, and the control plane functionality is fully integrated into Kafka itself. The process roles are clearly separated: brokers handle data-related requests, while the controllers (a.k.a. quorum controllers) manage metadata-related requests. The controllers use the Raft protocol for internal communication, which operates differently from the ZooKeeper model. Use the following parameters to configure the control plane listener:
+
+ * `controller.listener.names`
+ * `listeners`
+ * `listener.security.protocol.map`
+ * Removed graceful broker shutdown-related configurations. These configurations were used in ZooKeeper mode to define the maximum number of retries and the retry backoff time for controlled shutdowns, reducing the risk of unplanned leader changes and data inconsistencies.
+
+ * `controlled.shutdown.max.retries`
+ * `controlled.shutdown.retry.backoff.ms`
+
+In KRaft mode, Kafka uses the Raft protocol to manage metadata. The broker shutdown process differs from ZooKeeper mode as it is managed by the quorum-based controller. The shutdown process is more reliable and efficient due to automated leader transfers and metadata updates handled by the controller.
+
+ * Removed the broker id generation-related configurations. These configurations were used in ZooKeeper mode to enable automatic broker id generation and to control the broker id generation process.
+
+ * `reserved.broker.max.id`
+ * `broker.id.generation.enable`
+
+Kafka uses the node id in KRaft mode to identify servers.
+
+ * `node.id`
+ * Removed broker protocol version-related configurations. These configurations were used in ZooKeeper mode to define the communication protocol version between brokers. In KRaft mode, Kafka uses `metadata.version` to control the feature level of the cluster, which can be managed using `bin/kafka-features.sh`.
+
+ * `inter.broker.protocol.version`
+ * Removed dynamic configurations which relied on ZooKeeper. In KRaft mode, to change these configurations, you need to restart the broker/controller.
+
+ * `advertised.listeners`
+ * Removed the leader imbalance configuration used only in ZooKeeper. `leader.imbalance.per.broker.percentage` was used to limit the preferred leader election frequency in ZooKeeper.
+
+ * `leader.imbalance.per.broker.percentage`
+ * Removed ZooKeeper related configurations.
+
+ * `zookeeper.connect`
+ * `zookeeper.session.timeout.ms`
+ * `zookeeper.connection.timeout.ms`
+ * `zookeeper.set.acl`
+ * `zookeeper.max.in.flight.requests`
+ * `zookeeper.ssl.client.enable`
+ * `zookeeper.clientCnxnSocket`
+ * `zookeeper.ssl.keystore.location`
+ * `zookeeper.ssl.keystore.password`
+ * `zookeeper.ssl.keystore.type`
+ * `zookeeper.ssl.truststore.location`
+ * `zookeeper.ssl.truststore.password`
+ * `zookeeper.ssl.truststore.type`
+ * `zookeeper.ssl.protocol`
+ * `zookeeper.ssl.enabled.protocols`
+ * `zookeeper.ssl.cipher.suites`
+ * `zookeeper.ssl.endpoint.identification.algorithm`
+ * `zookeeper.ssl.crl.enable`
+ * `zookeeper.ssl.ocsp.enable`
+
+
+
+# Dynamic Log Levels
+
+ * The dynamic log levels feature allows you to change the log4j settings of a running broker or controller process without restarting it. The command-line syntax for setting dynamic log levels on brokers has not changed in KRaft mode. Here is an example of setting the log level on a broker:
+
+
+ ./bin/kafka-configs.sh --bootstrap-server localhost:9092 \
+ --entity-type broker-loggers \
+ --entity-name 1 \
+ --alter \
+ --add-config org.apache.kafka.raft.KafkaNetworkChannel=TRACE
+
+
+ * When setting dynamic log levels on the controllers, the `--bootstrap-controller` flag must be used. Here is an example of setting the log level on a controller:
+
+
+ ./bin/kafka-configs.sh --bootstrap-controller localhost:9093 \
+ --entity-type broker-loggers \
+ --entity-name 1 \
+ --alter \
+ --add-config org.apache.kafka.raft.KafkaNetworkChannel=TRACE
+
+
+
+Note that the entity-type must be specified as `broker-loggers`, even though we are changing a controller's log level rather than a broker's log level.
+
+ * When changing the log level of a combined node, which has both broker and controller roles, either `--bootstrap-server` or `--bootstrap-controller` may be used. Combined nodes have only a single set of log levels; there are no separate log levels for the broker and controller parts of the process.
+
+
+
+
+# Dynamic Controller Configurations
+
+ * Some Kafka configurations can be changed dynamically, without restarting the process. The command-line syntax for setting dynamic configurations on brokers has not changed in KRaft mode. Here is an example of setting the number of IO threads on a broker:
+
+
+ ./bin/kafka-configs.sh --bootstrap-server localhost:9092 \
+ --entity-type brokers \
+ --entity-name 1 \
+ --alter \
+ --add-config num.io.threads=5
+
+
+ * Controllers will apply all applicable cluster-level dynamic configurations. For example, the following command-line will change the `max.connections` setting on all of the brokers and all of the controllers in the cluster:
+
+
+ ./bin/kafka-configs.sh --bootstrap-server localhost:9092 \
+ --entity-type brokers \
+ --entity-default \
+ --alter \
+ --add-config max.connections=10000
+
+
+
+It is not currently possible to apply a dynamic configuration on only a single controller.
+
+
+
+
+# Metrics
+
+ * Removed the following metrics related to ZooKeeper. `ControlPlaneNetworkProcessorAvgIdlePercent` monitored the average fraction of time the control-plane network processors were idle, and `ControlPlaneExpiredConnectionsKilledCount` monitored the total number of connections disconnected, across all processors.
+
+ * `ControlPlaneNetworkProcessorAvgIdlePercent`
+ * `ControlPlaneExpiredConnectionsKilledCount`
+
+In KRaft mode, Kafka also provides metrics to monitor the network processors and expired connections. Use the following metrics to monitor the network processors and expired connections:
+
+ * `NetworkProcessorAvgIdlePercent`
+ * `ExpiredConnectionsKilledCount`
+ * Removed the metrics which are only used in ZooKeeper mode.
+
+ * `kafka.controller:type=ControllerChannelManager,name=QueueSize`
+ * `kafka.controller:type=ControllerChannelManager,name=RequestRateAndQueueTimeMs`
+ * `kafka.controller:type=ControllerEventManager,name=EventQueueSize`
+ * `kafka.controller:type=ControllerEventManager,name=EventQueueTimeMs`
+ * `kafka.controller:type=ControllerStats,name=AutoLeaderBalanceRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=ControlledShutdownRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=ControllerChangeRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=ControllerShutdownRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=IdleRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=IsrChangeRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=LeaderAndIsrResponseReceivedRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=ListPartitionReassignmentRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=LogDirChangeRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=ManualLeaderBalanceRateAndTimeMs`
+ * `kafka.controller:type=KafkaController,name=MigratingZkBrokerCount`
+ * `kafka.controller:type=ControllerStats,name=PartitionReassignmentRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=TopicChangeRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=TopicDeletionRateAndTimeMs`
+ * `kafka.controller:type=KafkaController,name=TopicsIneligibleToDeleteCount`
+ * `kafka.controller:type=ControllerStats,name=TopicUncleanLeaderElectionEnableRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=UncleanLeaderElectionEnableRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec`
+ * `kafka.controller:type=ControllerStats,name=UpdateFeaturesRateAndTimeMs`
+ * `kafka.controller:type=ControllerStats,name=UpdateMetadataResponseReceivedRateAndTimeMs`
+ * `kafka.controller:type=KafkaController,name=ActiveBrokerCount`
+ * `kafka.controller:type=KafkaController,name=ActiveControllerCount`
+ * `kafka.controller:type=KafkaController,name=ControllerState`
+ * `kafka.controller:type=KafkaController,name=FencedBrokerCount`
+ * `kafka.controller:type=KafkaController,name=GlobalPartitionCount`
+ * `kafka.controller:type=KafkaController,name=GlobalTopicCount`
+ * `kafka.controller:type=KafkaController,name=OfflinePartitionsCount`
+ * `kafka.controller:type=KafkaController,name=PreferredReplicaImbalanceCount`
+ * `kafka.controller:type=KafkaController,name=ReplicasIneligibleToDeleteCount`
+ * `kafka.controller:type=KafkaController,name=ReplicasToDeleteCount`
+ * `kafka.controller:type=KafkaController,name=TopicsToDeleteCount`
+ * `kafka.controller:type=KafkaController,name=ZkMigrationState`
+ * `kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=ElectLeader`
+ * `kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=topic`
+ * `kafka.server:type=DelayedOperationPurgatory,name=NumDelayedOperations,delayedOperation=ElectLeader`
+ * `kafka.server:type=DelayedOperationPurgatory,name=NumDelayedOperations,delayedOperation=topic`
+ * `kafka.server:type=SessionExpireListener,name=SessionState`
+ * `kafka.server:type=SessionExpireListener,name=ZooKeeperAuthFailuresPerSec`
+ * `kafka.server:type=SessionExpireListener,name=ZooKeeperDisconnectsPerSec`
+ * `kafka.server:type=SessionExpireListener,name=ZooKeeperExpiresPerSec`
+ * `kafka.server:type=SessionExpireListener,name=ZooKeeperReadOnlyConnectsPerSec`
+ * `kafka.server:type=SessionExpireListener,name=ZooKeeperSaslAuthenticationsPerSec`
+ * `kafka.server:type=SessionExpireListener,name=ZooKeeperSyncConnectsPerSec`
+ * `kafka.server:type=ZooKeeperClientMetrics,name=ZooKeeperRequestLatencyMs`
+
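+As an illustration, the KRaft-mode replacements can be read over JMX like any other Kafka MBean. The JMX address below is a placeholder and the MBean name is inferred from the metric name, so verify both against your own broker:
+
+    import javax.management.MBeanServerConnection;
+    import javax.management.ObjectName;
+    import javax.management.remote.JMXConnector;
+    import javax.management.remote.JMXConnectorFactory;
+    import javax.management.remote.JMXServiceURL;
+
+    public class NetworkIdleCheck {
+        public static void main(String[] args) throws Exception {
+            // Assumes the broker exposes JMX on localhost:9999 (placeholder address).
+            JMXServiceURL url = new JMXServiceURL(
+                    "service:jmx:rmi:///jndi/rmi://localhost:9999/jmxrmi");
+            try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
+                MBeanServerConnection mbs = connector.getMBeanServerConnection();
+                // MBean name inferred from the metric name; confirm it against your broker.
+                ObjectName idle = new ObjectName(
+                        "kafka.network:type=SocketServer,name=NetworkProcessorAvgIdlePercent");
+                System.out.println("NetworkProcessorAvgIdlePercent = " + mbs.getAttribute(idle, "Value"));
+            }
+        }
+    }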
+
+
+# Behavioral Change Reference
+
+This document catalogs the functional and operational differences between ZooKeeper mode and KRaft mode.
+
+ * **Configuration Value Size Limitation** : KRaft mode restricts configuration values to a maximum size of `Short.MAX_VALUE`, which prevents using the append operation to create larger configuration values.
+ * **Policy Class Deployment** : In KRaft mode, the `CreateTopicPolicy` and `AlterConfigPolicy` plugins run on the controller instead of the broker. This requires users to deploy the policy class JAR files on the controller and configure the parameters (`create.topic.policy.class.name` and `alter.config.policy.class.name`) on the controller.
+
+Note: If migrating from ZooKeeper mode, ensure policy JARs are moved from brokers to controllers.
+
+ * **Custom implementations of `KafkaPrincipalBuilder`** : In KRaft mode, custom implementations of `KafkaPrincipalBuilder` must also implement `KafkaPrincipalSerde`; otherwise brokers will not be able to forward requests to the controller (see the sketch below).
+
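+A minimal sketch of what that requirement looks like is shown below. The class name, the principal derivation, and the colon-separated encoding are illustrative assumptions, not a recommended implementation:
+
+    import java.nio.charset.StandardCharsets;
+    import org.apache.kafka.common.errors.SerializationException;
+    import org.apache.kafka.common.security.auth.AuthenticationContext;
+    import org.apache.kafka.common.security.auth.KafkaPrincipal;
+    import org.apache.kafka.common.security.auth.KafkaPrincipalBuilder;
+    import org.apache.kafka.common.security.auth.KafkaPrincipalSerde;
+
+    // Hypothetical builder that also implements the serde so brokers can forward requests.
+    public class ExamplePrincipalBuilder implements KafkaPrincipalBuilder, KafkaPrincipalSerde {
+
+        @Override
+        public KafkaPrincipal build(AuthenticationContext context) {
+            // Placeholder mapping: derive the principal name however your deployment requires.
+            return new KafkaPrincipal(KafkaPrincipal.USER_TYPE, context.securityProtocol().name());
+        }
+
+        @Override
+        public byte[] serialize(KafkaPrincipal principal) throws SerializationException {
+            // Naive colon-separated encoding, for illustration only.
+            return (principal.getPrincipalType() + ":" + principal.getName())
+                    .getBytes(StandardCharsets.UTF_8);
+        }
+
+        @Override
+        public KafkaPrincipal deserialize(byte[] bytes) throws SerializationException {
+            String[] parts = new String(bytes, StandardCharsets.UTF_8).split(":", 2);
+            if (parts.length != 2)
+                throw new SerializationException("Malformed principal bytes");
+            return new KafkaPrincipal(parts[0], parts[1]);
+        }
+    }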
+
diff --git a/content/en/41/implementation/_index.md b/content/en/41/implementation/_index.md
new file mode 100644
index 000000000..7426a2273
--- /dev/null
+++ b/content/en/41/implementation/_index.md
@@ -0,0 +1,10 @@
+---
+title: Implementation
+description:
+weight: 5
+tags: ['kafka', 'docs', 'implementation']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/implementation/distribution.md b/content/en/41/implementation/distribution.md
new file mode 100644
index 000000000..7c3b1c1ba
--- /dev/null
+++ b/content/en/41/implementation/distribution.md
@@ -0,0 +1,19 @@
+---
+title: Distribution
+description: Distribution
+weight: 5
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Distribution
+
+## Consumer Offset Tracking
+
+The Kafka consumer tracks the maximum offset it has consumed in each partition and can commit offsets so that it can resume from those offsets in the event of a restart. Kafka provides the option to store all the offsets for a given consumer group in a designated broker (for that group) called the group coordinator; that is, any consumer instance in that consumer group should send its offset commits and fetches to that group coordinator (broker). Consumer groups are assigned to coordinators based on their group names. A consumer can look up its coordinator by issuing a FindCoordinatorRequest to any Kafka broker and reading the FindCoordinatorResponse, which will contain the coordinator details. The consumer can then proceed to commit or fetch offsets from the coordinator broker. If the coordinator moves, the consumer will need to rediscover it. Offset commits can be done automatically or manually by the consumer instance.
+
+When the group coordinator receives an OffsetCommitRequest, it appends the request to a special compacted Kafka topic named `__consumer_offsets`. The broker sends a successful offset commit response to the consumer only after all the replicas of the offsets topic receive the offsets. In case the offsets fail to replicate within a configurable timeout, the offset commit will fail and the consumer may retry the commit after backing off. The brokers periodically compact the offsets topic since it only needs to maintain the most recent offset commit per partition. The coordinator also caches the offsets in an in-memory table in order to serve offset fetches quickly.
+
+When the coordinator receives an offset fetch request, it simply returns the last committed offset vector from the offsets cache. In case the coordinator was just started, or if it just became the coordinator for a new set of consumer groups (by becoming a leader for a partition of the offsets topic), it may need to load the offsets topic partition into the cache. In this case, the offset fetch will fail with a CoordinatorLoadInProgressException and the consumer may retry the OffsetFetchRequest after backing off.
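+
+In practice a client does not issue FindCoordinatorRequests itself; the consumer handles coordinator discovery and the retries described above. A minimal manual-commit sketch, where the bootstrap address, group id, and topic name are placeholders:
+
+    import java.time.Duration;
+    import java.util.List;
+    import java.util.Properties;
+    import org.apache.kafka.clients.consumer.ConsumerConfig;
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaConsumer;
+    import org.apache.kafka.common.serialization.StringDeserializer;
+
+    public class ManualCommitExample {
+        public static void main(String[] args) {
+            Properties props = new Properties();
+            props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+            props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group");
+            props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // commit manually
+            props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+            props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
+                consumer.subscribe(List.of("example-topic"));
+                while (true) {
+                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
+                    // Process the records, then commit; the consumer sends the OffsetCommitRequest
+                    // to the group coordinator and rediscovers the coordinator if it moves.
+                    consumer.commitSync();
+                }
+            }
+        }
+    }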
diff --git a/content/en/41/implementation/log.md b/content/en/41/implementation/log.md
new file mode 100644
index 000000000..411f46017
--- /dev/null
+++ b/content/en/41/implementation/log.md
@@ -0,0 +1,61 @@
+---
+title: Log
+description: Log
+weight: 4
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Log
+
+A log for a topic named "my-topic" with two partitions consists of two directories (namely `my-topic-0` and `my-topic-1`) populated with data files containing the messages for that topic. The format of the log files is a sequence of "log entries"; each log entry is a 4 byte integer _N_ storing the message length which is followed by the _N_ message bytes. Each message is uniquely identified by a 64-bit integer _offset_ giving the byte position of the start of this message in the stream of all messages ever sent to that topic on that partition. The on-disk format of each message is given below. Each log file is named with the offset of the first message it contains. So the first file created will be 00000000000000000000.log, and each additional file will have an integer name roughly _S_ bytes from the previous file where _S_ is the max log file size given in the configuration.
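+
+For illustration, the naming convention is simply the base offset of the segment, zero-padded to 20 digits:
+
+    // Illustrative only: a segment whose first message has offset 368769 (an arbitrary example).
+    long baseOffset = 368769L;
+    String segmentName = String.format("%020d.log", baseOffset); // "00000000000000368769.log"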
+
+The exact binary format for records is versioned and maintained as a standard interface so record batches can be transferred between producer, broker, and client without recopying or conversion when desirable. The previous section included details about the on-disk format of records.
+
+The use of the message offset as the message id is unusual. Our original idea was to use a GUID generated by the producer, and maintain a mapping from GUID to offset on each broker. But since a consumer must maintain an ID for each server, the global uniqueness of the GUID provides no value. Furthermore, the complexity of maintaining the mapping from a random id to an offset requires a heavyweight index structure which must be synchronized with disk, essentially requiring a full persistent random-access data structure. Thus to simplify the lookup structure we decided to use a simple per-partition atomic counter which could be coupled with the partition id and node id to uniquely identify a message; this makes the lookup structure simpler, though multiple seeks per consumer request are still likely. However, once we settled on a counter, the jump to directly using the offset seemed natural--both after all are monotonically increasing integers unique to a partition. Since the offset is hidden from the consumer API this decision is ultimately an implementation detail and we went with the more efficient approach.
+
+
+
+## Writes
+
+The log allows serial appends which always go to the last file. This file is rolled over to a fresh file when it reaches a configurable size (say 1GB). The log takes two configuration parameters: _M_ , which gives the number of messages to write before forcing the OS to flush the file to disk, and _S_ , which gives a number of seconds after which a flush is forced. This gives a durability guarantee of losing at most _M_ messages or _S_ seconds of data in the event of a system crash.
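+
+In the current broker these knobs surface as the `flush.messages` and `flush.ms` topic configurations (with `log.flush.interval.messages` and `log.flush.interval.ms` as the broker-wide defaults). A sketch of setting them on a single topic with the admin client, where the topic name and bootstrap address are placeholders:
+
+    import java.util.Collection;
+    import java.util.List;
+    import java.util.Map;
+    import java.util.Properties;
+    import org.apache.kafka.clients.admin.Admin;
+    import org.apache.kafka.clients.admin.AlterConfigOp;
+    import org.apache.kafka.clients.admin.ConfigEntry;
+    import org.apache.kafka.common.config.ConfigResource;
+
+    public class FlushConfigExample {
+        public static void main(String[] args) throws Exception {
+            Properties props = new Properties();
+            props.put("bootstrap.servers", "localhost:9092");
+            try (Admin admin = Admin.create(props)) {
+                ConfigResource topic = new ConfigResource(ConfigResource.Type.TOPIC, "my-topic");
+                Collection<AlterConfigOp> ops = List.of(
+                        new AlterConfigOp(new ConfigEntry("flush.messages", "10000"), AlterConfigOp.OpType.SET), // M
+                        new AlterConfigOp(new ConfigEntry("flush.ms", "1000"), AlterConfigOp.OpType.SET));       // S
+                admin.incrementalAlterConfigs(Map.of(topic, ops)).all().get();
+            }
+        }
+    }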
+
+## Reads
+
+Reads are done by giving the 64-bit logical offset of a message and an _S_ -byte max chunk size. This will return an iterator over the messages contained in the _S_ -byte buffer. _S_ is intended to be larger than any single message, but in the event of an abnormally large message, the read can be retried multiple times, each time doubling the buffer size, until the message is read successfully. A maximum message and buffer size can be specified to make the server reject messages larger than some size, and to give a bound to the client on the maximum it needs to ever read to get a complete message. It is likely that the read buffer will end with a partial message; this is easily detected by the size delimiting.
+
+The actual process of reading from an offset requires first locating the log segment file in which the data is stored, calculating the file-specific offset from the global offset value, and then reading from that file offset. The search is done as a simple binary search variation against an in-memory range maintained for each file.
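+
+A simplified sketch of that lookup, assuming nothing more than a sorted array of segment base offsets (the real broker keeps a richer per-segment index):
+
+    // Find the segment that may contain targetOffset: the one with the largest
+    // base offset that is <= targetOffset (a "floor" binary search).
+    static int segmentIndexFor(long[] sortedBaseOffsets, long targetOffset) {
+        int lo = 0, hi = sortedBaseOffsets.length - 1, result = -1;
+        while (lo <= hi) {
+            int mid = (lo + hi) >>> 1;
+            if (sortedBaseOffsets[mid] <= targetOffset) {
+                result = mid;      // candidate segment; keep searching to the right
+                lo = mid + 1;
+            } else {
+                hi = mid - 1;
+            }
+        }
+        return result; // -1 means targetOffset precedes the earliest retained segment
+    }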
+
+The log provides the capability of getting the most recently written message to allow clients to start subscribing as of "right now". This is also useful in the case the consumer fails to consume its data within its SLA-specified number of days. In this case when the client attempts to consume a non-existent offset it is given an OutOfRangeException and can either reset itself or fail as appropriate to the use case.
+
+The following is the format of the results sent to the consumer.
+
+
+ MessageSetSend (fetch result)
+
+ total length : 4 bytes
+ error code : 2 bytes
+ message 1 : x bytes
+ ...
+ message n : x bytes
+
+
+ MultiMessageSetSend (multiFetch result)
+
+ total length : 4 bytes
+ error code : 2 bytes
+ messageSetSend 1
+ ...
+ messageSetSend n
+
+## Deletes
+
+Data is deleted one log segment at a time. The log manager applies two metrics to identify segments which are eligible for deletion: time and size. For time-based policies, the record timestamps are considered, with the largest timestamp in a segment file (order of records is not relevant) defining the retention time for the entire segment. Size-based retention is disabled by default. When enabled the log manager keeps deleting the oldest segment file until the overall size of the partition is within the configured limit again. If both policies are enabled at the same time, a segment that is eligible for deletion due to either policy will be deleted. To avoid locking reads while still allowing deletes that modify the segment list we use a copy-on-write style segment list implementation that provides consistent views to allow a binary search to proceed on an immutable static snapshot view of the log segments while deletes are progressing.
+
+## Guarantees
+
+The log provides a configuration parameter _M_ which controls the maximum number of messages that are written before forcing a flush to disk. On startup a log recovery process is run that iterates over all messages in the newest log segment and verifies that each message entry is valid. A message entry is valid if the sum of its size and offset is less than the length of the file AND the CRC32 of the message payload matches the CRC stored with the message. In the event corruption is detected the log is truncated to the last valid offset.
+
+Note that two kinds of corruption must be handled: truncation in which an unwritten block is lost due to a crash, and corruption in which a nonsense block is ADDED to the file. The reason for this is that in general the OS makes no guarantee of the write order between the file inode and the actual block data so in addition to losing written data the file can gain nonsense data if the inode is updated with a new size but a crash occurs before the block containing that data is written. The CRC detects this corner case, and prevents it from corrupting the log (though the unwritten messages are, of course, lost).
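+
+A simplified sketch of such a recovery scan is shown below. The entry layout here (a 4-byte length, a 4-byte CRC, then the payload) is deliberately simplified; the real on-disk batch format is described in the Message Format section:
+
+    import java.io.IOException;
+    import java.io.RandomAccessFile;
+    import java.util.zip.CRC32;
+
+    // Walk [length][crc][payload] entries, stop at the first invalid one, and
+    // truncate the file to the last valid position. Assumes the file is opened in "rw" mode.
+    static void recover(RandomAccessFile log) throws IOException {
+        long validUpTo = 0;
+        while (log.getFilePointer() + 8 <= log.length()) {
+            int length = log.readInt();
+            long storedCrc = Integer.toUnsignedLong(log.readInt());
+            if (length < 0 || log.getFilePointer() + length > log.length())
+                break; // truncated entry: the size check failed
+            byte[] payload = new byte[length];
+            log.readFully(payload);
+            CRC32 crc = new CRC32();
+            crc.update(payload);
+            if (crc.getValue() != storedCrc)
+                break; // corrupt entry: CRC mismatch
+            validUpTo = log.getFilePointer();
+        }
+        log.setLength(validUpTo); // drop everything after the last valid entry
+    }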
diff --git a/content/en/41/implementation/message-format.md b/content/en/41/implementation/message-format.md
new file mode 100644
index 000000000..fea94ceec
--- /dev/null
+++ b/content/en/41/implementation/message-format.md
@@ -0,0 +1,95 @@
+---
+title: Message Format
+description: Message Format
+weight: 3
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Message Format
+
+Messages (aka Records) are always written in batches. The technical term for a batch of messages is a record batch, and a record batch contains one or more records. In the degenerate case, we could have a record batch containing a single record. Record batches and records have their own headers. The format of each is described below.
+
+## Record Batch
+
+The following is the on-disk format of a RecordBatch.
+
+
+ baseOffset: int64
+ batchLength: int32
+ partitionLeaderEpoch: int32
+ magic: int8 (current magic value is 2)
+ crc: uint32
+ attributes: int16
+ bit 0~2:
+ 0: no compression
+ 1: gzip
+ 2: snappy
+ 3: lz4
+ 4: zstd
+ bit 3: timestampType
+ bit 4: isTransactional (0 means not transactional)
+ bit 5: isControlBatch (0 means not a control batch)
+ bit 6: hasDeleteHorizonMs (0 means baseTimestamp is not set as the delete horizon for compaction)
+ bit 7~15: unused
+ lastOffsetDelta: int32
+ baseTimestamp: int64
+ maxTimestamp: int64
+ producerId: int64
+ producerEpoch: int16
+ baseSequence: int32
+ recordsCount: int32
+ records: [Record]
+
+Note that when compression is enabled, the compressed record data is serialized directly following the count of the number of records.
+
+The CRC covers the data from the attributes to the end of the batch (i.e. all the bytes that follow the CRC). It is located after the magic byte, which means that clients must parse the magic byte before deciding how to interpret the bytes between the batch length and the magic byte. The partition leader epoch field is not included in the CRC computation to avoid the need to recompute the CRC when this field is assigned for every batch that is received by the broker. The CRC-32C (Castagnoli) polynomial is used for the computation.
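+
+For reference, JDK 9 and later ship a CRC-32C implementation, so a client-side check of the batch CRC can be sketched as follows. The offsets are derived from the layout above (8 + 4 + 4 + 1 = 17 bytes precede the CRC, and the attributes field starts at byte 21) and assume the buffer holds exactly one batch starting at position 0:
+
+    import java.nio.ByteBuffer;
+    import java.util.zip.CRC32C;
+
+    // Recompute the CRC over everything from the attributes field to the end of the batch
+    // and compare it with the stored value.
+    static boolean crcMatches(ByteBuffer batch) {
+        int batchLength = batch.getInt(8);             // bytes following the batchLength field
+        long storedCrc = Integer.toUnsignedLong(batch.getInt(17));
+        ByteBuffer covered = batch.duplicate();
+        covered.limit(12 + batchLength);               // end of the batch
+        covered.position(21);                          // first byte of the attributes field
+        CRC32C crc = new CRC32C();
+        crc.update(covered);
+        return crc.getValue() == storedCrc;
+    }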
+
+On compaction, we preserve the first and last offset/sequence numbers from the original batch when the log is cleaned. This is required in order to be able to restore the producer's state when the log is reloaded. If we did not retain the last sequence number, for example, then after a partition leader failure, the producer might see an OutOfSequence error. The base sequence number must be preserved for duplicate checking (the broker checks incoming Produce requests for duplicates by verifying that the first and last sequence numbers of the incoming batch match the last from that producer). As a result, it is possible to have empty batches in the log when all the records in the batch are cleaned but the batch is still retained in order to preserve a producer's last sequence number. One oddity here is that the baseTimestamp field is not preserved during compaction, so it will change if the first record in the batch is compacted away.
+
+Compaction may also modify the baseTimestamp if the record batch contains records with a null payload or aborted transaction markers. The baseTimestamp will be set to the timestamp of when those records should be deleted with the delete horizon attribute bit also set.
+
+### Control Batches
+
+A control batch contains a single record called the control record. Control records should not be passed on to applications. Instead, they are used by consumers to filter out aborted transactional messages.
+
+The key of a control record conforms to the following schema:
+
+
+ version: int16 (current version is 0)
+ type: int16 (0 indicates an abort marker, 1 indicates a commit)
+
+The schema for the value of a control record is dependent on the type. The value is opaque to clients.
+
+## Record
+
+The on-disk format of each record is delineated below.
+
+
+ length: varint
+ attributes: int8
+ bit 0~7: unused
+ timestampDelta: varlong
+ offsetDelta: varint
+ keyLength: varint
+ key: byte[]
+ valueLength: varint
+ value: byte[]
+ headersCount: varint
+ Headers => [Header]
+
+### Record Header
+
+
+ headerKeyLength: varint
+ headerKey: String
+ headerValueLength: varint
+ Value: byte[]
+
+We use the same varint encoding as Protobuf. More information on the latter can be found [here](https://developers.google.com/protocol-buffers/docs/encoding#varints). The count of headers in a record is also encoded as a varint.
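+
+A small sketch of that encoding, purely for illustration: signed values are zig-zag mapped first (as in Protobuf's `sint` types), then written seven bits at a time with the high bit of each byte acting as a continuation flag:
+
+    import java.io.ByteArrayOutputStream;
+
+    // Protobuf-style varint for a signed 32-bit value.
+    static byte[] writeVarint(int value) {
+        int v = (value << 1) ^ (value >> 31);   // zig-zag: small negative values stay small
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        while ((v & 0xFFFFFF80) != 0) {
+            out.write((v & 0x7F) | 0x80);       // set the high bit: more bytes follow
+            v >>>= 7;
+        }
+        out.write(v);                           // final byte with the high bit clear
+        return out.toByteArray();
+    }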
+
+## Old Message Format
+
+Prior to Kafka 0.11, messages were transferred and stored in _message sets_. See [Old Message Format](https://kafka.apache.org/39/#messageset) for more details.
diff --git a/content/en/41/implementation/messages.md b/content/en/41/implementation/messages.md
new file mode 100644
index 000000000..30775ac4c
--- /dev/null
+++ b/content/en/41/implementation/messages.md
@@ -0,0 +1,13 @@
+---
+title: Messages
+description: Messages
+weight: 2
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Messages
+
+Messages consist of a variable-length header, a variable-length opaque key byte array and a variable-length opaque value byte array. The format of the header is described in the following section. Leaving the key and value opaque is the right decision: there is a great deal of progress being made on serialization libraries right now, and any particular choice is unlikely to be right for all uses. Needless to say a particular application using Kafka would likely mandate a particular serialization type as part of its usage. The `RecordBatch` interface is simply an iterator over messages with specialized methods for bulk reading and writing to an NIO `Channel`.
diff --git a/content/en/41/implementation/network-layer.md b/content/en/41/implementation/network-layer.md
new file mode 100644
index 000000000..6faa03778
--- /dev/null
+++ b/content/en/41/implementation/network-layer.md
@@ -0,0 +1,13 @@
+---
+title: Network Layer
+description: Network Layer
+weight: 1
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Network Layer
+
+The network layer is a fairly straight-forward NIO server, and will not be described in great detail. The sendfile implementation is done by giving the `TransferableRecords` interface a `writeTo` method. This allows the file-backed message set to use the more efficient `transferTo` implementation instead of an in-process buffered write. The threading model is a single acceptor thread and _N_ processor threads which handle a fixed number of connections each. This design has been pretty thoroughly tested [elsewhere](https://web.archive.org/web/20120619234320/https://sna-projects.com/blog/2009/08/introducing-the-nio-socketserver-implementation/) and found to be simple to implement and fast. The protocol is kept quite simple to allow for future implementation of clients in other languages.
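+
+The idea behind that `writeTo` hook can be sketched with plain NIO. The path, position, and count below are placeholders; the point is that `FileChannel.transferTo` hands bytes from the page cache to the socket without copying them through an application buffer:
+
+    import java.io.IOException;
+    import java.nio.channels.FileChannel;
+    import java.nio.channels.SocketChannel;
+    import java.nio.file.Path;
+    import java.nio.file.StandardOpenOption;
+
+    // Zero-copy transfer of a region of a log segment to a connected socket.
+    static long sendFileRegion(Path logSegment, SocketChannel socket,
+                               long position, long count) throws IOException {
+        try (FileChannel file = FileChannel.open(logSegment, StandardOpenOption.READ)) {
+            long transferred = 0;
+            while (transferred < count) {
+                long n = file.transferTo(position + transferred, count - transferred, socket);
+                if (n <= 0)
+                    break; // socket not writable right now; a real server would wait on its selector
+                transferred += n;
+            }
+            return transferred;
+        }
+    }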
diff --git a/content/en/41/kafka-connect/_index.md b/content/en/41/kafka-connect/_index.md
new file mode 100644
index 000000000..9f00c35b3
--- /dev/null
+++ b/content/en/41/kafka-connect/_index.md
@@ -0,0 +1,10 @@
+---
+title: Kafka Connect
+description:
+weight: 8
+tags: ['kafka', 'docs', 'security']
+aliases:
+keywords:
+type: docs
+---
+
diff --git a/content/en/41/kafka-connect/administration.md b/content/en/41/kafka-connect/administration.md
new file mode 100644
index 000000000..178cb6e36
--- /dev/null
+++ b/content/en/41/kafka-connect/administration.md
@@ -0,0 +1,60 @@
+---
+title: Administration
+description: Administration
+weight: 4
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Administration
+
+Kafka Connect's REST layer provides a set of APIs to enable administration of the cluster. This includes APIs to view the configuration of connectors and the status of their tasks, as well as to alter their current behavior (e.g. changing configuration and restarting tasks).
+
+When a connector is first submitted to the cluster, a rebalance is triggered between the Connect workers in order to distribute the load that consists of the tasks of the new connector. This same rebalancing procedure is also used when connectors increase or decrease the number of tasks they require, when a connector's configuration is changed, or when a worker is added or removed from the group as part of an intentional upgrade of the Connect cluster or due to a failure.
+
+In versions prior to 2.3.0, the Connect workers would rebalance the full set of connectors and their tasks in the cluster as a simple way to make sure that each worker has approximately the same amount of work. This behavior can still be enabled by setting `connect.protocol=eager`.
+
+Starting with 2.3.0, Kafka Connect uses by default a protocol that performs [incremental cooperative rebalancing](https://cwiki.apache.org/confluence/x/Y4MCBg), which incrementally balances the connectors and tasks across the Connect workers, affecting only tasks that are new, to be removed, or need to move from one worker to another. Other tasks are not stopped and restarted during the rebalance, as they would have been with the old protocol.
+
+If a Connect worker leaves the group, intentionally or due to a failure, Connect waits for `scheduled.rebalance.max.delay.ms` before triggering a rebalance. This delay defaults to five minutes (`300000ms`) to tolerate failures or upgrades of workers without immediately redistributing the load of a departing worker. If this worker returns within the configured delay, it gets its previously assigned tasks in full. However, this means that the tasks will remain unassigned until the time specified by `scheduled.rebalance.max.delay.ms` elapses. If a worker does not return within that time limit, Connect will reassign those tasks among the remaining workers in the Connect cluster.
+
+The new Connect protocol is enabled when all the workers that form the Connect cluster are configured with `connect.protocol=compatible`, which is also the default value when this property is missing. Therefore, upgrading to the new Connect protocol happens automatically when all the workers upgrade to 2.3.0. A rolling upgrade of the Connect cluster will activate incremental cooperative rebalancing when the last worker joins on version 2.3.0.
+
+You can use the REST API to view the current status of a connector and its tasks, including the ID of the worker to which each was assigned. For example, the `GET /connectors/file-source/status` request shows the status of a connector named `file-source`:
+
+
+ {
+ "name": "file-source",
+ "connector": {
+ "state": "RUNNING",
+ "worker_id": "192.168.1.208:8083"
+ },
+ "tasks": [
+ {
+ "id": 0,
+ "state": "RUNNING",
+ "worker_id": "192.168.1.209:8083"
+ }
+ ]
+ }
+
+Connectors and their tasks publish status updates to a shared topic (configured with `status.storage.topic`) which all workers in the cluster monitor. Because the workers consume this topic asynchronously, there is typically a (short) delay before a state change is visible through the status API. The following states are possible for a connector or one of its tasks:
+
+ * **UNASSIGNED:** The connector/task has not yet been assigned to a worker.
+ * **RUNNING:** The connector/task is running.
+ * **PAUSED:** The connector/task has been administratively paused.
+ * **STOPPED:** The connector has been stopped. Note that this state is not applicable to tasks because the tasks for a stopped connector are shut down and won't be visible in the status API.
+ * **FAILED:** The connector/task has failed (usually by raising an exception, which is reported in the status output).
+ * **RESTARTING:** The connector/task is either actively restarting or is expected to restart soon.
+
+
+
+In most cases, connector and task states will match, though they may be different for short periods of time when changes are occurring or if tasks have failed. For example, when a connector is first started, there may be a noticeable delay before the connector and its tasks have all transitioned to the RUNNING state. States will also diverge when tasks fail since Connect does not automatically restart failed tasks. To restart a connector/task manually, you can use the restart APIs listed above. Note that if you try to restart a task while a rebalance is taking place, Connect will return a 409 (Conflict) status code. You can retry after the rebalance completes, but it might not be necessary since rebalances effectively restart all the connectors and tasks in the cluster.
+
+Starting with 2.5.0, Kafka Connect uses the `status.storage.topic` to also store information related to the topics that each connector is using. Connect Workers use these per-connector topic status updates to respond to requests to the REST endpoint `GET /connectors/{name}/topics` by returning the set of topic names that a connector is using. A request to the REST endpoint `PUT /connectors/{name}/topics/reset` resets the set of active topics for a connector and allows a new set to be populated, based on the connector's latest pattern of topic usage. Upon connector deletion, the set of the connector's active topics is also deleted. Topic tracking is enabled by default but can be disabled by setting `topic.tracking.enable=false`. If you want to disallow requests to reset the active topics of connectors during runtime, set the Worker property `topic.tracking.allow.reset=false`.
+
+It's sometimes useful to temporarily stop the message processing of a connector. For example, if the remote system is undergoing maintenance, it would be preferable for source connectors to stop polling it for new data instead of filling logs with exception spam. For this use case, Connect offers a pause/resume API. While a source connector is paused, Connect will stop polling it for additional records. While a sink connector is paused, Connect will stop pushing new messages to it. The pause state is persistent, so even if you restart the cluster, the connector will not begin message processing again until the task has been resumed. Note that there may be a delay before all of a connector's tasks have transitioned to the PAUSED state since it may take time for them to finish whatever processing they were in the middle of when being paused. Additionally, failed tasks will not transition to the PAUSED state until they have been restarted.
+
+In 3.5.0, Connect introduced a stop API that completely shuts down the tasks for a connector and deallocates any resources claimed by them. This is different from pausing a connector where tasks are left idling and any resources claimed by them are left allocated (which allows the connector to begin processing data quickly once it is resumed). Stopping a connector is more efficient from a resource usage standpoint than pausing it, but can cause it to take longer to begin processing data once resumed. Note that the offsets for a connector can be only modified via the offsets management endpoints if it is in the stopped state.
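+
+As an illustration, stopping and then inspecting a connector is just two REST calls; the worker address and connector name below are placeholders:
+
+    import java.net.URI;
+    import java.net.http.HttpClient;
+    import java.net.http.HttpRequest;
+    import java.net.http.HttpResponse;
+
+    public class StopConnectorExample {
+        public static void main(String[] args) throws Exception {
+            HttpClient client = HttpClient.newHttpClient();
+            HttpRequest stop = HttpRequest.newBuilder()
+                    .uri(URI.create("http://localhost:8083/connectors/file-source/stop"))
+                    .PUT(HttpRequest.BodyPublishers.noBody())
+                    .build();
+            System.out.println("stop -> HTTP " + client.send(stop, HttpResponse.BodyHandlers.ofString()).statusCode());
+
+            // After the stop completes, the status API reports the connector as STOPPED with no tasks.
+            HttpRequest status = HttpRequest.newBuilder()
+                    .uri(URI.create("http://localhost:8083/connectors/file-source/status"))
+                    .GET()
+                    .build();
+            System.out.println(client.send(status, HttpResponse.BodyHandlers.ofString()).body());
+        }
+    }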
diff --git a/content/en/41/kafka-connect/connector-development-guide.md b/content/en/41/kafka-connect/connector-development-guide.md
new file mode 100644
index 000000000..55fd15664
--- /dev/null
+++ b/content/en/41/kafka-connect/connector-development-guide.md
@@ -0,0 +1,396 @@
+---
+title: Connector Development Guide
+description: Connector Development Guide
+weight: 3
+tags: ['kafka', 'docs']
+aliases:
+keywords:
+type: docs
+---
+
+# Connector Development Guide
+
+This guide describes how developers can write new connectors for Kafka Connect to move data between Kafka and other systems. It briefly reviews a few key concepts and then describes how to create a simple connector.
+
+## Core Concepts and APIs
+
+### Connectors and Tasks
+
+To copy data between Kafka and another system, users create a `Connector` for the system they want to pull data from or push data to. Connectors come in two flavors: `SourceConnectors` import data from another system (e.g. `JDBCSourceConnector` would import a relational database into Kafka) and `SinkConnectors` export data (e.g. `HDFSSinkConnector` would export the contents of a Kafka topic to an HDFS file).
+
+`Connectors` do not perform any data copying themselves: their configuration describes the data to be copied, and the `Connector` is responsible for breaking that job into a set of `Tasks` that can be distributed to workers. These `Tasks` also come in two corresponding flavors: `SourceTask` and `SinkTask`.
+
+With an assignment in hand, each `Task` must copy its subset of the data to or from Kafka. In Kafka Connect, it should always be possible to frame these assignments as a set of input and output streams consisting of records with consistent schemas. Sometimes this mapping is obvious: each file in a set of log files can be considered a stream with each parsed line forming a record using the same schema and offsets stored as byte offsets in the file. In other cases it may require more effort to map to this model: a JDBC connector can map each table to a stream, but the offset is less clear. One possible mapping uses a timestamp column to generate queries incrementally returning new data, and the last queried timestamp can be used as the offset.
+
+### Streams and Records
+
+Each stream should be a sequence of key-value records. Both the keys and values can have complex structure -- many primitive types are provided, but arrays, objects, and nested data structures can be represented as well. The runtime data format does not assume any particular serialization format; this conversion is handled internally by the framework.
+
+In addition to the key and value, records (both those generated by sources and those delivered to sinks) have associated stream IDs and offsets. These are used by the framework to periodically commit the offsets of data that have been processed so that in the event of failures, processing can resume from the last committed offsets, avoiding unnecessary reprocessing and duplication of events.
+
+### Dynamic Connectors
+
+Not all jobs are static, so `Connector` implementations are also responsible for monitoring the external system for any changes that might require reconfiguration. For example, in the `JDBCSourceConnector` example, the `Connector` might assign a set of tables to each `Task`. When a new table is created, it must discover this so it can assign the new table to one of the `Tasks` by updating its configuration. When it notices a change that requires reconfiguration (or a change in the number of `Tasks`), it notifies the framework and the framework updates any corresponding `Tasks`.
+
+## Developing a Simple Connector
+
+Developing a connector only requires implementing two interfaces, the `Connector` and `Task`. A simple example is included with the source code for Kafka in the `file` package. This connector is meant for use in standalone mode and has implementations of a `SourceConnector`/`SourceTask` to read each line of a file and emit it as a record and a `SinkConnector`/`SinkTask` that writes each record to a file.
+
+The rest of this section will walk through some code to demonstrate the key steps in creating a connector, but developers should also refer to the full example source code as many details are omitted for brevity.
+
+### Connector Example
+
+We'll cover the `SourceConnector` as a simple example. `SinkConnector` implementations are very similar. Pick a package and class name; these examples will use the `FileStreamSourceConnector`, but substitute your own class name where appropriate. In order to make the plugin discoverable at runtime, add a ServiceLoader manifest to your resources in `META-INF/services/org.apache.kafka.connect.source.SourceConnector` with your fully-qualified class name on a single line:
+
+
+ com.example.FileStreamSourceConnector
+
+Create a class that inherits from `SourceConnector` and add a field that will store the configuration information to be propagated to the task(s) (the topic to send data to, and optionally the filename to read from and the maximum batch size):
+
+
+ package com.example;
+
+ public class FileStreamSourceConnector extends SourceConnector {
+ private Map<String, String> props;
+
+The easiest method to fill in is `taskClass()`, which defines the class that should be instantiated in worker processes to actually read the data:
+
+
+ @Override
+ public Class<? extends Task> taskClass() {
+ return FileStreamSourceTask.class;
+ }
+
+We will define the `FileStreamSourceTask` class below. Next, we add some standard lifecycle methods, `start()` and `stop()`:
+
+
+ @Override
+ public void start(Map<String, String> props) {
+ // Initialization logic and setting up of resources can take place in this method.
+ // This connector doesn't need to do any of that, but we do log a helpful message to the user.
+
+ this.props = props;
+ AbstractConfig config = new AbstractConfig(CONFIG_DEF, props);
+ String filename = config.getString(FILE_CONFIG);
+ filename = (filename == null || filename.isEmpty()) ? "standard input" : config.getString(FILE_CONFIG);
+ log.info("Starting file source connector reading from {}", filename);
+ }
+
+ @Override
+ public void stop() {
+ // Nothing to do since no background monitoring is required.
+ }
+
+Finally, the real core of the implementation is in `taskConfigs()`. In this case we are only handling a single file, so even though we may be permitted to generate more tasks as per the `maxTasks` argument, we return a list with only one entry:
+
+
+ @Override
+ public List