Commit 9ec26ee

feat(clients): add new interface ConsumedErrorHandler (#1)
This commit adds the new interface ConsumedErrorHandler to handle errors thrown while processing records. In addition, it adds two built-in implementations:

- CloseTaskOnConsumedError
- LogAndCommitOnConsumedError

Additional changes:

- ConsumerTask interface now exposes two new methods: commitAsync, commitSync
- ConsumerWorker.Builder has a new onConsumedError method
- ConsumerBatchRecordsListener is only invoked on a non-empty list of records
- add some unit tests for KafkaConsumerTask

Resolves: #1
1 parent a828239 commit 9ec26ee

7 files changed, +415 -29 lines changed

clients/src/main/kotlin/io/streamthoughts/kafka/clients/consumer/ConsumerTask.kt

Lines changed: 26 additions & 1 deletion
@@ -19,6 +19,9 @@
 package io.streamthoughts.kafka.clients.consumer
 
 import org.apache.kafka.clients.consumer.Consumer
+import org.apache.kafka.clients.consumer.OffsetAndMetadata
+import org.apache.kafka.common.TopicPartition
+import java.time.Duration
 
 interface ConsumerTask{
 
@@ -72,11 +75,17 @@ interface ConsumerTask{
     fun resume()
 
     /**
-     * Shutdowns the [ConsumerTask]
+     * Shuts down the [ConsumerTask] and waits for completion.
      * @see org.apache.kafka.clients.consumer.Consumer.close
      */
     fun shutdown()
 
+    /**
+     * Shuts down the [ConsumerTask] and waits for completion until the given [timeout].
+     * @see org.apache.kafka.clients.consumer.Consumer.close
+     */
+    fun shutdown(timeout: Duration)
+
     /**
      * @return the [State] of this [ConsumerTask].
      */
@@ -86,4 +95,20 @@ interface ConsumerTask{
      * Executes the given [action] with the underlying [Consumer].
      */
     fun <T> execute(action: (consumer: Consumer<ByteArray, ByteArray>) -> T): T
+
+    /**
+     * Asynchronously commits the positions of the internal [Consumer] for the given [offsets].
+     * If [offsets] is null, the [Consumer] positions for its current partition assignments are committed.
+     *
+     * @see [Consumer.commitAsync]
+     */
+    fun commitAsync(offsets: Map<TopicPartition, OffsetAndMetadata>? = null)
+
+    /**
+     * Synchronously commits the positions of the internal [Consumer] for the given [offsets].
+     * If [offsets] is null, the [Consumer] positions for its current partition assignments are committed.
+     *
+     * @see [Consumer.commitSync]
+     */
+    fun commitSync(offsets: Map<TopicPartition, OffsetAndMetadata>? = null)
 }
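
A minimal usage sketch of the extended interface (not part of this commit; the topic, partition, offset, and timeout values below are hypothetical):

import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import java.time.Duration

fun example(task: ConsumerTask) {
    // Commit an explicit position: the committed offset is the next record to consume.
    task.commitSync(mapOf(TopicPartition("demo-topic", 0) to OffsetAndMetadata(42L)))

    // With no offsets given, the consumer's current positions for all assigned partitions are committed.
    task.commitAsync()

    // Stop polling, waiting at most five seconds for the poll loop to terminate.
    task.shutdown(Duration.ofSeconds(5))
}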

clients/src/main/kotlin/io/streamthoughts/kafka/clients/consumer/ConsumerWorker.kt

Lines changed: 15 additions & 5 deletions
@@ -18,6 +18,7 @@
  */
 package io.streamthoughts.kafka.clients.consumer
 
+import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandler
 import io.streamthoughts.kafka.clients.consumer.error.serialization.DeserializationErrorHandler
 import io.streamthoughts.kafka.clients.consumer.listener.ConsumerBatchRecordsListener
 import java.util.regex.Pattern
@@ -41,30 +42,39 @@ interface ConsumerWorker<K, V> {
     fun factory(consumerFactory: ConsumerFactory): Builder<K, V>
 
     /**
-     * Sets the [RebalanceListener] to invoke when a rebalance is in progress and partitions are assigned.
+     * Sets the [listener] to invoke when a rebalance is in progress and partitions are assigned.
      */
     fun onPartitionsAssigned(listener: RebalanceListener): Builder<K, V>
 
     /**
-     * Sets the [RebalanceListener] to invoke when a rebalance is in progress and partitions are revoked.
+     * Sets the [listener] to invoke when a rebalance is in progress and partitions are revoked.
      */
     fun onPartitionsRevokedBeforeCommit(listener: RebalanceListener): Builder<K, V>
 
     /**
-     * Sets the [RebalanceListener] to invoke when a rebalance is in progress and partitions are revoked.
+     * Sets the [listener] to invoke when a rebalance is in progress and partitions are revoked.
      */
     fun onPartitionsRevokedAfterCommit(listener: RebalanceListener): Builder<K, V>
 
     /**
-     * Sets the [RebalanceListener] to invoke when a rebalance is in progress and partitions are lost.
+     * Sets the [listener] to invoke when a rebalance is in progress and partitions are lost.
     */
     fun onPartitionsLost(listener: RebalanceListener): Builder<K, V>
 
     /**
-     * Sets the [DeserializationErrorHandler] to invoke when a exception happen while de-serializing a record.
+     * Sets the [handler] to invoke when an exception happens while deserializing a record.
      */
     fun onDeserializationError(handler: DeserializationErrorHandler<K, V>): Builder<K, V>
 
+    /**
+     * Sets the [handler] to invoke when an error is thrown while processing the last records returned from
+     * the [org.apache.kafka.clients.consumer.Consumer.poll] method, i.e. an exception thrown by the provided
+     * [ConsumerBatchRecordsListener].
+     *
+     * @see [onConsumed]
+     */
+    fun onConsumedError(handler: ConsumedErrorHandler): Builder<K, V>
+
     /**
      * Sets the [ConsumerBatchRecordsListener] to invoke when a non-empty batch of records is returned from
      * the [org.apache.kafka.clients.consumer.Consumer.poll] method.
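
A usage sketch (not part of this commit): since ConsumedErrorHandler is a plain interface, a handler can be registered inline with an anonymous object. Here `builder` stands for any ConsumerWorker.Builder<K, V> obtained elsewhere, and the skip-the-batch policy is purely illustrative:

import io.streamthoughts.kafka.clients.consumer.ConsumerTask
import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandler
import org.apache.kafka.clients.consumer.ConsumerRecord

builder.onConsumedError(object : ConsumedErrorHandler {
    override fun handle(
        consumerTask: ConsumerTask,
        records: List<ConsumerRecord<*, *>>,
        thrownException: Exception
    ) {
        // Skip the failed batch: with no offsets given, commitSync commits the
        // consumer's current positions, which are already past the polled batch.
        consumerTask.commitSync()
    }
})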

clients/src/main/kotlin/io/streamthoughts/kafka/clients/consumer/KafkaConsumerTask.kt

Lines changed: 55 additions & 20 deletions
@@ -20,6 +20,7 @@ package io.streamthoughts.kafka.clients.consumer
 
 import ch.qos.logback.classic.Level
 import io.streamthoughts.kafka.clients.consumer.ConsumerTask.State
+import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandler
 import io.streamthoughts.kafka.clients.consumer.error.serialization.DeserializationErrorHandler
 import io.streamthoughts.kafka.clients.consumer.listener.ConsumerBatchRecordsListener
 import io.streamthoughts.kafka.clients.loggerFor
@@ -38,8 +39,9 @@ import org.apache.kafka.common.errors.WakeupException
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
 import org.apache.kafka.common.serialization.Deserializer
 import java.time.Duration
-import java.util.*
+import java.util.LinkedList
 import java.util.concurrent.CountDownLatch
+import java.util.concurrent.TimeUnit
 import java.util.concurrent.atomic.AtomicBoolean
 import kotlin.collections.HashMap
 import kotlin.math.max
@@ -53,11 +55,16 @@ class KafkaConsumerTask<K, V>(
     private val listener: ConsumerBatchRecordsListener<K, V>,
     private var clientId: String = "",
     private val deserializationErrorHandler: DeserializationErrorHandler<K, V>,
+    private val consumedErrorHandler: ConsumedErrorHandler? = null,
     private val consumerAwareRebalanceListener : ConsumerAwareRebalanceListener? = null
 ) : ConsumerTask {
 
     companion object {
         private val Log = loggerFor(KafkaConsumerTask::class.java)
+
+        private fun <K, V> flatten(records: Map<TopicPartition, List<ConsumerRecord<K?, V?>>>): List<ConsumerRecord<K?, V?>> {
+            return records.flatMap { (_, v) -> v }.toList()
+        }
     }
 
     @Volatile
@@ -134,10 +141,10 @@
         } catch (e: WakeupException) {
             if (!isShutdown.get()) throw e
             else {
-                logWithConsumerInfo(Level.INFO, "Stop polling due to the io.streamthoughts.kafka.clients.consumer-task is being closed")
+                logWithConsumerInfo(Level.INFO, "Stop polling due to the consumer-task is being closed")
             }
         } catch (e: CancellationException) {
-            logWithConsumerInfo(Level.INFO, "Stop polling due to the io.streamthoughts.kafka.clients.consumer-task has been canceled")
+            logWithConsumerInfo(Level.INFO, "Stop polling due to the consumer-task has been canceled")
             throw e
         } finally {
             state = State.PENDING_SHUTDOWN
@@ -160,21 +167,36 @@
     }
 
     private fun pollOnce() {
-        val records: ConsumerRecords<ByteArray, ByteArray> = consumer.poll(pollTime)
+        val rawRecords: ConsumerRecords<ByteArray, ByteArray> = consumer.poll(pollTime)
 
         if (state == State.PARTITIONS_ASSIGNED) {
             state = State.RUNNING
         }
 
-        // deserialize all records using user-provided Deserializer
-        val deserialized : Map<TopicPartition, List<ConsumerRecord<K?, V?>>> =
-            records.partitions()
-                .map { Pair(it, deserialize(records.records(it))) }
-                .toMap()
+        if (!rawRecords.isEmpty) {
+            // deserialize all records using user-provided Deserializer
+            val recordsPerPartitions: Map<TopicPartition, List<ConsumerRecord<K?, V?>>> =
+                rawRecords.partitions()
+                    .map { Pair(it, deserialize(rawRecords.records(it))) }
+                    .toMap()
+            try {
+                processBatchRecords(ConsumerRecords(recordsPerPartitions))
+                updateConsumedOffsets(rawRecords) // only update once all records from batch have been processed.
+                mayCommitAfterBatch()
+            } catch (e: Exception) {
+                mayHandleConsumedError(recordsPerPartitions, e)
+            }
+        }
+    }
 
-        processBatchRecords(ConsumerRecords(deserialized))
-        updateConsumedOffsets(records) // only update once all records from batch have been processed.
-        mayCommitAfterBatch()
+    private fun mayHandleConsumedError(recordsPerPartitions: Map<TopicPartition, List<ConsumerRecord<K?, V?>>>,
+                                       thrownException: Exception
+    ) {
+        consumedErrorHandler?.handle(
+            this,
+            flatten(recordsPerPartitions),
+            thrownException
+        )
     }
 
     private fun processBatchRecords(records: ConsumerRecords<K?, V?>) {
@@ -200,6 +222,19 @@
         shutdownLatch.await()
     }
 
+    override fun shutdown(timeout: Duration) {
+        logWithConsumerInfo(Level.INFO, "Closing")
+        isShutdown.set(true)
+        consumer.wakeup()
+        if (timeout != Duration.ZERO) {
+            try {
+                shutdownLatch.await(timeout.toMillis(), TimeUnit.MILLISECONDS)
+            } catch (e: InterruptedException) {
+                logWithConsumerInfo(Level.WARN, "Failed to close consumer before timeout")
+            }
+        }
+    }
+
     private fun deserialize(records: List<ConsumerRecord<ByteArray, ByteArray>>): List<ConsumerRecord<K?, V?>> {
         val deserialized = LinkedList<ConsumerRecord<K?, V?>>()
         for (record : ConsumerRecord<ByteArray, ByteArray> in records) {
@@ -253,7 +288,7 @@
         state = State.PARTITIONS_REVOKED
         consumerAwareRebalanceListener?.onPartitionsRevokedBeforeCommit(consumer, partitions)
 
-        doCommitSync(offsetAndMetadataToCommit())
+        commitSync(offsetAndMetadataToCommit())
 
         consumerAwareRebalanceListener?.onPartitionsRevokedAfterCommit(consumer, partitions)
         assignedPartitions.clear()
@@ -267,11 +302,11 @@
         }
     }
 
-    private fun offsetAndMetadataToCommit() = consumedOffsets.map { Pair(it.key, OffsetAndMetadata(it.value)) }.toMap()
+    private fun offsetAndMetadataToCommit() = consumedOffsets.map { Pair(it.key, OffsetAndMetadata(it.value + 1)) }.toMap()
 
     private fun mayCommitAfterBatch() {
         if (!isAutoCommitEnabled && consumedOffsets.isNotEmpty()) {
-            doCommitAsync(offsetAndMetadataToCommit())
+            commitAsync(offsetAndMetadataToCommit())
             consumedOffsets.clear()
         }
     }
@@ -282,11 +317,11 @@
                 val offset = consumer.position(topicPartition)
                 Pair(topicPartition, OffsetAndMetadata(offset))
             }.toMap()
-            doCommitSync(positionsToCommit)
+            commitSync(positionsToCommit)
         }
     }
 
-    private fun doCommitAsync(offsets: Map<TopicPartition, OffsetAndMetadata>? = null) {
+    override fun commitAsync(offsets: Map<TopicPartition, OffsetAndMetadata>?) {
         logWithConsumerInfo(Level.INFO, "Committing offsets async-synchronously for positions: $offsets")
         consumer.commitAsync(offsets) {
             _, exception -> if (exception != null) {
@@ -295,8 +330,8 @@
             }
         }
     }
 
-    private fun doCommitSync(offsets: Map<TopicPartition, OffsetAndMetadata>? = null) {
-        if (consumer.assignment().isEmpty()) return // no need to commit if no partition is assign to this io.streamthoughts.kafka.clients.consumer
+    override fun commitSync(offsets: Map<TopicPartition, OffsetAndMetadata>?) {
+        if (consumer.assignment().isEmpty()) return // no need to commit if no partition is assigned to this consumer
         try {
             if (offsets == null) {
                 logWithConsumerInfo(Level.WARN, "Committing offsets synchronously for consumed records")
@@ -307,7 +342,7 @@ class KafkaConsumerTask<K, V>(
             }
             logWithConsumerInfo(Level.WARN, "Offsets committed for partitions: $assignedPartitions")
         } catch (e: RetriableCommitFailedException) {
-            doCommitSync(offsets)
+            commitSync(offsets)
         } catch (e : RebalanceInProgressException) {
             logWithConsumerInfo(Level.WARN, "Error while committing offsets due to a rebalance in progress. Ignored")
         }
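
Two behavioral details in this file are worth calling out. First, offsetAndMetadataToCommit() now commits the consumed offset + 1: Kafka treats a committed offset as the position of the next record to fetch, so committing the raw offset of the last processed record would replay that record after a restart or rebalance. Second, consumed offsets are only recorded once the whole batch has been processed, so a failed batch is skipped or replayed as a unit, never partially committed. A tiny illustration of the off-by-one convention (topic and offsets hypothetical):

import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition

// The last record processed on partition 0 had offset 41; committing 42 means
// "resume from offset 42", i.e. from the record after the last one processed.
val toCommit = mapOf(TopicPartition("demo-topic", 0) to OffsetAndMetadata(41L + 1))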

clients/src/main/kotlin/io/streamthoughts/kafka/clients/consumer/KafkaConsumerWorker.kt

Lines changed: 11 additions & 3 deletions
@@ -19,6 +19,8 @@
 package io.streamthoughts.kafka.clients.consumer
 
 import io.streamthoughts.kafka.clients.consumer.KafkaConsumerWorker.KafkaConsumerWorker
+import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandler
+import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandlers.closeTaskOnConsumedError
 import io.streamthoughts.kafka.clients.consumer.error.serialization.DeserializationErrorHandler
 import io.streamthoughts.kafka.clients.consumer.error.serialization.DeserializationErrorHandlers
 import io.streamthoughts.kafka.clients.consumer.listener.ConsumerBatchRecordsListener
@@ -32,7 +34,6 @@ import kotlinx.coroutines.joinAll
 import kotlinx.coroutines.launch
 import org.apache.kafka.clients.consumer.Consumer
 import org.apache.kafka.clients.consumer.ConsumerConfig
-import org.apache.kafka.clients.consumer.ConsumerRecords
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.serialization.Deserializer
 import java.util.concurrent.ExecutorService
@@ -50,6 +51,7 @@ class KafkaConsumerWorker<K, V> (
     private val valueDeserializer: Deserializer<V>,
     private val consumerRebalanceListener: ConsumerAwareRebalanceListener,
     private val batchRecordListener: ConsumerBatchRecordsListener<K, V>,
+    private val onConsumedError: ConsumedErrorHandler,
     private val onDeserializationError: DeserializationErrorHandler<K, V>,
     private val consumerFactory: ConsumerFactory = ConsumerFactory.DefaultConsumerFactory
 ): ConsumerWorker<K, V> {
@@ -104,7 +106,8 @@ class KafkaConsumerWorker<K, V> (
                 batchRecordListener,
                 clientId = computeClientId(taskId),
                 consumerAwareRebalanceListener = consumerRebalanceListener,
-                deserializationErrorHandler = onDeserializationError
+                deserializationErrorHandler = onDeserializationError,
+                consumedErrorHandler = onConsumedError
             )
         }
         doStart()
@@ -165,7 +168,8 @@ class KafkaConsumerWorker<K, V> (
         var onPartitionsLost: RebalanceListener? = null,
         var batchRecordListener: ConsumerBatchRecordsListener<K, V>? = null,
        var onDeserializationError: DeserializationErrorHandler<K, V>? = null,
-        var consumerFactory: ConsumerFactory? = null
+        var consumerFactory: ConsumerFactory? = null,
+        var onConsumedError: ConsumedErrorHandler? = null
     ) : ConsumerWorker.Builder<K, V> {
 
         override fun configure(init: KafkaConsumerConfigs.() -> Unit) {
@@ -190,6 +194,9 @@ class KafkaConsumerWorker<K, V> (
         override fun onDeserializationError(handler : DeserializationErrorHandler<K, V>) =
             apply { onDeserializationError = handler }
 
+        override fun onConsumedError(handler : ConsumedErrorHandler) =
+            apply { onConsumedError = handler }
+
         override fun onConsumed(listener: ConsumerBatchRecordsListener<K, V>) =
             apply { this.batchRecordListener = listener }
 
@@ -200,6 +207,7 @@ class KafkaConsumerWorker<K, V> (
                 valueDeserializer,
                 SimpleConsumerAwareRebalanceListener(),
                 batchRecordListener ?: noop(),
+                onConsumedError ?: closeTaskOnConsumedError(),
                 onDeserializationError ?: DeserializationErrorHandlers.logAndFail(),
                 consumerFactory ?: ConsumerFactory.DefaultConsumerFactory
             )
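
Note the default chosen in build(): when no handler is configured, the worker falls back to closeTaskOnConsumedError(), so a processing error closes the consumer task unless another policy is selected. Picking the default explicitly (builder construction elided) is equivalent:

import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandlers.closeTaskOnConsumedError

builder.onConsumedError(closeTaskOnConsumedError())

The commit message also names a second built-in, LogAndCommitOnConsumedError, for applications that prefer to log the error, commit, and keep consuming.
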
clients/src/main/kotlin/io/streamthoughts/kafka/clients/consumer/error/ConsumedErrorHandler.kt

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2020 StreamThoughts.
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.streamthoughts.kafka.clients.consumer.error
+
+import io.streamthoughts.kafka.clients.consumer.ConsumerTask
+import org.apache.kafka.clients.consumer.ConsumerRecord
+
+/**
+ * Handles errors thrown during the processing of a non-empty batch of [ConsumerRecord]
+ * using a given [io.streamthoughts.kafka.clients.consumer.listener.ConsumerBatchRecordsListener].
+ */
+interface ConsumedErrorHandler {
+
+    /**
+     * This method is invoked when a [thrownException] is thrown while a [consumerTask] is processing
+     * a non-empty batch of [records].
+     *
+     * @param consumerTask the [ConsumerTask] polling records.
+     * @param records the remaining [records] to be processed (including the one that failed).
+     * @param thrownException the [Exception] that was thrown while processing [records].
+     */
+    fun handle(consumerTask: ConsumerTask, records: List<ConsumerRecord<*, *>>, thrownException: Exception)
+}
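
To make the contract concrete, here is a hedged sketch of a custom implementation; the class name, the SLF4J logger, and the shutdown policy are assumptions, and only the ConsumedErrorHandler contract itself comes from this commit. It reports the failed batch and then requests a non-blocking shutdown: Duration.ZERO matters because the handler runs on the polling thread, which shutdown would otherwise wait on.

import io.streamthoughts.kafka.clients.consumer.ConsumerTask
import io.streamthoughts.kafka.clients.consumer.error.ConsumedErrorHandler
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.slf4j.LoggerFactory
import java.time.Duration

class LogAndCloseOnError : ConsumedErrorHandler {

    private val log = LoggerFactory.getLogger(LogAndCloseOnError::class.java)

    override fun handle(
        consumerTask: ConsumerTask,
        records: List<ConsumerRecord<*, *>>,
        thrownException: Exception
    ) {
        // `records` holds the remaining records of the batch, including the one that failed.
        val positions = records.joinToString { "${it.topic()}-${it.partition()}@${it.offset()}" }
        log.error("Failed to process batch [$positions], closing task", thrownException)
        // Nothing was committed for this batch, so it will be re-delivered after a restart.
        consumerTask.shutdown(Duration.ZERO)
    }
}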
