@@ -60,7 +60,11 @@ private[kafka010] class KafkaMicroBatchStream(
6060 metadataPath : String ,
6161 startingOffsets : KafkaOffsetRangeLimit ,
6262 failOnDataLoss : Boolean )
63- extends SupportsTriggerAvailableNow with ReportsSourceMetrics with MicroBatchStream with Logging {
63+ extends SupportsTriggerAvailableNow
64+ with SupportsRealTimeMode
65+ with ReportsSourceMetrics
66+ with MicroBatchStream
67+ with Logging {
6468
6569 private [kafka010] val pollTimeoutMs = options.getLong(
6670 KafkaSourceProvider .CONSUMER_POLL_TIMEOUT ,
@@ -93,6 +97,11 @@ private[kafka010] class KafkaMicroBatchStream(
9397
9498 private var isTriggerAvailableNow : Boolean = false
9599
// Whether this stream has been switched to real time mode. Starts out false and is only
// ever flipped to true by prepareForRealTimeMode(); metrics() branches on it.
private var inRealTimeMode: Boolean = false

/** Marks this stream as running in real time mode by setting `inRealTimeMode`. */
override def prepareForRealTimeMode(): Unit = { inRealTimeMode = true }
104+
96105 /**
97106 * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only
98107 * called in StreamExecutionThread. Otherwise, interrupting a thread while running
@@ -218,6 +227,93 @@ private[kafka010] class KafkaMicroBatchStream(
218227 }.toArray
219228 }
220229
/**
 * Plans the input partitions for real time mode, where only a start offset is known.
 *
 * Every topic partition maps to exactly one input partition whose range is open-ended
 * (`Long.MaxValue` as the until-offset). Options that shape or bound a trigger are rejected.
 *
 * @param start the offset to start reading from; must be a [[KafkaSourceOffset]]
 * @return one [[KafkaBatchInputPartition]] per topic partition
 * @throws UnsupportedOperationException if maxOffsetsPerTrigger, minOffsetsPerTrigger,
 *                                       minPartitions, endingTimestamp or maxTriggerDelay is set
 */
override def planInputPartitions(start: Offset): Array[InputPartition] = {
  // Trigger restrictions are not supported in real time mode. The checks run in the order
  // written, so the first offending option determines the error message.
  def rejectIf(isSet: Boolean, message: String): Unit = {
    if (isSet) {
      throw new UnsupportedOperationException(message)
    }
  }

  rejectIf(
    maxOffsetsPerTrigger.isDefined,
    "maxOffsetsPerTrigger is not compatible with real time mode")
  rejectIf(
    minOffsetPerTrigger.isDefined,
    "minOffsetsPerTrigger is not compatible with real time mode")
  rejectIf(
    options.containsKey(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY),
    "minpartitions is not compatible with real time mode")
  rejectIf(
    options.containsKey(KafkaSourceProvider.ENDING_TIMESTAMP_OPTION_KEY),
    "endingtimestamp is not compatible with real time mode")
  rejectIf(
    options.containsKey(KafkaSourceProvider.MAX_TRIGGER_DELAY),
    "maxtriggerdelay is not compatible with real time mode")

  // Real time mode expects a 1:1 mapping between a topic partition and an input partition.
  // The partition range check is skipped here for performance reasons; it can always be done
  // in the tasks if needed.
  val requestedOffsets = start.asInstanceOf[KafkaSourceOffset].partitionToOffsets

  // The partitions named by the start offset are compared with those of the latest partition
  // offsets to decide whether the partition list needs updating. The latest offsets do not
  // have to be perfectly fresh: a newly created partition may already be minutes old, and
  // latestPartitionOffsets should have been fetched not long ago anyway.
  assert(latestPartitionOffsets != null, "latestPartitionOffsets should be set in latestOffset")
  val knownPartitions = latestPartitionOffsets.keySet

  val effectiveOffsets =
    if (requestedOffsets.keySet != knownPartitions) {
      val addedPartitions = knownPartitions.diff(requestedOffsets.keySet)
      // Offset 0 could be filled in directly to save this admin call, but new partitions are
      // considered rare, and fetching the earliest offset matches micro-batch mode and can
      // potentially enable more sanity checks on the executor side.
      val addedOffsets = kafkaOffsetReader.fetchEarliestOffsets(addedPartitions.toSeq)

      assert(
        !addedOffsets.keys.exists(requestedOffsets.contains),
        "startPartitionOffsets should not contain any key in newPartitionOffsets")

      // A brand-new partition is expected to start at offset 0; anything else is only warned
      // about, not rejected.
      val unexpectedStarts = addedOffsets.filter { case (_, earliest) => earliest != 0 }
      if (unexpectedStarts.nonEmpty) {
        logWarning(log"new partitions should start from offset 0: " +
          log"${MDC(OFFSETS, unexpectedStarts)}")
      }

      logInfo(log"Added new partition offsets: ${MDC(OFFSETS, addedOffsets)}")
      requestedOffsets ++ addedOffsets
    } else {
      requestedOffsets
    }

  effectiveOffsets.toSeq.map { case (topicPartition, fromOffset) =>
    KafkaBatchInputPartition(
      KafkaOffsetRange(topicPartition, fromOffset, Long.MaxValue, preferredLoc = None),
      executorKafkaParams,
      pollTimeoutMs,
      failOnDataLoss,
      includeHeaders)
  }.toArray
}
309+
/**
 * Combines per-partition offsets into a single [[KafkaSourceOffset]].
 *
 * Every element must be a [[KafkaSourcePartitionOffset]]; any other element fails the
 * pattern match with a `MatchError`. If a partition appears more than once, the entry that
 * comes last in `offsets` wins.
 *
 * @param offsets the per-partition offsets to merge
 * @return a source offset covering every partition present in `offsets`
 */
override def mergeOffsets(offsets: Array[PartitionOffset]): Offset = {
  val perPartition = offsets.iterator.map {
    case KafkaSourcePartitionOffset(topicPartition, partitionOffset) =>
      topicPartition -> partitionOffset
  }
  KafkaSourceOffset(perPartition.toMap)
}
316+
/** Returns the reader factory for the planned partitions: always `KafkaBatchReaderFactory`. */
override def createReaderFactory(): PartitionReaderFactory = KafkaBatchReaderFactory
@@ -235,7 +331,30 @@ private[kafka010] class KafkaMicroBatchStream(
/** Renders this stream as `KafkaV2[...]`, with the offset reader's string form inside. */
override def toString(): String = {
  "KafkaV2[" + kafkaOffsetReader + "]"
}
236332
/**
 * Reports source metrics for the batch that ended at `latestConsumedOffset`.
 *
 * The latest available offsets are re-fetched from Kafka here rather than reusing the ones
 * captured at the start of the batch, so the backlog is estimated against the state at the
 * end of the batch:
 *  - in real time mode the fetch is seeded with the consumed offsets; if no consumed offset
 *    is present, no latest offsets are computed;
 *  - in micro-batch mode the fetch is seeded with `latestPartitionOffsets`; if that has not
 *    been set yet (still `null`), no latest offsets are computed.
 *
 * @param latestConsumedOffset the end offset of the last completed batch, if any
 * @return the metrics map built by `KafkaMicroBatchStream.metrics`
 */
override def metrics(latestConsumedOffset: Optional[Offset]): ju.Map[String, String] = {
  val reCalculatedLatestPartitionOffsets =
    if (inRealTimeMode) {
      if (!latestConsumedOffset.isPresent) {
        // A batch without end offsets should not happen; there is nothing to compare against.
        None
      } else {
        val consumed = latestConsumedOffset.get.asInstanceOf[KafkaSourceOffset].partitionToOffsets
        Some(kafkaOffsetReader.fetchLatestOffsets(Some(consumed)))
      }
    } else {
      // Option(...) rather than Some(...): latestPartitionOffsets is null until latestOffset
      // has populated it, and a Some(null) must never be handed to the offset reader.
      Option(latestPartitionOffsets).map { known =>
        kafkaOffsetReader.fetchLatestOffsets(Some(known))
      }
    }

  KafkaMicroBatchStream.metrics(latestConsumedOffset, reCalculatedLatestPartitionOffsets)
}
240359
241360 /**
@@ -386,13 +505,14 @@ object KafkaMicroBatchStream extends Logging {
386505 */
387506 def metrics (
388507 latestConsumedOffset : Optional [Offset ],
389- latestAvailablePartitionOffsets : PartitionOffsetMap ): ju.Map [String , String ] = {
508+ latestAvailablePartitionOffsets : Option [ PartitionOffsetMap ] ): ju.Map [String , String ] = {
390509 val offset = Option (latestConsumedOffset.orElse(null ))
391510
392- if (offset.nonEmpty && latestAvailablePartitionOffsets != null ) {
511+ if (offset.nonEmpty && latestAvailablePartitionOffsets.isDefined ) {
393512 val consumedPartitionOffsets = offset.map(KafkaSourceOffset (_)).get.partitionToOffsets
394- val offsetsBehindLatest = latestAvailablePartitionOffsets
395- .map(partitionOffset => partitionOffset._2 - consumedPartitionOffsets(partitionOffset._1))
513+ val offsetsBehindLatest = latestAvailablePartitionOffsets.get
514+ .map(partitionOffset => partitionOffset._2 -
515+ consumedPartitionOffsets.getOrElse(partitionOffset._1, 0L ))
396516 if (offsetsBehindLatest.nonEmpty) {
397517 val avgOffsetBehindLatest = offsetsBehindLatest.sum.toDouble / offsetsBehindLatest.size
398518 return Map [String , String ](
0 commit comments