@@ -87,9 +87,53 @@ default void onDataWriterCommit(WriterCommitMessage message) {}
* disable this behavior by overriding {@link #useCommitCoordinator()}. If disabled, multiple
* tasks may have committed successfully and one successful commit message per task will be
* passed to this commit method. The remaining commit messages are ignored by Spark.
+ *
*/
void commit(WriterCommitMessage[] messages);

+ /**
+ * Commits this writing job with a list of commit messages and operation metrics.
+ * <p>
+ * If this method fails (by throwing an exception), this writing job is considered to have
+ * failed, and {@link #abort(WriterCommitMessage[])} would be called. The state of the
+ * destination is undefined and {@link #abort(WriterCommitMessage[])} may not be able to
+ * deal with it.
+ * <p>
+ * Note that speculative execution may cause multiple tasks to run for a partition. By default,
+ * Spark uses the commit coordinator to allow at most one task to commit. Implementations can
+ * disable this behavior by overriding {@link #useCommitCoordinator()}. If disabled, multiple
+ * tasks may have committed successfully and one successful commit message per task will be
+ * passed to this commit method. The remaining commit messages are ignored by Spark.
+ * <p>
+ * @param messages a list of commit messages from successful data writers, produced by
+ *                 {@link DataWriter#commit()}.
+ * @param metrics a map of operation metrics collected from the query producing the write.
+ *                The keys will be prefixed by the operation type, e.g. `merge`.
+ * <p>
+ * Currently supported metrics are:
+ * <ul>
+ *   <li>Operation Type = `merge`
+ *     <ul>
+ *       <li>`numTargetRowsCopied`: number of target rows copied unmodified because
+ *       they did not match any action</li>
+ *       <li>`numTargetRowsDeleted`: number of target rows deleted</li>
+ *       <li>`numTargetRowsUpdated`: number of target rows updated</li>
+ *       <li>`numTargetRowsInserted`: number of target rows inserted</li>
+ *       <li>`numTargetRowsMatchedUpdated`: number of target rows updated by a
+ *       matched clause</li>
+ *       <li>`numTargetRowsMatchedDeleted`: number of target rows deleted by a
+ *       matched clause</li>
+ *       <li>`numTargetRowsNotMatchedBySourceUpdated`: number of target rows
+ *       updated by a not matched by source clause</li>
+ *       <li>`numTargetRowsNotMatchedBySourceDeleted`: number of target rows
+ *       deleted by a not matched by source clause</li>
+ *     </ul>
+ *   </li>
+ * </ul>
+ */
+ default void commit(WriterCommitMessage[] messages, Map<String, Long> metrics) {
+   commit(messages);
+ }
+
/**
* Aborts this writing job because some data writers failed and keep failing when retried,
* or the Spark job fails for some unknown reason,
@@ -106,14 +150,4 @@ default void onDataWriterCommit(WriterCommitMessage message) {}
* clean up the data left by data writers.
*/
void abort(WriterCommitMessage[] messages);
-
- /**
- * Similar to {@link #commit(WriterCommitMessage[])}, but providing operation metrics to
- * this batch write.
- * @param metrics operation metrics. The keys will be prefixed by operation type, eg `merge`
- */
- default void commitWithOperationMetrics(
-     WriterCommitMessage[] messages, Map<String, Long> metrics) {
-   commit(messages);
- }
}
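
For reference, a minimal sketch of how a connector might consume the new overload. The class name, the logging, and the factory stub are hypothetical; only the BatchWrite interface, the commit/abort signatures, and the metrics map come from the change above. Because the Javadoc describes the metric key prefixing only loosely, the sketch simply iterates over whatever keys Spark passes in rather than assuming an exact key format.

import java.util.Map;

import org.apache.spark.sql.connector.write.BatchWrite;
import org.apache.spark.sql.connector.write.DataWriterFactory;
import org.apache.spark.sql.connector.write.PhysicalWriteInfo;
import org.apache.spark.sql.connector.write.WriterCommitMessage;

// Hypothetical connector class overriding the new metrics-aware commit overload.
class MetricsAwareBatchWrite implements BatchWrite {

  @Override
  public DataWriterFactory createBatchWriterFactory(PhysicalWriteInfo info) {
    // Writer-factory creation is out of scope for this sketch.
    throw new UnsupportedOperationException("sketch only");
  }

  @Override
  public void commit(WriterCommitMessage[] messages) {
    // Finalize the write using the per-task commit messages.
  }

  @Override
  public void commit(WriterCommitMessage[] messages, Map<String, Long> metrics) {
    // Record whatever operation metrics Spark collected for this write
    // (e.g. the MERGE counters listed above), then run the plain commit.
    metrics.forEach((name, value) ->
        System.out.println("write metric " + name + " = " + value));
    commit(messages);
  }

  @Override
  public void abort(WriterCommitMessage[] messages) {
    // Clean up any data left behind by the data writers.
  }
}

Since the two-argument commit is a default method that delegates to commit(messages), existing connectors compile unchanged; the metrics are silently dropped unless an implementation overrides the overload as sketched here.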