import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.StorageException;
+import com.google.common.annotations.VisibleForTesting;
import com.google.re2j.Matcher;
import com.google.re2j.Pattern;
import com.wepay.kafka.connect.bigquery.write.row.GcsToBqWriter;
@@ -67,18 +68,24 @@ public class GcsToBqLoadRunnable implements Runnable {
  private static String SOURCE_URI_FORMAT = "gs://%s/%s";
  private final BigQuery bigQuery;

-  // these numbers are intended to try to make this task not excede Google Cloud Quotas.
+  // these numbers are intended to try to make this task not exceed Google Cloud Quotas.
  // see: https://cloud.google.com/bigquery/quotas#load_jobs
  private final Bucket bucket;
  private final Map<Job, List<BlobId>> activeJobs;
+  /**
+   * The set of blob ids that the system is currently processing or has queued for processing.
+   */
  private final Set<BlobId> claimedBlobIds;
+  /**
+   * The set of blob ids that the system can delete.
+   */
  private final Set<BlobId> deletableBlobIds;

  /**
   * Create a {@link GcsToBqLoadRunnable} with the given bigquery, bucket, and ms wait interval.
   *
   * @param bigQuery the {@link BigQuery} instance.
-   * @param bucket the the GCS bucket to read from.
+   * @param bucket the GCS bucket to read from.
   */
  public GcsToBqLoadRunnable(BigQuery bigQuery, Bucket bucket) {
    this.bigQuery = bigQuery;
@@ -88,6 +95,24 @@ public GcsToBqLoadRunnable(BigQuery bigQuery, Bucket bucket) {
    this.deletableBlobIds = new HashSet<>();
  }

+  /**
+   * Create a {@link GcsToBqLoadRunnable} with the given bigquery, bucket, and pre-populated job and blob-id state; intended for testing.
+   *
+   * @param bigQuery the {@link BigQuery} instance.
+   * @param bucket the GCS bucket to read from.
+   * @param activeJobs the map from each active load job to the list of blobs it contains.
+   * @param claimedBlobIds the set of blob ids currently being processed or queued for processing.
+   * @param deletableBlobIds the set of blob ids that can be deleted.
+   */
+  @VisibleForTesting
+  GcsToBqLoadRunnable(BigQuery bigQuery, Bucket bucket, Map<Job, List<BlobId>> activeJobs, Set<BlobId> claimedBlobIds, Set<BlobId> deletableBlobIds) {
+    this.bigQuery = bigQuery;
+    this.bucket = bucket;
+    this.activeJobs = activeJobs;
+    this.claimedBlobIds = claimedBlobIds;
+    this.deletableBlobIds = deletableBlobIds;
+  }
+
  /**
   * Given a blob, return the {@link TableId} this blob should be inserted into.
   *
@@ -217,7 +242,8 @@ private Job triggerBigQueryLoadJob(TableId table, List<Blob> blobs) {
   * any jobs that failed. We only log a message for failed jobs because those blobs will be
   * retried during the next run.
   */
-  private void checkJobs() {
+  @VisibleForTesting
+  void checkJobs() {
    if (activeJobs.isEmpty()) {
      // quick exit if nothing needs to be done.
      logger.debug("No active jobs to check. Skipping check jobs.");
@@ -237,31 +263,50 @@ private void checkJobs() {
      try {
        if (job.isDone()) {
          logger.trace("Job is marked done: id={}, status={}", job.getJobId(), job.getStatus());
-          final List<BlobId> blobIdsToDelete = jobEntry.getValue();
+          if (job.getStatus().getError() == null) {
+            processSuccessfulJob(job, jobEntry.getValue());
+            successCount++;
+          } else {
+            processFailedJob(job, jobEntry.getValue());
+            failureCount++;
+          }
          jobIterator.remove();
          logger.trace("Job is removed from iterator: {}", job.getJobId());
-          successCount++;
-          claimedBlobIds.removeAll(blobIdsToDelete);
-          logger.trace("Completed blobs have been removed from claimed set: {}", blobIdsToDelete);
-          deletableBlobIds.addAll(blobIdsToDelete);
-          logger.trace("Completed blobs marked as deletable: {}", blobIdsToDelete);
        }
      } catch (BigQueryException ex) {
        // log a message.
        logger.warn("GCS to BQ load job failed", ex);
-        // remove job from active jobs (it's not active anymore)
-        List<BlobId> blobIds = activeJobs.get(job);
-        jobIterator.remove();
-        // unclaim blobs
-        claimedBlobIds.removeAll(blobIds);
+        processFailedJob(job, jobEntry.getValue());
        failureCount++;
+        jobIterator.remove();
+        logger.trace("Job is removed from iterator: {}", job.getJobId());
      } finally {
        logger.info("GCS To BQ job tally: {} successful jobs, {} failed jobs.",
            successCount, failureCount);
      }
    }
  }

+  private void processSuccessfulJob(final Job job, final List<BlobId> blobIdsToDelete) {
+    blobIdsToDelete.forEach(claimedBlobIds::remove);
+    logger.trace("Completed blobs have been removed from claimed set: {}", blobIdsToDelete);
+    deletableBlobIds.addAll(blobIdsToDelete);
+    logger.trace("Completed blobs marked as deletable: {}", blobIdsToDelete);
+  }
+
+  private void processFailedJob(final Job job, final List<BlobId> blobsNotCompleted) {
+    logger.warn("Job {} failed with {}", job.getJobId(), job.getStatus().getError());
+    if (job.getStatus().getExecutionErrors().isEmpty()) {
+      logger.warn("No additional errors associated with job {}", job.getJobId());
+    } else {
+      logger.warn("Additional errors associated with job {}: {}", job.getJobId(), job.getStatus().getExecutionErrors());
+    }
+    logger.warn("Blobs in job {}: {}", job.getJobId(), blobsNotCompleted);
+    // unclaim blobs
+    blobsNotCompleted.forEach(claimedBlobIds::remove);
+    logger.trace("Failed blobs reset as processable");
+  }
+
  /**
   * Delete deletable blobs.
   */
@@ -298,7 +343,7 @@ private void deleteBlobs() {
    // Calculate number of successful deletes, remove the successful deletes from
    // the deletableBlobIds.
    successfulDeletes = numberOfBlobs - failedDeletes;
-    deletableBlobIds.removeAll(blobIdsToDelete);
+    blobIdsToDelete.forEach(deletableBlobIds::remove);

    logger.info("Successfully deleted {} blobs; failed to delete {} blobs",
        successfulDeletes,
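For reviewers, a minimal sketch of how the new package-private constructor and the now package-visible checkJobs() might be driven from a unit test. This is an illustration only, not part of the patch: the test class name is hypothetical, JUnit 4 and Mockito are assumed to be on the test classpath, Job and JobStatus are assumed to be mockable, and the test covers only the success path added above (job done, status error null).

// Hypothetical test sketch (not part of this change). It must live in the same package as
// GcsToBqLoadRunnable so the package-private constructor and checkJobs() are accessible.
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobStatus;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Bucket;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.Test;

public class GcsToBqLoadRunnableCheckJobsTest {

  @Test
  public void successfulJobMovesBlobsFromClaimedToDeletable() {
    BlobId blobId = BlobId.of("test-bucket", "test-blob");

    // A job that is done and has no status error should be treated as successful.
    Job job = mock(Job.class);
    JobStatus status = mock(JobStatus.class);
    when(job.isDone()).thenReturn(true);
    when(job.getStatus()).thenReturn(status);
    when(status.getError()).thenReturn(null);

    Map<Job, List<BlobId>> activeJobs = new HashMap<>();
    activeJobs.put(job, new ArrayList<>(Collections.singletonList(blobId)));
    Set<BlobId> claimedBlobIds = new HashSet<>(Collections.singleton(blobId));
    Set<BlobId> deletableBlobIds = new HashSet<>();

    GcsToBqLoadRunnable runnable = new GcsToBqLoadRunnable(
        mock(BigQuery.class), mock(Bucket.class), activeJobs, claimedBlobIds, deletableBlobIds);

    runnable.checkJobs();

    // The blob id should have moved from the claimed set to the deletable set,
    // and the finished job should have been dropped from the active map.
    assertTrue(claimedBlobIds.isEmpty());
    assertTrue(deletableBlobIds.contains(blobId));
    assertTrue(activeJobs.isEmpty());
  }
}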