Skip to content

feat(filesystems): keep subfolders path for AmazonS3MovePolicy #721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
*/
package io.streamthoughts.kafka.connect.filepulse.fs.clean;

import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.EXCLUDE_SOURCE_PREFIX_PATH_CONFIG;
import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_BUCKET_NAME_CONFIG;
import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH;
import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_PREFIX_PATH_CONFIG;
import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_BUCKET_NAME_CONFIG;
import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH;
import static io.streamthoughts.kafka.connect.filepulse.fs.clean.AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_PREFIX_PATH_CONFIG;

import io.streamthoughts.kafka.connect.filepulse.clean.FileCleanupPolicy;
Expand All @@ -29,7 +32,10 @@ public class AmazonS3MoveCleanupPolicy implements FileCleanupPolicy {
private static final Logger LOG = LoggerFactory.getLogger(AmazonS3MoveCleanupPolicy.class);

private AmazonS3Storage storage;


private boolean includeSuccessSourcePrefixPath;
private boolean includeFailuresSourcePrefixPath;

private Config config;

/**
Expand All @@ -38,27 +44,38 @@ public class AmazonS3MoveCleanupPolicy implements FileCleanupPolicy {
@Override
public void configure(final Map<String, ?> configs) {
    // Parse the raw properties once and keep the parsed view for later lookups.
    final Config parsed = new Config(configs);
    this.config = parsed;

    // Cache the per-outcome flags so onSuccess/onFailure do not re-read the config.
    final boolean keepPrefixOnSuccess = parsed.getBoolean(SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH);
    this.includeSuccessSourcePrefixPath = keepPrefixOnSuccess;
    final boolean keepPrefixOnFailure = parsed.getBoolean(FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH);
    this.includeFailuresSourcePrefixPath = keepPrefixOnFailure;
}

/**
* {@inheritDoc}
*/
@Override
public boolean onSuccess(final FileObject source) {
    // Single call to the four-argument move(...): the success-specific flag decides
    // whether the source prefix path is kept in the destination key.
    return move(
        source,
        SUCCESS_AWS_BUCKET_NAME_CONFIG,
        SUCCESS_AWS_PREFIX_PATH_CONFIG,
        includeSuccessSourcePrefixPath);
}

/**
* {@inheritDoc}
*/
@Override
public boolean onFailure(final FileObject source) {
    // Single call to the four-argument move(...): the failure-specific flag decides
    // whether the source prefix path is kept in the destination key.
    return move(
        source,
        FAILURES_AWS_BUCKET_NAME_CONFIG,
        FAILURES_AWS_PREFIX_PATH_CONFIG,
        includeFailuresSourcePrefixPath);
}

private boolean move(final FileObject source,
final String destinationS3BucketConfig,
final String destinationS3PrefixConfig) {
final String destinationS3PrefixConfig,
final boolean includeSourcePrefixPath) {
checkState();
URI sourceURI = source.metadata().uri();
if (!storage.exists(sourceURI)) {
Expand All @@ -67,18 +84,35 @@ private boolean move(final FileObject source,
}
S3BucketKey sourceBucketKey = S3BucketKey.fromURI(sourceURI);

String relativeSourcePrefix = extractPrefix(
sourceBucketKey.key().replaceAll(sourceBucketKey.objectName(), ""));
String newObjectKey = includeSourcePrefixPath ?
relativeSourcePrefix + sourceBucketKey.objectName() : sourceBucketKey.objectName();

var destS3BucketName = Optional
.ofNullable(config.getString(destinationS3BucketConfig))
.orElse(sourceBucketKey.bucketName());

var destBucketKey = new S3BucketKey(
destS3BucketName,
config.getString(destinationS3PrefixConfig),
sourceBucketKey.objectName()
newObjectKey
);
return storage.move(sourceURI, destBucketKey.toURI());
}

/**
 * Builds the relative prefix to keep in the destination key from the given source path.
 *
 * <p>The value configured under {@code EXCLUDE_SOURCE_PREFIX_PATH_CONFIG} (or the empty
 * string when it is not set) is removed from the path, leading slashes are stripped, and
 * a trailing slash is appended when missing.
 *
 * @param p the source path to normalize.
 * @return an empty string when nothing remains, otherwise the remaining path ending with {@code /}.
 */
private String extractPrefix(final String p) {
    final String excludeSourcePrefixPath = Optional
        .ofNullable(config.getString(EXCLUDE_SOURCE_PREFIX_PATH_CONFIG))
        .orElse("");
    // String#replace matches literally; replaceAll would interpret the configured path as a
    // regular expression, so characters such as '.', '+' or '(' could mis-match or throw.
    String prefix = p.replace(excludeSourcePrefixPath, "");
    // Drop leading slashes left over once the excluded part has been removed.
    prefix = prefix.replaceAll("^/+", "");
    // if there are no subdirectories, return an empty string
    if (prefix.isEmpty()) {
        return "";
    }
    return prefix.endsWith("/") ? prefix : prefix + "/";
}
/**
* {@inheritDoc}
*/
Expand Down Expand Up @@ -110,6 +144,14 @@ public static class Config extends AbstractConfig {
private static final String SUCCESS_AWS_PREFIX_PATH_DOC =
"The prefix to be used for defining the key of an S3 object to move into the destination bucket.";

public static final String SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH =
CONFIG_PREFIX + "success.aws.include.source.prefix.path";
private static final String SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH_DOC =
"Indicates whether to include the source prefix path in the destination key.";
public static final String FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH =
CONFIG_PREFIX + "failure.aws.include.source.prefix.path";
private static final String FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH_DOC =
"Indicates whether to include the source prefix path in the destination key.";
public static final String FAILURES_AWS_BUCKET_NAME_CONFIG =
CONFIG_PREFIX + "failure.aws.bucket.name";
private static final String FAILURES_AWS_BUCKET_NAME_DOC =
Expand All @@ -120,6 +162,11 @@ public static class Config extends AbstractConfig {
private static final String FAILURES_AWS_PREFIX_PATH_DOC =
"The prefix to be used for defining the key of S3 object to move into the destination bucket.";

// String-valued option: the path fragment removed from the source prefix before it is
// appended to the destination key.
public static final String EXCLUDE_SOURCE_PREFIX_PATH_CONFIG =
    CONFIG_PREFIX + "exclude.source.prefix.path";
private static final String EXCLUDE_SOURCE_PREFIX_PATH_DOC =
    "The part of the source prefix path to exclude from the destination key "
    + "when the source prefix path is included.";

/**
* Creates a new {@link Config} instance.
*/
Expand Down Expand Up @@ -152,6 +199,17 @@ static ConfigDef configDef() {
ConfigDef.Width.NONE,
SUCCESS_AWS_PREFIX_PATH_CONFIG
)
.define(
SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH,
ConfigDef.Type.BOOLEAN,
false,
ConfigDef.Importance.LOW,
SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH_DOC,
CONFIG_GROUP,
groupCounter++,
ConfigDef.Width.NONE,
SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH
)
.define(
FAILURES_AWS_BUCKET_NAME_CONFIG,
ConfigDef.Type.STRING,
Expand All @@ -173,6 +231,28 @@ static ConfigDef configDef() {
groupCounter++,
ConfigDef.Width.NONE,
FAILURES_AWS_PREFIX_PATH_CONFIG
)
.define(
FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH,
ConfigDef.Type.BOOLEAN,
false,
ConfigDef.Importance.LOW,
FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH_DOC,
CONFIG_GROUP,
groupCounter++,
ConfigDef.Width.NONE,
FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH
)
.define(
EXCLUDE_SOURCE_PREFIX_PATH_CONFIG,
ConfigDef.Type.STRING,
null,
ConfigDef.Importance.LOW,
EXCLUDE_SOURCE_PREFIX_PATH_DOC,
CONFIG_GROUP,
groupCounter++,
ConfigDef.Width.NONE,
EXCLUDE_SOURCE_PREFIX_PATH_CONFIG
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public class AmazonS3MoveCleanupPolicyTest extends BaseAmazonS3Test {
public static final String S3_TEST_BUCKET = "bucket";
public static final String OBJECT_NAME = "object";
public static final String S3_OBJECT_KEY = "input/" + OBJECT_NAME;
public static final String S3_OBJECT_KEY_WITH_PREFIX = "input/prefix/" + OBJECT_NAME;
public static final String EXCLUDE_SOURCE_PREFIX_PATH = "input";

private AmazonS3Storage storage;

Expand Down Expand Up @@ -58,7 +60,36 @@ public void should_move_object_on_success() {
Assert.assertTrue(storage.exists(new S3BucketKey(S3_TEST_BUCKET, "/success/" + OBJECT_NAME).toURI()));
}

@Test
public void should_move_object_on_success_with_prefix() {
    // GIVEN an object stored under a nested prefix
    client.createBucket(S3_TEST_BUCKET);
    client.putObject(S3_TEST_BUCKET, S3_OBJECT_KEY_WITH_PREFIX, "contents");

    final Map<String, Object> props = Map.of(
        AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_PREFIX_PATH_CONFIG, "/success/",
        AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_PREFIX_PATH_CONFIG, "/failure/",
        AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH, true,
        AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH, true,
        AmazonS3MoveCleanupPolicy.Config.EXCLUDE_SOURCE_PREFIX_PATH_CONFIG, EXCLUDE_SOURCE_PREFIX_PATH
    );
    final AmazonS3MoveCleanupPolicy policy = new AmazonS3MoveCleanupPolicy();
    policy.setStorage(storage);
    policy.configure(props);

    // WHEN the policy is notified of a successful completion
    final S3BucketKey sourceKey = new S3BucketKey(S3_TEST_BUCKET, S3_OBJECT_KEY_WITH_PREFIX);
    final FileObjectMeta sourceMeta = storage.getObjectMetadata(sourceKey);
    final FileObject completed = new FileObject(
        sourceMeta,
        FileObjectOffset.empty(),
        FileObjectStatus.COMPLETED
    );
    policy.onSuccess(completed);

    // THEN the object is gone from its source key and present under the success prefix,
    // with the "prefix/" sub-path kept
    Assert.assertFalse(storage.exists(sourceMeta.uri()));
    final S3BucketKey expectedKey = new S3BucketKey(S3_TEST_BUCKET, "/success/prefix/" + OBJECT_NAME);
    Assert.assertTrue(storage.exists(expectedKey.toURI()));
}

@Test
public void should_move_object_on_failure() {
// GIVEN
Expand All @@ -85,4 +116,34 @@ public void should_move_object_on_failure() {
Assert.assertFalse(storage.exists(objectMetadata.uri()));
Assert.assertTrue(storage.exists(new S3BucketKey(S3_TEST_BUCKET, "/failure/" + OBJECT_NAME).toURI()));
}

@Test
public void should_move_object_on_failure_with_prefix() {
    // GIVEN an object stored under a nested prefix
    client.createBucket(S3_TEST_BUCKET);
    client.putObject(S3_TEST_BUCKET, S3_OBJECT_KEY_WITH_PREFIX, "contents");

    final Map<String, Object> props = Map.of(
        AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_PREFIX_PATH_CONFIG, "/success/",
        AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_PREFIX_PATH_CONFIG, "/failure/",
        AmazonS3MoveCleanupPolicy.Config.SUCCESS_AWS_INCLUDE_SOURCE_PREFIX_PATH, true,
        AmazonS3MoveCleanupPolicy.Config.FAILURES_AWS_INCLUDE_SOURCE_PREFIX_PATH, true,
        AmazonS3MoveCleanupPolicy.Config.EXCLUDE_SOURCE_PREFIX_PATH_CONFIG, EXCLUDE_SOURCE_PREFIX_PATH
    );
    final AmazonS3MoveCleanupPolicy policy = new AmazonS3MoveCleanupPolicy();
    policy.setStorage(storage);
    policy.configure(props);

    // WHEN the policy is notified of a failure
    final S3BucketKey sourceKey = new S3BucketKey(S3_TEST_BUCKET, S3_OBJECT_KEY_WITH_PREFIX);
    final FileObjectMeta sourceMeta = storage.getObjectMetadata(sourceKey);
    final FileObject processed = new FileObject(
        sourceMeta,
        FileObjectOffset.empty(),
        FileObjectStatus.COMPLETED
    );
    policy.onFailure(processed);

    // THEN the object is gone from its source key and present under the failure prefix,
    // with the "prefix/" sub-path kept
    Assert.assertFalse(storage.exists(sourceMeta.uri()));
    final S3BucketKey expectedKey = new S3BucketKey(S3_TEST_BUCKET, "/failure/prefix/" + OBJECT_NAME);
    Assert.assertTrue(storage.exists(expectedKey.toURI()));
}
}
Loading