diff --git a/src/main/java/picard/cmdline/CommandLineProgram.java b/src/main/java/picard/cmdline/CommandLineProgram.java index 50ae238abd..edc91ecc7c 100644 --- a/src/main/java/picard/cmdline/CommandLineProgram.java +++ b/src/main/java/picard/cmdline/CommandLineProgram.java @@ -25,6 +25,7 @@ import com.intel.gkl.compression.IntelDeflaterFactory; import com.intel.gkl.compression.IntelInflaterFactory; +import htsjdk.io.IOPath; import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriterFactory; @@ -341,7 +342,7 @@ protected boolean parseArgs(final String[] argv) { // object created by this code path won't be valid - but we still have to set it here in case // the tool tries to access REFERENCE_SEQUENCE directly (such tools will subsequently fail given // a non-local file anyway, but this prevents them from immediately throwing an NPE). - final PicardHtsPath refHtsPath = referenceSequence.getHtsPath(); + final IOPath refHtsPath = referenceSequence.getHtsPath(); REFERENCE_SEQUENCE = ReferenceArgumentCollection.getFileSafe(refHtsPath, Log.getInstance(this.getClass())); // The TMP_DIR setting section below was moved from instanceMain() to here due to timing issues diff --git a/src/main/java/picard/cmdline/argumentcollections/ReferenceArgumentCollection.java b/src/main/java/picard/cmdline/argumentcollections/ReferenceArgumentCollection.java index bb06e30330..eb4bb94fae 100644 --- a/src/main/java/picard/cmdline/argumentcollections/ReferenceArgumentCollection.java +++ b/src/main/java/picard/cmdline/argumentcollections/ReferenceArgumentCollection.java @@ -27,6 +27,7 @@ import java.io.File; import java.nio.file.Path; +import htsjdk.io.IOPath; import htsjdk.samtools.util.Log; import picard.nio.PicardBucketUtils; import picard.nio.PicardHtsPath; @@ -61,7 +62,7 @@ default Path getReferencePath(){ * * @return The reference provided by the user, if any, or the default, if any, as a PicardHtsPath. May be null. */ - default PicardHtsPath getHtsPath(){ + default IOPath getHtsPath(){ return getReferenceFile() == null ? null : new PicardHtsPath(getReferenceFile()); } @@ -74,7 +75,7 @@ default PicardHtsPath getHtsPath(){ * the value returned by calls to getReferenceFile to not get an NPE, and to fail gracefully downstream * with an error message that includes the reference file specifier. */ - static File getFileSafe(final PicardHtsPath picardPath, final Log log) { + static File getFileSafe(final IOPath picardPath, final Log log) { if (picardPath == null) { return null; } else if (picardPath.getScheme().equals(PicardBucketUtils.FILE_SCHEME)) { diff --git a/src/main/java/picard/nio/PicardBucketUtils.java b/src/main/java/picard/nio/PicardBucketUtils.java index 04b610b742..8e100db005 100644 --- a/src/main/java/picard/nio/PicardBucketUtils.java +++ b/src/main/java/picard/nio/PicardBucketUtils.java @@ -1,18 +1,12 @@ package picard.nio; -import com.google.cloud.storage.contrib.nio.CloudStorageFileSystem; -import com.google.cloud.storage.contrib.nio.CloudStoragePath; import htsjdk.io.IOPath; import htsjdk.samtools.util.FileExtensions; import htsjdk.utils.ValidationUtils; +import picard.PicardException; -import java.io.File; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; import java.util.UUID; - /** * Derived from BucketUtils.java in GATK */ @@ -20,11 +14,12 @@ public class PicardBucketUtils { public static final String GOOGLE_CLOUD_STORAGE_FILESYSTEM_SCHEME = "gs"; public static final String HTTP_FILESYSTEM_PROVIDER_SCHEME = "http"; public static final String HTTPS_FILESYSTEM_PROVIDER_SCHEME = "https"; - public static final String HDFS_SCHEME = "hdfs"; public static final String FILE_SCHEME = "file"; // This Picard test staging bucket has a TTL of 180 days (DeleteAction with Age = 180) - public static final String GCLOUD_PICARD_STAGING_DIRECTORY = "gs://hellbender-test-logs/staging/picard/"; + public static final String GCLOUD_PICARD_STAGING_DIRECTORY_STR = "gs://hellbender-test-logs/staging/picard/"; + public static final PicardHtsPath GCLOUD_PICARD_STAGING_DIRECTORY = new PicardHtsPath(GCLOUD_PICARD_STAGING_DIRECTORY_STR); + // slashes omitted since hdfs paths seem to only have 1 slash which would be weirder to include than no slashes private PicardBucketUtils(){} //private so that no one will instantiate this class @@ -41,59 +36,43 @@ private PicardBucketUtils(){} //private so that no one will instantiate this cla * @return a new temporary path of the form [directory]/[prefix][random chars][.extension] * */ - public static PicardHtsPath getTempFilePath(final String directory, String prefix, final String extension){ + public static IOPath getTempFilePath(final IOPath directory, String prefix, final String extension){ ValidationUtils.validateArg(extension.startsWith("."), "The new extension must start with a period '.'"); final String defaultPrefix = "tmp"; if (directory == null){ + // If directory = null, we are creating a local temp file. // File#createTempFile requires that the prefix be at least 3 characters long prefix = prefix.length() >= 3 ? prefix : defaultPrefix; return new PicardHtsPath(PicardIOUtils.createTempFile(prefix, extension)); - } - - if (isGcsUrl(directory) || isHadoopUrl(directory)){ - final PicardHtsPath path = PicardHtsPath.fromPath(randomRemotePath(directory, prefix, extension)); - PicardIOUtils.deleteOnExit(path.toPath()); - // Mark auxiliary files to be deleted - PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.TRIBBLE_INDEX, true).toPath()); - PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.TABIX_INDEX, true).toPath()); - PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.BAI_INDEX, true).toPath()); // e.g. file.bam.bai - PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.BAI_INDEX, false).toPath()); // e.g. file.bai - PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, ".md5", true).toPath()); - return path; - } else { + } else if (PicardBucketUtils.isLocalPath(directory)) { // Assume the (non-null) directory points to a directory on a local filesystem prefix = prefix.length() >= 3 ? prefix : defaultPrefix; - return new PicardHtsPath(PicardIOUtils.createTempFileInDirectory(prefix, extension, new File(directory))); + return new PicardHtsPath(PicardIOUtils.createTempFileInDirectory(prefix, extension, directory.toPath().toFile())); + } else { + if (isSupportedCloudFilesystem(directory)) { + final IOPath path = randomRemotePath(directory, prefix, extension); + PicardIOUtils.deleteOnExit(path.toPath()); + // Mark auxiliary files to be deleted + PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.TRIBBLE_INDEX, true).toPath()); + PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.TABIX_INDEX, true).toPath()); + PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.BAI_INDEX, true).toPath()); // e.g. file.bam.bai + PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, FileExtensions.BAI_INDEX, false).toPath()); // e.g. file.bai + PicardIOUtils.deleteOnExit(PicardHtsPath.replaceExtension(path, ".md5", true).toPath()); + return path; + } else { + throw new PicardException("Unsupported cloud filesystem: " + directory.getURIString()); + } } } - /** - * This overload of getTempFilePath takes the directory of type PicardHtsPath instead of String. - * - * @see #getTempFilePath(String, String, String) - * - */ - public static PicardHtsPath getTempFilePath(final IOPath directory, String prefix, final String extension){ - return getTempFilePath(directory.getURIString(), prefix, extension); - } - - /** - * Calls getTempFilePath with the empty string as the prefix. - * - * @see #getTempFilePath(String, String, String) - */ - public static PicardHtsPath getTempFilePath(String directory, String extension){ - return getTempFilePath(directory, "", extension); - } - /** * Creates a temporary file in a local directory. * - * @see #getTempFilePath(String, String, String) + * @see #getTempFilePath(IOPath, String, String) */ - public static PicardHtsPath getLocalTempFilePath(final String prefix, final String extension){ - return getTempFilePath((String) null, prefix, extension); + public static IOPath getLocalTempFilePath(final String prefix, final String extension){ + return getTempFilePath(null, prefix, extension); } /** @@ -110,10 +89,10 @@ public static PicardHtsPath getLocalTempFilePath(final String prefix, final Stri * @param relativePath The relative location for the new "directory" under the harcoded staging bucket with a TTL set e.g. "test/RevertSam/". * @return A PicardHtsPath object to a randomly generated "directory" e.g. "gs://hellbender-test-logs/staging/picard/test/RevertSam/{randomly-generated-string}/" */ - public static PicardHtsPath getRandomGCSDirectory(final String relativePath){ + public static IOPath getRandomGCSDirectory(final String relativePath){ ValidationUtils.validateArg(relativePath.endsWith("/"), "relativePath must end in backslash '/': " + relativePath); - return PicardHtsPath.fromPath(PicardBucketUtils.randomRemotePath(GCLOUD_PICARD_STAGING_DIRECTORY + relativePath, "", "/")); + return PicardBucketUtils.randomRemotePath(PicardHtsPath.resolve(GCLOUD_PICARD_STAGING_DIRECTORY, relativePath), "", "/"); } /** @@ -123,39 +102,14 @@ public static PicardHtsPath getRandomGCSDirectory(final String relativePath){ * @param prefix The beginning of the file name * @param suffix The end of the file name, e.g. ".tmp" */ - public static Path randomRemotePath(String stagingLocation, String prefix, String suffix) { + public static IOPath randomRemotePath(final IOPath stagingLocation, final String prefix, final String suffix) { if (isGcsUrl(stagingLocation)) { - return getPathOnGcs(stagingLocation).resolve(prefix + UUID.randomUUID() + suffix); - } else if (isHadoopUrl(stagingLocation)) { - return Paths.get(stagingLocation, prefix + UUID.randomUUID() + suffix); + return PicardHtsPath.resolve(stagingLocation, prefix + UUID.randomUUID() + suffix); } else { throw new IllegalArgumentException("Staging location is not remote: " + stagingLocation); } } - /** - * String -> Path. This *should* not be necessary (use Paths.get(URI.create(...)) instead) , but it currently is - * on Spark because using the fat, shaded jar breaks the registration of the GCS FilesystemProvider. - * To transform other types of string URLs into Paths, use IOUtils.getPath instead. - */ - private static CloudStoragePath getPathOnGcs(String gcsUrl) { - // use a split limit of -1 to preserve empty split tokens, especially trailing slashes on directory names - final String[] split = gcsUrl.split("/", -1); - final String BUCKET = split[2]; - final String pathWithoutBucket = String.join("/", Arrays.copyOfRange(split, 3, split.length)); - return CloudStorageFileSystem.forBucket(BUCKET).getPath(pathWithoutBucket); - } - - /** - * - * @param path path to inspect - * @return true if this path represents a gcs location - */ - private static boolean isGcsUrl(final String path) { - GATKUtils.nonNull(path); - return path.startsWith(GOOGLE_CLOUD_STORAGE_FILESYSTEM_SCHEME + "://"); - } - /** * * Return true if this {@code PicardHTSPath} represents a gcs URI. @@ -168,34 +122,31 @@ public static boolean isGcsUrl(final IOPath pathSpec) { } /** - * @param pathSpec specifier to inspect - * @return true if this {@code GATKPath} represents a remote storage system which may benefit from prefetching (gcs or http(s)) + * @param path specifier to inspect + * @return true if this {@code IOPath} represents a remote storage system which may benefit from prefetching (gcs or http(s)) */ - public static boolean isEligibleForPrefetching(final IOPath pathSpec) { - GATKUtils.nonNull(pathSpec); - return isEligibleForPrefetching(pathSpec.getScheme()); - } - - /** - * @param path path to inspect - * @return true if this {@code Path} represents a remote storage system which may benefit from prefetching (gcs or http(s)) - */ - public static boolean isEligibleForPrefetching(final Path path) { + public static boolean isEligibleForPrefetching(final IOPath path) { GATKUtils.nonNull(path); - return isEligibleForPrefetching(path.toUri().getScheme()); - } - - private static boolean isEligibleForPrefetching(final String scheme){ + final String scheme = path.getScheme(); return scheme != null && (scheme.equals(GOOGLE_CLOUD_STORAGE_FILESYSTEM_SCHEME) || scheme.equals(HTTP_FILESYSTEM_PROVIDER_SCHEME) || scheme.equals(HTTPS_FILESYSTEM_PROVIDER_SCHEME)); } + public static boolean isLocalPath(final IOPath path){ + return path.getScheme().equals(FILE_SCHEME); + } + /** - * Returns true if the given path is a HDFS (Hadoop filesystem) URL. + * As of August 2024, we only support Google Cloud. + * Will add other filesystems (e.g. Azure, AWS) when ready. + * + * @return whether the cloud filesystem is currently supported by Picard. */ - private static boolean isHadoopUrl(String path) { - return path.startsWith(HDFS_SCHEME + "://"); + public static boolean isSupportedCloudFilesystem(final IOPath path){ + ValidationUtils.validateArg(! isLocalPath(path), "isSupportedCloudFilesystem should be called on a cloud path but was given: " + + path.getURIString()); + return isGcsUrl(path); } } diff --git a/src/main/java/picard/nio/PicardHtsPath.java b/src/main/java/picard/nio/PicardHtsPath.java index f881a642f4..d7d393efef 100644 --- a/src/main/java/picard/nio/PicardHtsPath.java +++ b/src/main/java/picard/nio/PicardHtsPath.java @@ -182,7 +182,7 @@ public static PicardHtsPath replaceExtension(final IOPath path, final String new /** * Wrapper for Path.resolve() */ - public static PicardHtsPath resolve(final PicardHtsPath absPath, final String relativePath){ + public static PicardHtsPath resolve(final IOPath absPath, final String relativePath){ return PicardHtsPath.fromPath(absPath.toPath().resolve(relativePath)); } } diff --git a/src/test/java/picard/cmdline/CommandLineProgramTest.java b/src/test/java/picard/cmdline/CommandLineProgramTest.java index 4d91c2565c..ceaecab0f2 100644 --- a/src/test/java/picard/cmdline/CommandLineProgramTest.java +++ b/src/test/java/picard/cmdline/CommandLineProgramTest.java @@ -1,5 +1,6 @@ package picard.cmdline; +import htsjdk.io.IOPath; import org.apache.commons.io.FileUtils; import org.testng.annotations.AfterClass; import picard.PicardException; @@ -24,19 +25,19 @@ public abstract class CommandLineProgramTest { public static final File CHR_M_DICT = new File(REFERENCE_TEST_DIR,"chrM.reference.dict"); // These are the hg19 references with chromosome names "1" (rather than "chr1") - public static final PicardHtsPath HG19_CHR2021_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/references/human_g1k_v37.20.21.fasta"); - public static final PicardHtsPath HG19_CHR2021 = new PicardHtsPath("testdata/picard/reference/human_g1k_v37.20.21.fasta.gz"); + public static final IOPath HG19_CHR2021_GCLOUD = PicardHtsPath.resolve(GCloudTestUtils.getTestInputPath(), "picard/references/human_g1k_v37.20.21.fasta"); + public static final IOPath HG19_CHR2021 = new PicardHtsPath("testdata/picard/reference/human_g1k_v37.20.21.fasta.gz"); - public static final PicardHtsPath NA12878_MINI_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.bam"); - public static final PicardHtsPath NA12878_MINI_CRAM_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); - public static final PicardHtsPath NA12878_MEDIUM_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.bam"); - public static final PicardHtsPath NA12878_MEDIUM_CRAM_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); + public static final IOPath NA12878_MINI_GCLOUD = PicardHtsPath.resolve(GCloudTestUtils.getTestInputPath(), "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.bam"); + public static final IOPath NA12878_MINI_CRAM_GCLOUD = PicardHtsPath.resolve(GCloudTestUtils.getTestInputPath(), "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); + public static final IOPath NA12878_MEDIUM_GCLOUD = PicardHtsPath.resolve(GCloudTestUtils.getTestInputPath(), "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.bam"); + public static final IOPath NA12878_MEDIUM_CRAM_GCLOUD = PicardHtsPath.resolve(GCloudTestUtils.getTestInputPath(), "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram"); // A per-test-class directory that will be deleted after the tests are complete. private File tempOutputDir; /** - * returns an directory designated for output which will be deleted after the test class is tested + * returns a directory designated for output which will be deleted after the test class is tested */ public File getTempOutputDir() { if (tempOutputDir == null) { diff --git a/src/test/java/picard/nio/PicardBucketUtilsTest.java b/src/test/java/picard/nio/PicardBucketUtilsTest.java index 57fe52d482..50f61ec756 100644 --- a/src/test/java/picard/nio/PicardBucketUtilsTest.java +++ b/src/test/java/picard/nio/PicardBucketUtilsTest.java @@ -1,14 +1,11 @@ package picard.nio; +import htsjdk.io.IOPath; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.util.GCloudTestUtils; -import java.net.URI; -import java.nio.file.Path; -import java.nio.file.Paths; - public class PicardBucketUtilsTest { @DataProvider(name="testGetTempFilePathDataProvider") @@ -23,32 +20,28 @@ public Object[][] testGetTempFilePathDataProvider() { // Check that the extension scheme is consistent for cloud and local files @Test(dataProvider = "testGetTempFilePathDataProvider", groups = "cloud") - public void testGetTempFilePath(final String directory, final String prefix, final String extension){ - PicardHtsPath path = PicardBucketUtils.getTempFilePath(directory, prefix, extension); + public void testGetTempFilePath(final IOPath directory, final String prefix, final String extension){ + final IOPath path = PicardBucketUtils.getTempFilePath(directory, prefix, extension); Assert.assertTrue(path.hasExtension(extension)); if (directory != null){ - Assert.assertTrue(path.getURIString().startsWith(directory)); - } - - if (directory == null){ - Assert.assertEquals(path.getScheme(), "file"); + Assert.assertTrue(path.getURIString().startsWith(directory.getURIString())); + } else { + Assert.assertEquals(path.getScheme(), PicardBucketUtils.FILE_SCHEME); } } @DataProvider public Object[][] getVariousPathsForPrefetching(){ return new Object[][]{ - {"file:///local/file", false}, - {"gs://abucket/bucket", true}, - {"gs://abucket_with_underscores", true}, + {new PicardHtsPath("file:///local/file"), false}, + {new PicardHtsPath("gs://abucket/bucket"), true}, + {new PicardHtsPath("gs://abucket_with_underscores"), true}, }; } @Test(groups="bucket", dataProvider = "getVariousPathsForPrefetching") - public void testIsEligibleForPrefetching(String path, boolean isPrefetchable){ - final URI uri = URI.create(path); - final Path uriPath = Paths.get(uri); - Assert.assertEquals(PicardBucketUtils.isEligibleForPrefetching(uriPath), isPrefetchable); + public void testIsEligibleForPrefetching(final IOPath path, boolean isPrefetchable){ + Assert.assertEquals(PicardBucketUtils.isEligibleForPrefetching(path), isPrefetchable); } } \ No newline at end of file diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index 0c3f7a626a..f337403b92 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -23,6 +23,7 @@ */ package picard.sam; +import htsjdk.io.IOPath; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; @@ -289,7 +290,7 @@ final Path makeTempDictionary(final Path inputFasta, final String dictNamePrefix final PicardHtsPath HG19_MINI = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT_GCLOUD, "picard/references/hg19mini.fasta"); final PicardHtsPath HG19_MINI_LOCAL = new PicardHtsPath("testdata/picard/reference/hg19mini.fasta"); - final PicardHtsPath CLOUD_OUTPUT_DIR = PicardHtsPath.resolve(GCloudTestUtils.TEST_STAGING_DEFAULT_GCLOUD, "picard/"); + final PicardHtsPath CLOUD_OUTPUT_DIR = PicardHtsPath.resolve(GCloudTestUtils.TEST_STAGING_DEFAULT_GCLOUD, "picard/CreateSequenceDictionary/"); @DataProvider public Object[][] cloudTestData() { @@ -302,7 +303,7 @@ public Object[][] cloudTestData() { @Test(groups = "cloud", dataProvider = "cloudTestData") public void testCloud(final PicardHtsPath inputReference) { - final PicardHtsPath output = PicardBucketUtils.getTempFilePath(CLOUD_OUTPUT_DIR.getURIString() + "test", ".dict"); + final IOPath output = PicardBucketUtils.getTempFilePath(CLOUD_OUTPUT_DIR, "", ".dict"); final String[] argv = { "REFERENCE=" + inputReference.getURI(), @@ -310,7 +311,7 @@ public void testCloud(final PicardHtsPath inputReference) { }; // This is the "original" dictionary that lives in gs://hellbender/test/resources/ - final PicardHtsPath expectedOutputPath = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT_GCLOUD, "hg19mini.dict"); + final IOPath expectedOutputPath = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT_GCLOUD, "hg19mini.dict"); Assert.assertEquals(runPicardCommandLine(argv), 0); final SAMSequenceDictionary expectedDictionary = SAMSequenceDictionaryExtractor.extractDictionary(expectedOutputPath.toPath()); final SAMSequenceDictionary actualDictionary = SAMSequenceDictionaryExtractor.extractDictionary(output.toPath()); diff --git a/src/test/java/picard/sam/DownsampleSamTest.java b/src/test/java/picard/sam/DownsampleSamTest.java index 9e31332290..cd35d6c791 100644 --- a/src/test/java/picard/sam/DownsampleSamTest.java +++ b/src/test/java/picard/sam/DownsampleSamTest.java @@ -1,5 +1,6 @@ package picard.sam; +import htsjdk.io.IOPath; import htsjdk.samtools.*; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.BufferedLineReader; @@ -137,14 +138,14 @@ public void testDownsampleStrategies(final double fraction, final Strategy strat testDownsampleWorker(new PicardHtsPath(tempSamFile), fraction, strategy.name(), seed); } - private PicardHtsPath testDownsampleWorker(final PicardHtsPath samFile, final double fraction, final String strategy, final Integer seed) throws IOException { + private IOPath testDownsampleWorker(final IOPath samFile, final double fraction, final String strategy, final Integer seed) throws IOException { return testDownsampleWorker(samFile, fraction, strategy, seed, Optional.empty(), Optional.empty(), Optional.empty()); } - private PicardHtsPath testDownsampleWorker(final PicardHtsPath samFile, final double fraction, final String strategy, final Integer seed, - final Optional outputFile, final Optional metricsFile, - final Optional referenceFile) throws IOException { - final PicardHtsPath downsampled = outputFile.isEmpty() ? new PicardHtsPath(File.createTempFile("DownsampleSam", ".bam", tempDir)) : outputFile.get(); + private IOPath testDownsampleWorker(final IOPath samFile, final double fraction, final String strategy, final Integer seed, + final Optional outputFile, final Optional metricsFile, + final Optional referenceFile) throws IOException { + final IOPath downsampled = outputFile.isEmpty() ? new PicardHtsPath(File.createTempFile("DownsampleSam", ".bam", tempDir)) : outputFile.get(); final List args = new ArrayList<>(Arrays.asList( "INPUT=" + samFile.getURIString(), @@ -169,7 +170,7 @@ private PicardHtsPath testDownsampleWorker(final PicardHtsPath samFile, final d // temporarily skip this check for cram files until ValidateSam is updated the support cloud reference input if (! downsampled.isCram()){ final ValidateSamFile validateSamFile = new ValidateSamFile(); - validateSamFile.INPUT = downsampled; + validateSamFile.INPUT = PicardHtsPath.fromPath(downsampled.toPath()); Assert.assertEquals(validateSamFile.doWork(), 0); } @@ -235,7 +236,7 @@ public Object[][] repeatedDownsamplingProvider() { @Test(dataProvider = "RepeatedDownsamplingProvider") public void testRepeatedDownsampling(List strategies, List seeds) throws IOException { - PicardHtsPath input = new PicardHtsPath(tempSamFile); + IOPath input = new PicardHtsPath(tempSamFile); final long nReadsOriginal = SamTestUtil.countSamTotalRecord(input.toPath()); double totalFraction = 1; for (int i = 0 ; i < strategies.size(); i++) { @@ -267,21 +268,25 @@ public Object[][] testCloudCramDataProvider() { } @Test(groups = "cloud", dataProvider = "testCloudBamDataProvider") - public void testCloudBam(final PicardHtsPath inputSAM, final boolean outputInCloud, final boolean createMetrics) throws IOException { - final Optional output = outputInCloud ? - Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD + "downsample", ".bam")) : + public void testCloudBam(final IOPath inputSAM, final boolean outputInCloud, final boolean createMetrics) throws IOException { + final IOPath outputRoot = PicardHtsPath.resolve(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD, "DownsampleSam/downsample/"); + final IOPath metricsOutputRoot = PicardHtsPath.resolve(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD, "DownsampleSam/metrics/"); + + final Optional output = outputInCloud ? + Optional.of(PicardBucketUtils.getTempFilePath(outputRoot, "", ".bam")) : Optional.empty(); - final Optional metricsFile = createMetrics ? - Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD + "metrics", ".txt")) : + final Optional metricsFile = createMetrics ? + Optional.of(PicardBucketUtils.getTempFilePath(metricsOutputRoot, "", ".txt")) : Optional.empty(); testDownsampleWorker(inputSAM, 0.5, ConstantMemory.toString(), DEFAULT_RANDOM_SEED, output, metricsFile, Optional.empty()); } // Isolate the test case for cram input from the above tests for bams, since the tool is much slower with a @Test(groups = "cloud", dataProvider = "testCloudCramDataProvider") - public void testCloudCram(final PicardHtsPath inputCRAM, final boolean outputInCloud, final PicardHtsPath reference) throws IOException { - final Optional output = outputInCloud ? - Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD + "downsample", ".cram")) : + public void testCloudCram(final IOPath inputCRAM, final boolean outputInCloud, final IOPath reference) throws IOException { + final IOPath outputRoot = PicardHtsPath.resolve(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD, "DownsampleSam/output/"); + final Optional output = outputInCloud ? + Optional.of(PicardBucketUtils.getTempFilePath( outputRoot, "", ".cram")) : Optional.empty(); testDownsampleWorker(inputCRAM, 0.5, ConstantMemory.toString(), DEFAULT_RANDOM_SEED, output, Optional.empty(), Optional.of(reference)); } diff --git a/src/test/java/picard/sam/RevertSamTest.java b/src/test/java/picard/sam/RevertSamTest.java index 00461b55b7..095684471f 100755 --- a/src/test/java/picard/sam/RevertSamTest.java +++ b/src/test/java/picard/sam/RevertSamTest.java @@ -610,17 +610,17 @@ public void testHardClipRoundTrip() throws Exception { private final static boolean OUTPUT_BY_READ_GROUP = true; @DataProvider(name = "cloudTestData") public Object[][] getCloudTestData() { - final PicardHtsPath tempCloudCram = PicardBucketUtils.getTempFilePath(DEFAULT_CLOUD_TEST_OUTPUT_DIR, "RevertSam", ".cram"); - final PicardHtsPath tempLocalCram = PicardBucketUtils.getLocalTempFilePath("RevertSam", ".cram"); + final IOPath tempCloudCram = PicardBucketUtils.getTempFilePath(DEFAULT_CLOUD_TEST_OUTPUT_DIR, "RevertSam", ".cram"); + final IOPath tempLocalCram = PicardBucketUtils.getLocalTempFilePath("RevertSam", ".cram"); return new Object[][]{ // Output by read group without the output map, write output bams in the cloud. {NA12878_MEDIUM_GCLOUD, PicardBucketUtils.getRandomGCSDirectory(DEFAULT_CLOUD_TEST_OUTPUT_RELATIVE_PATH), OUTPUT_BY_READ_GROUP, null, null }, // Output by read group using the output map, write output bams in the cloud - {NA12878_MEDIUM_GCLOUD, null, OUTPUT_BY_READ_GROUP, DEFAULT_CLOUD_TEST_OUTPUT_DIR.getURIString(), null }, + {NA12878_MEDIUM_GCLOUD, null, OUTPUT_BY_READ_GROUP, DEFAULT_CLOUD_TEST_OUTPUT_DIR, null }, // Output by read group using the local output map, write output bams in the cloud - {NA12878_MEDIUM_GCLOUD, null, OUTPUT_BY_READ_GROUP, REVERT_SAM_LOCAL_TEST_DATA_DIR, null }, + {NA12878_MEDIUM_GCLOUD, null, OUTPUT_BY_READ_GROUP, new PicardHtsPath(REVERT_SAM_LOCAL_TEST_DATA_DIR), null }, // Cram input, output a CRAM for each read group - {NA12878_MEDIUM_CRAM_GCLOUD, null, OUTPUT_BY_READ_GROUP, DEFAULT_CLOUD_TEST_OUTPUT_DIR.getURIString(), HG19_CHR2021 }, + {NA12878_MEDIUM_CRAM_GCLOUD, null, OUTPUT_BY_READ_GROUP, DEFAULT_CLOUD_TEST_OUTPUT_DIR, HG19_CHR2021 }, // Cram input in the cloud, single cloud cram output {NA12878_MEDIUM_CRAM_GCLOUD, tempCloudCram, !OUTPUT_BY_READ_GROUP, null, HG19_CHR2021 }, // Cram input in the cloud, single local cram output @@ -639,16 +639,16 @@ public Object[][] getCloudTestData() { * * Also see RevertSam::createOutputMapFromReadGroups(), which does part of what this method does (creates a dictionary rather than two lists). */ - private PicardHtsPath getTmpTestReadGroupMapTable(final IOPath sam, final String perReadGroupPathBase) { + private IOPath getTmpTestReadGroupMapTable(final IOPath sam, final IOPath perReadGroupPathBase) { ValidationUtils.validateArg(sam.getExtension().isPresent(), "The variable sam must be a SAM file but is missing an extension: " + sam.getURIString()); final String samExtension = sam.getExtension().get(); - final PicardHtsPath perReadGroupTableFile = PicardBucketUtils.getTempFilePath(DEFAULT_CLOUD_TEST_OUTPUT_DIR.getURIString(), ".txt"); + final IOPath perReadGroupTableFile = PicardBucketUtils.getTempFilePath(DEFAULT_CLOUD_TEST_OUTPUT_DIR, "", ".txt"); final List readGroups = getReadGroups(sam.toPath()).stream().map(rg -> rg.getId()).collect(Collectors.toList()); final List readGroupOutput = new ArrayList<>(); for (String readGroup : readGroups){ - final PicardHtsPath readGroupOutputPath = PicardBucketUtils.getTempFilePath(perReadGroupPathBase, readGroup, samExtension); + final IOPath readGroupOutputPath = PicardBucketUtils.getTempFilePath(perReadGroupPathBase, readGroup, samExtension); readGroupOutput.add(readGroupOutputPath.getURIString()); } @@ -705,8 +705,8 @@ private List getReadGroups(final Path sam){ * @param outputMapDir The location for a dynamically created output map file. May be null. */ @Test(dataProvider = "cloudTestData", groups = "cloud") - public void testCloud(final PicardHtsPath inputSAM, final PicardHtsPath outputPath, final boolean outputByReadGroup, - final String outputMapDir, final PicardHtsPath reference) { + public void testCloud(final IOPath inputSAM, final IOPath outputPath, final boolean outputByReadGroup, + final IOPath outputMapDir, final IOPath reference) { final List args = new ArrayList<>(Arrays.asList( "INPUT=" + inputSAM.getURIString(), "OUTPUT_BY_READGROUP=" + outputByReadGroup)); @@ -715,7 +715,7 @@ public void testCloud(final PicardHtsPath inputSAM, final PicardHtsPath outputPa args.add("OUTPUT=" + outputPath.getURIString()); } - PicardHtsPath outputMap = null; + IOPath outputMap = null; if (outputMapDir != null){ // Dynamically create a temporary output map file based on the read groups in inputSAM. outputMap = getTmpTestReadGroupMapTable(inputSAM, outputMapDir); diff --git a/src/test/java/picard/util/GCloudTestUtils.java b/src/test/java/picard/util/GCloudTestUtils.java index a0230fe4b9..85a9d7526d 100644 --- a/src/test/java/picard/util/GCloudTestUtils.java +++ b/src/test/java/picard/util/GCloudTestUtils.java @@ -1,5 +1,6 @@ package picard.util; +import htsjdk.io.IOPath; import picard.nio.PicardHtsPath; public final class GCloudTestUtils { @@ -48,8 +49,8 @@ public static String getTestProject() { * * @return PICARD_TEST_STAGING env. var if defined, or {@value #TEST_STAGING_DEFAULT.getURIString()} */ - public static String getTestStaging() { - return getSystemProperty("PICARD_TEST_STAGING", TEST_STAGING_DEFAULT_GCLOUD.getURIString()); + public static IOPath getTestStaging() { + return new PicardHtsPath(getSystemProperty("PICARD_TEST_STAGING", TEST_STAGING_DEFAULT_GCLOUD.getURIString())); } /** @@ -58,8 +59,8 @@ public static String getTestStaging() { * * @return PICARD_TEST_INPUTS env. var if defined or {@value #TEST_INPUTS_DEFAULT.getURIString()}. */ - public static String getTestInputPath() { - return getSystemProperty("PICARD_TEST_INPUTS", TEST_INPUTS_DEFAULT_GCLOUD.getURIString()); + public static IOPath getTestInputPath() { + return new PicardHtsPath(getSystemProperty("PICARD_TEST_INPUTS", TEST_INPUTS_DEFAULT_GCLOUD.getURIString())); } } diff --git a/src/test/java/picard/util/GCloudTestUtilsUnitTest.java b/src/test/java/picard/util/GCloudTestUtilsUnitTest.java index 55a64eb4f7..e5a78f6ced 100644 --- a/src/test/java/picard/util/GCloudTestUtilsUnitTest.java +++ b/src/test/java/picard/util/GCloudTestUtilsUnitTest.java @@ -22,7 +22,7 @@ public void testDownload() throws IOException { @Test(groups = "bucket") public void testUpload() throws IOException { - final Path uploadDir = Files.createTempDirectory(IOUtil.getPath(GCloudTestUtils.getTestStaging()), "picardTest"); + final Path uploadDir = Files.createTempDirectory(GCloudTestUtils.getTestStaging().toPath(), "picardTest"); final Path txtUpload = uploadDir.resolve("tmp.txt"); try { Assert.assertFalse(Files.exists(txtUpload)); diff --git a/src/test/java/picard/util/IntervalListToolsTest.java b/src/test/java/picard/util/IntervalListToolsTest.java index 19bc363c38..d737aa0d93 100644 --- a/src/test/java/picard/util/IntervalListToolsTest.java +++ b/src/test/java/picard/util/IntervalListToolsTest.java @@ -23,6 +23,7 @@ */ package picard.util; +import htsjdk.io.IOPath; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.IntervalList; @@ -59,8 +60,8 @@ public class IntervalListToolsTest extends CommandLineProgramTest { private static final String TEST_DATA_DIR = "testdata/picard/util/"; - private static final String CLOUD_DATA_DIR = GCloudTestUtils.getTestInputPath() + "picard/intervals/"; - private static final String CLOUD_OUTPUT_DIR = GCloudTestUtils.getTestStaging() + "picard/"; + private static final IOPath CLOUD_DATA_DIR = PicardHtsPath.resolve(GCloudTestUtils.getTestInputPath(), "picard/intervals/"); + private static final IOPath CLOUD_OUTPUT_DIR = PicardHtsPath.resolve(GCloudTestUtils.getTestStaging(), "picard/"); private final Path scatterable = Paths.get(TEST_DATA_DIR, "scatterable.interval_list"); private final PicardHtsPath scatterableCloud = new PicardHtsPath(CLOUD_DATA_DIR + "scatterable.interval_list"); private final Path scatterableStdin = Paths.get(TEST_DATA_DIR, "scatterable_stdin"); @@ -242,9 +243,8 @@ private IntervalList tester(final IntervalListTools.Action action) { private IntervalList tester(final IntervalListTools.Action action, final boolean invert, final boolean unique, final boolean dontMergeAbutting, final Path input1, final Path input2, final boolean cloudOutput) { - final String outputDirFullName = cloudOutput ? CLOUD_OUTPUT_DIR : null; final String prefix = "IntervalListTools"; - final PicardHtsPath output = PicardBucketUtils.getTempFilePath(outputDirFullName, prefix, INTERVAL_LIST_EXTENSION); + final IOPath output = PicardBucketUtils.getTempFilePath(cloudOutput ? CLOUD_OUTPUT_DIR : null, prefix, INTERVAL_LIST_EXTENSION); final List args = buildStandardTesterArguments(action, invert, unique, dontMergeAbutting, input1, input2); args.add("OUTPUT=" + output); Assert.assertEquals(runPicardCommandLine(args), 0); @@ -263,9 +263,8 @@ private long testerCountOutput(IntervalListTools.Action action, IntervalListTool private long testerCountOutput(IntervalListTools.Action action, IntervalListTools.Output outputValue, boolean invert, boolean unique, boolean dontMergeAbutting, Path input1, Path input2, final boolean cloudOutput) throws IOException { - final String outputDirFullName = cloudOutput ? CLOUD_OUTPUT_DIR : null; final String prefix = "IntervalListTools"; - final PicardHtsPath countOutput = PicardBucketUtils.getTempFilePath(outputDirFullName, prefix, ".txt"); + final IOPath countOutput = PicardBucketUtils.getTempFilePath(cloudOutput ? CLOUD_OUTPUT_DIR : null, prefix, ".txt"); final List args = buildStandardTesterArguments(action, invert, unique, dontMergeAbutting, input1, input2); args.add("OUTPUT_VALUE=" + outputValue);