Skip to content
3 changes: 2 additions & 1 deletion src/main/java/picard/analysis/CollectRawWgsMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public RawWgsMetrics() {

public RawWgsMetrics(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> highQualityDepthHistogramNonZero
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
final double pctExcludedByMapq,
Expand All @@ -135,7 +136,7 @@ public RawWgsMetrics(final IntervalList intervals,
final int coverageCap,
final Histogram<Integer> unfilteredBaseQHistogram,
final int sampleSize) {
super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
super(intervals, highQualityDepthHistogram, highQualityDepthHistogramNonZero,unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ public WgsMetricsWithNonZeroCoverage() {

public WgsMetricsWithNonZeroCoverage(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> highQualitDepthHistogramNonZero,
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
final double pctExcludedByMapq,
Expand All @@ -116,7 +117,7 @@ public WgsMetricsWithNonZeroCoverage(final IntervalList intervals,
final int coverageCap,
final Histogram<Integer> unfilteredBaseQHistogram,
final int sampleSize) {
super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
super(intervals, highQualityDepthHistogram, highQualitDepthHistogramNonZero, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize);
}
}
Expand Down Expand Up @@ -170,6 +171,7 @@ protected int doWork() {
@Override
protected WgsMetrics generateWgsMetrics(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> highQualityDepthHistogramNonZero,
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
final double pctExcludedByMapq,
Expand All @@ -185,6 +187,7 @@ protected WgsMetrics generateWgsMetrics(final IntervalList intervals,
return new WgsMetricsWithNonZeroCoverage(
intervals,
highQualityDepthHistogram,
highQualityDepthHistogramNonZeroFinal,
unfilteredDepthHistogram,
pctExcludedByAdapter,
pctExcludedByMapq,
Expand All @@ -207,7 +210,7 @@ protected WgsMetricsCollector getCollector(final int coverageCap, final Interval

protected class WgsMetricsWithNonZeroCoverageCollector extends WgsMetricsCollector {
Histogram<Integer> highQualityDepthHistogram;
Histogram<Integer> highQualityDepthHistogramNonZero;
Histogram<Integer> highQualityDepthHistogramNonZeroFinal;

public WgsMetricsWithNonZeroCoverageCollector(final CollectWgsMetricsWithNonZeroCoverage metrics,
final int coverageCap, final IntervalList intervals) {
Expand All @@ -222,7 +225,7 @@ public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file,
final CountingFilter mapqFilter,
final CountingPairedFilter pairFilter) {
highQualityDepthHistogram = getDepthHistogram();
highQualityDepthHistogramNonZero = getDepthHistogramNonZero();
highQualityDepthHistogramNonZeroFinal = getDepthHistogramNonZero();

// calculate metrics the same way as in CollectWgsMetrics
final WgsMetricsWithNonZeroCoverage metrics = (WgsMetricsWithNonZeroCoverage) getMetrics(dupeFilter, adapterFilter, mapqFilter, pairFilter);
Expand All @@ -239,7 +242,7 @@ public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file,
file.addMetric(metrics);
file.addMetric(metricsNonZero);
file.addHistogram(highQualityDepthHistogram);
file.addHistogram(highQualityDepthHistogramNonZero);
file.addHistogram(highQualityDepthHistogramNonZeroFinal);

if (includeBQHistogram) {
addBaseQHistogram(file);
Expand All @@ -260,7 +263,7 @@ private Histogram<Integer> getDepthHistogramNonZero() {
}

public boolean areHistogramsEmpty() {
return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZero.isEmpty());
return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZeroFinal.isEmpty());
}
}
}
27 changes: 22 additions & 5 deletions src/main/java/picard/analysis/WgsMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import picard.PicardException;
import picard.util.MathUtil;
import picard.util.help.HelpConstants;
import java.util.stream.LongStream;

/** Metrics for evaluating the performance of whole genome sequencing experiments. */
@DocumentedFeature(groupName = HelpConstants.DOC_CAT_METRICS, summary = HelpConstants.DOC_CAT_METRICS_SUMMARY)
Expand All @@ -46,6 +47,9 @@ public class WgsMetrics extends MergeableMetricBase {
@MergingIsManual
protected final Histogram<Integer> highQualityDepthHistogram;

/** Count of sites with a given depth of coverage. Excludes bases with quality below MINIMUM_BASE_QUALITY and 0 coverage.*/
@MergingIsManual
protected final Histogram<Integer> highQualityDepthHistogramNonZero;
/** Count of sites with a given depth of coverage. Includes all but quality 2 bases */
@MergingIsManual
protected final Histogram<Integer> unfilteredDepthHistogram;
Expand All @@ -68,6 +72,7 @@ public class WgsMetrics extends MergeableMetricBase {
public WgsMetrics() {
intervals = null;
highQualityDepthHistogram = null;
highQualityDepthHistogramNonZero = null;
unfilteredDepthHistogram = null;
unfilteredBaseQHistogram = null;
theoreticalHetSensitivitySampleSize = -1;
Expand All @@ -77,6 +82,7 @@ public WgsMetrics() {
/**
* Create an instance of this metric that is mergeable.
*
* @param highQualityDepthHistogramNonZero the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY or with 0 coverage.
* @param highQualityDepthHistogram the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY.
* @param unfilteredDepthHistogram the depth histogram that includes all but quality 2 bases.
* @param pctExcludedByAdapter the fraction of aligned bases that were filtered out because they were in reads with 0 mapping quality that looked like adapter sequence.
Expand All @@ -92,6 +98,7 @@ public WgsMetrics() {
* @param theoreticalHetSensitivitySampleSize the sample size used for theoretical het sensitivity sampling.
*/
public WgsMetrics(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogramNonZero,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
Expand All @@ -107,6 +114,7 @@ public WgsMetrics(final IntervalList intervals,
final int theoreticalHetSensitivitySampleSize) {
this.intervals = intervals.uniqued();
this.highQualityDepthHistogram = highQualityDepthHistogram;
this.highQualityDepthHistogramNonZero = new Histogram<>("coverage_or_base_quality", "high_quality_non_zero_coverage_count");
this.unfilteredDepthHistogram = unfilteredDepthHistogram;
this.unfilteredBaseQHistogram = unfilteredBaseQHistogram;
this.coverageCap = coverageCap;
Expand Down Expand Up @@ -320,12 +328,21 @@ public void calculateDerivedFields() {
PCT_90X = MathUtil.sum(depthHistogramArray, 90, depthHistogramArray.length) / (double) GENOME_TERRITORY;
PCT_100X = MathUtil.sum(depthHistogramArray, 100, depthHistogramArray.length) / (double) GENOME_TERRITORY;



long maxDepth = 0;
for (final Histogram.Bin<Integer> bin : highQualityDepthHistogram.values()) {
maxDepth = Math.max((int) bin.getIdValue(), maxDepth);
final int depth = (int) bin.getIdValue();
if (depth > 0) {
highQualityDepthHistogramNonZero.increment(depth,bin.getValue());
}
}
LongStream.range(1, maxDepth).forEach(i -> highQualityDepthHistogramNonZero.increment((int) i, 0));
// This roughly measures by how much we must over-sequence so that xx% of bases have coverage at least as deep as the current mean coverage:
if (highQualityDepthHistogram.getCount() > 0) {
FOLD_80_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.2);
FOLD_90_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.1);
FOLD_95_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.05);
if (highQualityDepthHistogramNonZero.getCount() > 0) {
FOLD_80_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.2);
FOLD_90_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.1);
FOLD_95_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.05);
} else {
FOLD_80_BASE_PENALTY = 0;
FOLD_90_BASE_PENALTY = 0;
Expand Down
15 changes: 11 additions & 4 deletions src/main/java/picard/analysis/directed/TargetMetricsCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
// give it the bin label "coverage_or_base_quality" to make clear that in the metrics file the coverage and base quality histograms share the same bin column on the left
private final Histogram<Integer> highQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count");

// histogram of depths. does not include bases with quality less than MINIMUM_BASE_QUALITY (default 20) or bases with zero coverage.
// we use this for the calculation of the FOLD_80_BASE_PENALTY metric
private final Histogram<Integer> highQualitDepthHistogramNonZero = new Histogram<>("coverage_or_base_quality", "high_quality_non_zero_coverage_count");

// histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity.
private final Histogram<Integer> unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count");

Expand Down Expand Up @@ -708,6 +712,7 @@ public void finish() {
private void calculateTargetCoverageMetrics() {

LongStream.range(0, hqMaxDepth).forEach(i -> highQualityDepthHistogram.increment((int) i, 0));
LongStream.range(1, hqMaxDepth).forEach(i -> highQualityDepthHistogramNonZero.increment((int) i, 0));

int zeroCoverageTargets = 0;

Expand Down Expand Up @@ -738,7 +743,9 @@ private void calculateTargetCoverageMetrics() {
totalCoverage += depth;
highQualityDepthHistogram.increment(depth, 1);
minDepth = Math.min(minDepth, depth);

if (depth > 0) {
highQualityDepthHistogramNonZero.increment(depth, 1);
}
// Add to the "how many target bases at at-least X" calculations.
for (int i = 0; i < targetBasesDepth.length; i++) {
if (depth >= targetBasesDepth[i]) targetBases[i]++;
Expand All @@ -750,16 +757,16 @@ private void calculateTargetCoverageMetrics() {
if (targetBases[0] != highQualityCoverageByTarget.keySet().stream().mapToInt(Interval::length).sum()) {
throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases");
}

metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY;
metrics.MEAN_TARGET_COVERAGE = highQualityDepthHistogram.getMean();
metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian();
metrics.MAX_TARGET_COVERAGE = hqMaxDepth;
// Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE)
metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, hqMaxDepth);

// compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile
// this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage
metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / highQualityDepthHistogram.getPercentile(0.2);
metrics.FOLD_80_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.2);
metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size();

// Store the "how many bases at at-least X" calculations.
Expand Down