Skip to content

Commit 2680d40

Browse files
authored
Merge pull request #108 from PolinaBevad/unique_mode_by_read_number
Added unique mode by read number (option -UN)
2 parents cc6aba9 + 5a312fb commit 2680d40

File tree

7 files changed

+71
-25
lines changed

7 files changed

+71
-25
lines changed

Readme.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,10 @@ The VarDictJava program follows the workflow:
275275
`SILENT` - Like `LENIENT`, only don't emit warning messages.
276276
Default: `LENIENT`
277277
- `-u`
278-
Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.
278+
Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using **forward** read only.
279+
Default: unique mode disabled, all reads are counted.
280+
- `-UN`
281+
Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using **first** read only.
279282
Default: unique mode disabled, all reads are counted.
280283
- `--chimeric`
281284
Indicate to turn off chimeric reads filtering. Chimeric reads are artifacts from library construction,

src/main/java/com/astrazeneca/vardict/Configuration.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,13 @@ public class Configuration {
147147
* Indicate unique mode, which when mate pairs overlap,
148148
* the overlapping part will be counted only once using forward read only.
149149
*/
150-
boolean uniqueModeOn = false; // -u
150+
boolean uniqueModeAlignmentEnabled = false; // -u
151+
152+
/**
153+
* Indicate unique mode, which when mate pairs overlap,
154+
* the overlapping part will be counted only once using first read only.
155+
*/
156+
boolean uniqueModeSecondInPairEnabled = false; // -UN
151157

152158
/**
153159
* Threads count

src/main/java/com/astrazeneca/vardict/Main.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,12 @@ private void run(CommandLine cmd) throws ParseException, IOException {
137137
conf.chimeric = true;
138138
}
139139

140+
if (cmd.hasOption("UN")) {
141+
conf.uniqueModeSecondInPairEnabled = true;
142+
}
140143

141144
if (cmd.hasOption("u")) {
142-
conf.uniqueModeOn = true;
145+
conf.uniqueModeAlignmentEnabled = true;
143146
}
144147

145148
conf.threads = Math.max(readThreadsCount(cmd), 1);
@@ -190,7 +193,8 @@ private static Options buildOptions() {
190193
options.addOption("t", false, "Indicate to remove duplicated reads. Only one pair with same start positions will be kept");
191194
options.addOption("3", false, "Indicate to move indels to 3-prime if alternative alignment can be achieved.");
192195
options.addOption("K", false, "Include Ns in the total depth calculation");
193-
options.addOption("u", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.");
196+
options.addOption("u", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using first read only.");
197+
options.addOption("UN", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.");
194198
options.addOption("chimeric", false, "Indicate to turn off chimeric reads filtering.");
195199

196200
options.addOption(OptionBuilder.withArgName("bit")

src/main/java/com/astrazeneca/vardict/VarDict.java

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,12 +1582,10 @@ static Tuple4<Map<Integer, Map<String, Variation>>, Map<Integer, Map<String, Var
15821582

15831583
int mateAlignmentStart = record.getMateAlignmentStart();
15841584

1585-
boolean isPairedAndTheSameChromosome = record.getReadPairedFlag()
1586-
&& record.getMateReferenceName().equals(record.getReferenceName());
1587-
1585+
processCigar:
15881586
//Loop over CIGAR records
15891587
for (int ci = 0; ci < cigar.numCigarElements(); ci++) {
1590-
if (conf.uniqueModeOn && !dir && isPairedAndTheSameChromosome && start >= mateAlignmentStart) {
1588+
if (skipOverlappingReads(conf, record, dir, start, mateAlignmentStart)) {
15911589
break;
15921590
}
15931591
//length of segment in CIGAR
@@ -2474,8 +2472,8 @@ && isNotEquals('N', ref.get(start))) {
24742472
p++;
24752473
}
24762474
// Skip read if it is overlap
2477-
if (conf.uniqueModeOn && !dir && isPairedAndTheSameChromosome && start >= mateAlignmentStart) {
2478-
break;
2475+
if (skipOverlappingReads(conf, record, dir, start, mateAlignmentStart)) {
2476+
break processCigar;
24792477
}
24802478
}
24812479
if (moffset != 0) {
@@ -2500,28 +2498,59 @@ && isNotEquals('N', ref.get(start))) {
25002498
}
25012499

25022500
if (conf.performLocalRealignment) {
2503-
if (conf.y)
2504-
System.err.println("Start Realigndel");
2505-
realigndel(hash, dels5, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2506-
if (conf.y)
2507-
System.err.println("Start Realignins");
2508-
realignins(hash, iHash, ins, cov, sclip5, sclip3, ref, region.chr, chrs, conf);
2509-
if (conf.y)
2510-
System.err.println("Start Realignlgdel");
2511-
realignlgdel(hash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2512-
if (conf.y)
2513-
System.err.println("Start Realignlgins");
2514-
realignlgins(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2515-
if (conf.y)
2516-
System.err.println("Start Realignlgins30");
2517-
realignlgins30(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2501+
realignIndels(region, chrs, rlen, ref, conf, bams, hash, iHash, cov, sclip3, sclip5, ins, dels5);
25182502
}
25192503

25202504
adjMNP(hash, mnp, cov, ref, sclip3, sclip5, conf);
25212505

25222506
return tuple(hash, iHash, cov, rlen);
25232507
}
25242508

2509+
private static void realignIndels(Region region, Map<String, Integer> chrs, int rlen, Map<Integer, Character> ref, Configuration conf, String[] bams, Map<Integer, Map<String, Variation>> hash, Map<Integer, Map<String, Variation>> iHash, Map<Integer, Integer> cov, Map<Integer, Sclip> sclip3, Map<Integer, Sclip> sclip5, Map<Integer, Map<String, Integer>> ins, Map<Integer, Map<String, Integer>> dels5) throws IOException {
2510+
if (conf.y)
2511+
System.err.println("Start Realigndel");
2512+
realigndel(hash, dels5, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2513+
if (conf.y)
2514+
System.err.println("Start Realignins");
2515+
realignins(hash, iHash, ins, cov, sclip5, sclip3, ref, region.chr, chrs, conf);
2516+
if (conf.y)
2517+
System.err.println("Start Realignlgdel");
2518+
realignlgdel(hash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2519+
if (conf.y)
2520+
System.err.println("Start Realignlgins");
2521+
realignlgins(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2522+
if (conf.y)
2523+
System.err.println("Start Realignlgins30");
2524+
realignlgins30(hash, iHash, cov, sclip5, sclip3, ref, region.chr, chrs, rlen, bams, conf);
2525+
}
2526+
2527+
private static boolean skipOverlappingReads(Configuration conf, SAMRecord record, boolean dir, int start, int mateAlignmentStart) {
2528+
if (conf.uniqueModeAlignmentEnabled && isPairedAndSameChromosome(record)
2529+
&& !dir && start >= mateAlignmentStart) {
2530+
return true;
2531+
}
2532+
if (conf.uniqueModeSecondInPairEnabled && record.getSecondOfPairFlag() && isPairedAndSameChromosome(record)
2533+
&& isReadsOverlap(record, start, mateAlignmentStart)) {
2534+
return true;
2535+
}
2536+
return false;
2537+
}
2538+
2539+
private static boolean isReadsOverlap(SAMRecord record, int start, int mateAlignmentStart){
2540+
if (record.getAlignmentStart() >= mateAlignmentStart) {
2541+
return start >= mateAlignmentStart
2542+
&& start <= (mateAlignmentStart + record.getCigar().getReferenceLength() - 1);
2543+
}
2544+
else {
2545+
return start >= mateAlignmentStart
2546+
&& record.getMateAlignmentStart() <= record.getAlignmentEnd();
2547+
}
2548+
}
2549+
2550+
private static boolean isPairedAndSameChromosome(SAMRecord record) {
2551+
return record.getReadPairedFlag() && record.getMateReferenceName().equals(record.getReferenceName());
2552+
}
2553+
25252554
private static void sclip3HighQualityProcessing(Region region, Configuration conf, Map<Integer, Sclip> sclip3,
25262555
String querySequence, int mappingQuality, String queryQuality,
25272556
int nm, int n, boolean dir, int start, int m, int q,
Binary file not shown.
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Simple,hg19.fa,L861Q.bam,chr7,55259400,55259600,-UN
2+
L861Q testbed chr7 55259504 55259504 A T 437 8 210 218 0 8 A/T 0.0183 2;0 7.0 0 36.0 0 60.0 16.000 0.0183 0 0 1.000 1 1.0 8 437 CCGCAGCATGTCAAGATCAC GATTTTGGGCTGGCCAAACT chr7:55259401-55259600 SNV
3+
L861Q testbed chr7 55259524 55259524 T A 533 125 198 209 57 68 T/A 0.2345 2;2 21.6 1 35.5 1 60.0 124.000 0.2362 0 0 2.000 3 1.2 124 525 AGATTTTGGGCTGGCCAAAC GCTGGGTGCGGAAGAGAAAG chr7:55259401-55259600 SNV
4+
L861Q testbed chr7 55259544 55259544 G A 371 4 164 202 3 1 G/A 0.0108 2;2 6.8 1 36.0 0 60.0 8.000 0.0110 0 0 2.000 1 1.5 4 365 TGCTGGGTGCGGAAGAGAAA AATACCATGCAGAAGGAGGC chr7:55259401-55259600 SNV

0 commit comments

Comments
 (0)