Skip to content

Commit cc6aba9

Browse files
authored
Merge pull request #106 from PolinaBevad/chimeric+unique
Added chimeric and unique modes from Perl version
2 parents 514215a + a1db2f7 commit cc6aba9

12 files changed

+847
-340
lines changed

Readme.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,8 @@ The VarDictJava program follows the workflow:
242242
The minimum # of reads to determine strand bias, default: `2`
243243
- `-Q INT`
244244
If set, reads with mapping quality less than INT will be filtered and ignored
245-
- `-q INT`
246-
The phred score for a base to be considered a good call. Default: 25 (for Illumina). For PGM, set it to ~15, as PGM tends to underestimate base quality.
245+
- `-q double`
246+
The phred score for a base to be considered a good call. Default: 22.5 (for Illumina). For PGM, set it to ~15, as PGM tends to underestimate base quality.
247247
- `-m INT`
248248
If set, reads with more than `INT` mismatches will be filtered and ignored. Gaps are not counted as mismatches. Valid only for bowtie2/TopHat or BWA aln followed by sampe. For BWA mem, the count is calculated as NM - Indels. Default: 8, i.e. reads with more than 8 mismatches will not be used.
249249
- `-T INT`
@@ -274,7 +274,13 @@ The VarDictJava program follows the workflow:
274274
`LENIENT` - Emit warnings but keep going if possible.
275275
`SILENT` - Like `LENIENT`, only don't emit warning messages.
276276
Default: `LENIENT`
277-
277+
- `-u`
278+
Enable unique mode: when mate pairs overlap, the overlapping part is counted only once, using the forward read only.
279+
Default: unique mode disabled, all reads are counted.
280+
- `--chimeric`
281+
Turn off chimeric read filtering. Chimeric reads are artifacts of library construction,
282+
where a read can be split into two segments, each aligned within a distance of 1-2 read lengths,
283+
but in opposite directions. Default: filtering enabled.
278284
## Output columns
279285

280286
1. Sample - sample name

src/main/java/com/astrazeneca/vardict/Configuration.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ public class Configuration {
6767
/**
6868
* The phred score for a base to be considered a good call
6969
*/
70-
int goodq; // -q, default = 23
70+
double goodq; // -q, default = 22.5
7171
final int buffer = 200;
7272
/**
7373
* Extension of bp to look for mismatches after insertion or deletion
@@ -143,12 +143,24 @@ public class Configuration {
143143
*/
144144
boolean includeNInTotalDepth = false; // -K
145145

146+
/**
147+
* Unique mode: when mate pairs overlap,
148+
* the overlapping part is counted only once, using the forward read only.
149+
*/
150+
boolean uniqueModeOn = false; // -u
146151

147152
/**
148153
* Threads count
149154
*/
150155
int threads;
151156

157+
/**
158+
* Turns off chimeric read filtering when set. Chimeric reads are artifacts of library construction,
159+
* where a read can be split into two segments, each aligned within a distance of 1-2 read lengths,
160+
* but in opposite directions.
161+
*/
162+
public boolean chimeric = false; // --chimeric
163+
152164
public boolean isColumnForChromosomeSet() {
153165
return columnForChromosome >= 0;
154166
}

src/main/java/com/astrazeneca/vardict/Main.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ private void run(CommandLine cmd) throws ParseException, IOException {
9393
conf.columnForChromosome = getColumnValue(cmd, "c", -1);
9494

9595
conf.numberNucleotideToExtend = getIntValue(cmd, "x", 0);
96-
conf.freq = getDoubleValue(cmd, "f", 0.05d);
96+
conf.freq = getDoubleValue(cmd, "f", 0.01d);
9797
conf.minr = getIntValue(cmd, "r", 2);
9898
conf.minb = getIntValue(cmd, "B", 2);
9999
if (cmd.hasOption("Q")) {
100100
conf.mappingQuality = ((Number)cmd.getParsedOptionValue("Q")).intValue();
101101
}
102-
conf.goodq = getIntValue(cmd, "q", 25);
102+
conf.goodq = getDoubleValue(cmd, "q", 22.5);
103103
conf.mismatch = getIntValue(cmd, "m", 8);
104104
conf.trimBasesAfter = getIntValue(cmd, "T", 0);
105105
conf.vext = getIntValue(cmd, "X", 3);
@@ -133,6 +133,15 @@ private void run(CommandLine cmd) throws ParseException, IOException {
133133
conf.includeNInTotalDepth = true;
134134
}
135135

136+
if (cmd.hasOption("chimeric")) {
137+
conf.chimeric = true;
138+
}
139+
140+
141+
if (cmd.hasOption("u")) {
142+
conf.uniqueModeOn = true;
143+
}
144+
136145
conf.threads = Math.max(readThreadsCount(cmd), 1);
137146

138147
VarDict.start(conf);
@@ -181,6 +190,8 @@ private static Options buildOptions() {
181190
options.addOption("t", false, "Indicate to remove duplicated reads. Only one pair with same start positions will be kept");
182191
options.addOption("3", false, "Indicate to move indels to 3-prime if alternative alignment can be achieved.");
183192
options.addOption("K", false, "Include Ns in the total depth calculation");
193+
options.addOption("u", false, "Indicate unique mode, which when mate pairs overlap, the overlapping part will be counted only once using forward read only.");
194+
options.addOption("chimeric", false, "Indicate to turn off chimeric reads filtering.");
184195

185196
options.addOption(OptionBuilder.withArgName("bit")
186197
.hasArg(true)

0 commit comments

Comments
 (0)