Skip to content

Commit d5a1a0c

Browse files
authored
Merge branch 'master' into qbamfilter_speedup
2 parents 0d32a15 + d2d4aff commit d5a1a0c

File tree

12 files changed

+1434
-475
lines changed

12 files changed

+1434
-475
lines changed

qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,10 @@ public void checkMIUN() {
294294
ConfidenceMode.checkMIUN(new String[]{"A"}, 90, "A3;C8", sb, 2, 3, null);
295295
assertEquals("", sb.toString());
296296

297+
sb = new StringBuilder();
298+
ConfidenceMode.checkMIUN(new String[]{"AA"}, 90, "AA13;AC8", sb, 2, 3, null);
299+
assertEquals("MIUN", sb.toString());
300+
297301
//
298302
}
299303

@@ -811,6 +815,28 @@ public void realLifeMIUN2() {
811815
assertEquals("PASS", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER));
812816
assertEquals("PASS", vcf.getSampleFormatRecord(3).getField(VcfHeaderUtils.FORMAT_FILTER));
813817
assertEquals("PASS", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER));
818+
819+
}
820+
821+
/**
 * Checks the per-sample FT (filter) annotation for a compound SNP record
 * (CA>GG at chr1:205931) that has two sample columns.
 * <p>
 * The record is placed in the position map, {@code addAnnotation()} is run, and the
 * FORMAT filter is then expected to be "MIUN" for sample 1 and "PASS" for sample 2.
 * <p>
 * NOTE(review): the VCF line quoted in the comment inside the method has REF "AA" and
 * different read counts from the record built below (REF "CA") - confirm which record
 * that comment is meant to describe.
 */
@Test
822+
public void compoundSnpMIUN() {
823+
/*
824+
chr1 205931 . AA GG . . . GT:AD:DP:FF:FT:INF:NNS:OABS 0/0:6,0:6:AA47;AT1;A_1;GG3:.:.:.:AA3[]3[] 0/1:20,4:25:AA182;GG2;TA1:.:SOMATIC:4:AA
825+
7[]13[];GA1[]0[];GG0[]4[]
826+
*/
827+
VcfRecord vcf = VcfUtils.createVcfRecord(ChrPositionUtils.getChrPosition("chr1", 205931, 205931), ".", "CA", "GG");
828+
vcf.setInfo("FLANK=GTAAAACTGGA;BaseQRankSum=0.325;ClippingRankSum=0.000;DP=58;ExcessHet=3.0103;FS=4.683;MQ=55.10;MQRankSum=-6.669;QD=4.63;ReadPosRankSum=-0.352;SOR=1.425;IN=1;DB;VLD;HOM=3,TATATGTAAAgCTGGATTAAT;EFF=downstream_gene_variant(MODIFIER||914|||MST1P2|unprocessed_pseudogene|NON_CODING|ENST00000457982||1),intergenic_region(MODIFIER||||||||||1)");
829+
vcf.setFormatFields(java.util.Arrays.asList(
830+
"GT:AD:DP:FF:FT:INF:NNS:OABS",
831+
"0/0:36,0:36:CA12;GG4;_A2:.:.:.:CA17[]19[];C_1[]0[]",
832+
"0/1:102,11:114:AA1;CA30;CC1;CT1;C_3;GG50;GT1:.:SOMATIC:10:AA1[]0[];CA61[]41[];GG4[]7[];G_1[]0[];_A0[]2[]"));
833+
ConfidenceMode cm = new ConfidenceMode(TWO_SAMPLE_ONE_CALLER_META);
834+
cm.positionRecordMap.put(vcf.getChrPosition(), List.of(vcf));
835+
cm.addAnnotation();
836+
vcf = cm.positionRecordMap.get(vcf.getChrPosition()).getFirst();
837+
assertEquals("MIUN", vcf.getSampleFormatRecord(1).getField(VcfHeaderUtils.FORMAT_FILTER));
838+
assertEquals("PASS", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER));
839+
814840
}
815841

816842
@Test
@@ -1345,6 +1371,30 @@ public void confidenceRealLifeMerged9() {
13451371
assertEquals("PASS", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER));
13461372
}
13471373

1374+
/**
 * Checks the per-sample FT (filter) annotation for a compound SNP record
 * (AG>GC at chr1:11445731) that has four sample columns.
 * <p>
 * The record is built with an empty FT field (".") in every sample column, unlike the
 * VCF line quoted inside the method, which shows FT values (PASS, MR, COV, COV).
 * After {@code addAnnotation()} the FORMAT filter is expected to be "MIUN" for sample 1,
 * "MR" for sample 2 and "COV" for samples 3 and 4.
 */
@Test
1375+
public void realLifeCSMIUN() {
1376+
/*
1377+
chr1 11445731 rs386628485 AG GC . . IN=1;DB;HOM=0,ACAGAGAGACagAGAGTCAGAG GT:AD:DP:FF:FT:INF:NNS:OABS 0/0:18,1:20:AC1;AG6;AGC1;AT1;A_1;CC1;GC7:PASS:.:.:AG7[]11[];GC0[]1[];GG0
1378+
[]1[];_C1[]0[] 0/1:32,4:36:AA1;AG11;A_1;GA1;GC18;G_1:MR:SOMATIC:4:AG21[]11[];A_1[]0[];GC2[]2[];_C1[]0[] ./.:.:.:.:COV:.:.:. ./.:.:.:.:COV:.:.:.
1379+
*/
1380+
VcfRecord vcf = new VcfRecord(new String[]{"chr1", "11445731", "rs386628485", "AG", "GC", ".", ".", "IN=1;DB;HOM=0,ACAGAGAGACagAGAGTCAGAG"
1381+
, "GT:AD:DP:FF:FT:INF:NNS:OABS"
1382+
, "0/0:18,1:20:AC1;AG6;AGC1;AT1;A_1;CC1;GC7:.:.:.:AG7[]11[];GC0[]1[];GG0[]1[];_C1[]0[]"
1383+
, "0/1:32,4:36:AA1;AG11;A_1;GA1;GC18;G_1:.:SOMATIC:4:AG21[]11[];A_1[]0[];GC2[]2[];_C1[]0[]"
1384+
, "./.:.:.:.:.:.:.:."
1385+
, "./.:.:.:.:.:.:.:."});
1386+
ConfidenceMode cm = new ConfidenceMode(TWO_SAMPLE_TWO_CALLER_META);
1387+
cm.positionRecordMap.put(vcf.getChrPosition(), List.of(vcf));
1388+
cm.addAnnotation();
1389+
1390+
vcf = cm.positionRecordMap.get(vcf.getChrPosition()).getFirst();
1391+
assertEquals("MIUN", vcf.getSampleFormatRecord(1).getField(VcfHeaderUtils.FORMAT_FILTER));
1392+
assertEquals("MR", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER));
1393+
assertEquals("COV", vcf.getSampleFormatRecord(3).getField(VcfHeaderUtils.FORMAT_FILTER));
1394+
assertEquals("COV", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER));
1395+
1396+
}
1397+
13481398
@Test
13491399
public void applyMRFilter() {
13501400
assertFalse(ConfidenceMode.applyMutantReadFilter(null, null, -1));

qcommon/src/org/qcmg/common/model/Accumulator.java

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ public class Accumulator {
4343

4444
private final int position;
4545

46-
private short failedFilterACount = 0;
47-
private short failedFilterCCount = 0;
48-
private short failedFilterGCount = 0;
49-
private short failedFilterTCount = 0;
46+
private TLongList failedFilterACount;
47+
private TLongList failedFilterCCount;
48+
private TLongList failedFilterGCount;
49+
private TLongList failedFilterTCount;
5050

5151
private TLongList readNameHashStrandBasePositionQualities;
5252

@@ -58,19 +58,23 @@ public int getPosition() {
5858
return position;
5959
}
6060

61-
public void addFailedFilterBase(final byte base) {
61+
public void addFailedFilterBase(final byte base, long readNameHash) {
6262
switch (base) {
6363
case A_BYTE:
64-
failedFilterACount++;
64+
if (null == failedFilterACount) failedFilterACount = new TLongArrayList();
65+
failedFilterACount.add(readNameHash);
6566
break;
6667
case C_BYTE:
67-
failedFilterCCount++;
68+
if (null == failedFilterCCount) failedFilterCCount = new TLongArrayList();
69+
failedFilterCCount.add(readNameHash);
6870
break;
6971
case G_BYTE:
70-
failedFilterGCount++;
72+
if (null == failedFilterGCount) failedFilterGCount = new TLongArrayList();
73+
failedFilterGCount.add(readNameHash);
7174
break;
7275
case T_BYTE:
73-
failedFilterTCount++;
76+
if (null == failedFilterTCount) failedFilterTCount = new TLongArrayList();
77+
failedFilterTCount.add(readNameHash);
7478
break;
7579
default: /* do nothing */
7680
break;
@@ -120,17 +124,17 @@ public String toString() {
120124

121125
public String getFailedFilterPileup() {
122126
StringBuilder sb = new StringBuilder();
123-
if (failedFilterACount > 0) {
124-
StringUtils.updateStringBuilder(sb, A_STRING + failedFilterACount, Constants.SEMI_COLON);
127+
if (null != failedFilterACount && ! failedFilterACount.isEmpty()) {
128+
StringUtils.updateStringBuilder(sb, A_STRING + failedFilterACount.size(), Constants.SEMI_COLON);
125129
}
126-
if (failedFilterCCount > 0) {
127-
StringUtils.updateStringBuilder(sb, C_STRING + failedFilterCCount, Constants.SEMI_COLON);
130+
if (null != failedFilterCCount && ! failedFilterCCount.isEmpty()) {
131+
StringUtils.updateStringBuilder(sb, C_STRING + failedFilterCCount.size(), Constants.SEMI_COLON);
128132
}
129-
if (failedFilterGCount > 0) {
130-
StringUtils.updateStringBuilder(sb, G_STRING + failedFilterGCount, Constants.SEMI_COLON);
133+
if (null != failedFilterGCount && ! failedFilterGCount.isEmpty()) {
134+
StringUtils.updateStringBuilder(sb, G_STRING + failedFilterGCount.size(), Constants.SEMI_COLON);
131135
}
132-
if (failedFilterTCount > 0) {
133-
StringUtils.updateStringBuilder(sb, T_STRING + failedFilterTCount, Constants.SEMI_COLON);
136+
if (null != failedFilterTCount && ! failedFilterTCount.isEmpty()) {
137+
StringUtils.updateStringBuilder(sb, T_STRING + failedFilterTCount.size(), Constants.SEMI_COLON);
134138
}
135139
return !sb.isEmpty() ? sb.toString() : Constants.MISSING_DATA_STRING;
136140
}
@@ -140,4 +144,23 @@ public int getCoverage() {
140144
return null == readNameHashStrandBasePositionQualities ? 0 : readNameHashStrandBasePositionQualities.size() / 2;
141145
}
142146

147+
/**
 * Reports whether none of this accumulator's backing lists has been created.
 * <p>
 * Only null checks are made: the result is true when the main read data list and all
 * four failed-filter lists are still null. A list that has been created makes this
 * return false even if it currently holds no elements.
 *
 * @return true if every backing list is null, false otherwise
 */
public boolean isEmpty() {
148+
return null == readNameHashStrandBasePositionQualities && null == failedFilterACount && null == failedFilterCCount && null == failedFilterGCount && null == failedFilterTCount;
149+
}
150+
151+
/**
 * Returns the backing list for failed-filter A bases.
 * <p>
 * Despite the "Count" in the name this is a list, not a number:
 * {@code addFailedFilterBase} appends one read name hash per call, so the count is the
 * list's size.
 *
 * @return the internal list itself (not a copy), or null if it has not been created yet
 */
public TLongList getFailedFilterACount() {
152+
return failedFilterACount;
153+
}
154+
155+
/**
 * Returns the backing list for failed-filter C bases.
 * <p>
 * Despite the "Count" in the name this is a list, not a number:
 * {@code addFailedFilterBase} appends one read name hash per call, so the count is the
 * list's size.
 *
 * @return the internal list itself (not a copy), or null if it has not been created yet
 */
public TLongList getFailedFilterCCount() {
156+
return failedFilterCCount;
157+
}
158+
159+
/**
 * Returns the backing list for failed-filter G bases.
 * <p>
 * Despite the "Count" in the name this is a list, not a number:
 * {@code addFailedFilterBase} appends one read name hash per call, so the count is the
 * list's size.
 *
 * @return the internal list itself (not a copy), or null if it has not been created yet
 */
public TLongList getFailedFilterGCount() {
160+
return failedFilterGCount;
161+
}
162+
163+
/**
 * Returns the backing list for failed-filter T bases.
 * <p>
 * Despite the "Count" in the name this is a list, not a number:
 * {@code addFailedFilterBase} appends one read name hash per call, so the count is the
 * list's size.
 *
 * @return the internal list itself (not a copy), or null if it has not been created yet
 */
public TLongList getFailedFilterTCount() {
164+
return failedFilterTCount;
165+
}
143166
}

qcommon/src/org/qcmg/common/util/AccumulatorUtils.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ public class AccumulatorUtils {
6060
public static final long T_BASE_BIT = 0x400000000000000L;
6161
public static final int T_BASE_BIT_POSITION = 58;
6262

63-
6463
public static final long STRAND_BIT = 0x8000000000000000L;
6564
public static final int STRAND_BIT_POSITION = 63;
6665
public static final long END_OF_READ_BIT = 0x4000000000000000L;
@@ -69,7 +68,7 @@ public class AccumulatorUtils {
6968

7069
/**
7170
* This removes reads that have the same read name hash from the accumulator.
72-
* If
71+
* <p>
7372
* If the duplicates have the same base, one of them is kept; if they have different bases, they are both (all?) removed.
7473
* <p>
7574
* This method updates the Accumulator object that is passed in, and is therefore not side-effect free
@@ -236,6 +235,7 @@ public static int[] getBaseCountByStrand(int[] array, char c) {
236235
* strand (bit 63)
237236
* end of read (bit 62)
238237
* base (bits 58-61)
238+
* passedFilter (bit 57)
239239
* quality (bits 32-40)
240240
* position (bits 0-31)
241241
*
@@ -1014,8 +1014,9 @@ public static TLongIntMap getReadNameHashStartPositionMap(Accumulator acc) {
10141014
if (null != acc) {
10151015
TLongList list = acc.getData();
10161016
if (null != list) {
1017-
TLongIntMap map = new TLongIntHashMap(list.size() * 2);
1018-
for (int i = 0, len = list.size(); i < len; i += 2) {
1017+
int len = list.size();
1018+
TLongIntMap map = new TLongIntHashMap(len);
1019+
for (int i = 0; i < len; i += 2) {
10191020

10201021
int startPosition = (int) list.get(i + 1);
10211022
if (((list.get(i + 1) >>> STRAND_BIT_POSITION) & 1) == 0) {
@@ -1038,8 +1039,9 @@ public static TLongCharMap getReadNameHashBaseMap(Accumulator acc) {
10381039
if (null != acc) {
10391040
TLongList list = acc.getData();
10401041
if (null != list) {
1041-
TLongCharMap map = new TLongCharHashMap(list.size() * 2);
1042-
for (int i = 0, len = list.size(); i < len; i += 2) {
1042+
int len = list.size();
1043+
TLongCharMap map = new TLongCharHashMap(len);
1044+
for (int i = 0; i < len; i += 2) {
10431045

10441046
char base = getBaseAsCharFromLong(list.get(i + 1));
10451047
if (((list.get(i + 1) >>> STRAND_BIT_POSITION) & 1) == 0) {

qcommon/test/org/qcmg/common/model/AccumulatorTest.java

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,12 @@ public void endOfReads() {
8181
public void testUnfilteredPileup() {
8282
Accumulator acc = new Accumulator(1);
8383
String basesString = "ACGT";
84-
for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b);
84+
long readNameHash = 1;
85+
for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash);
8586
assertEquals("A1;C1;G1;T1", acc.getFailedFilterPileup());
86-
for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b);
87+
for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash + 1);
8788
assertEquals("A2;C2;G2;T2", acc.getFailedFilterPileup());
88-
for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b);
89+
for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash + 2);
8990
assertEquals("A3;C3;G3;T3", acc.getFailedFilterPileup());
9091
}
9192

@@ -97,16 +98,17 @@ public void testUnfilteredPileupPercentage() {
9798
}
9899

99100
String basesString = "GG";
101+
long readNameHash = 1;
100102
for (byte b : basesString.getBytes()) {
101-
acc.addFailedFilterBase(b);
103+
acc.addFailedFilterBase(b, readNameHash++);
102104
}
103105
assertEquals("G2", acc.getFailedFilterPileup());
104106
/*
105107
* need 3 percent
106108
*/
107109
basesString = "G";
108110
for (byte b : basesString.getBytes()) {
109-
acc.addFailedFilterBase(b);
111+
acc.addFailedFilterBase(b, readNameHash++);
110112
}
111113
assertEquals("G3", acc.getFailedFilterPileup());
112114
}
@@ -115,31 +117,32 @@ public void testUnfilteredPileupPercentage() {
115117
@Test
116118
public void singleUnfilteredPileup() {
117119
Accumulator acc = new Accumulator(1);
118-
for (byte b : "ACGT".getBytes()) acc.addFailedFilterBase(b);
120+
long readNameHash = 1;
121+
for (byte b : "ACGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
119122
assertEquals("A1;C1;G1;T1", acc.getFailedFilterPileup());
120123

121124
acc = new Accumulator(1);
122-
for (byte b : "ACGTA".getBytes()) acc.addFailedFilterBase(b);
125+
for (byte b : "ACGTA".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
123126
assertEquals("A2;C1;G1;T1", acc.getFailedFilterPileup());
124127

125128
acc = new Accumulator(1);
126-
for (byte b : "ACCGT".getBytes()) acc.addFailedFilterBase(b);
129+
for (byte b : "ACCGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
127130
assertEquals("A1;C2;G1;T1", acc.getFailedFilterPileup());
128131

129132
acc = new Accumulator(1);
130-
for (byte b : "ATTTGT".getBytes()) acc.addFailedFilterBase(b);
133+
for (byte b : "ATTTGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
131134
assertEquals("A1;G1;T4", acc.getFailedFilterPileup());
132135

133136
acc = new Accumulator(1);
134-
for (byte b : "AAAATTTGT".getBytes()) acc.addFailedFilterBase(b);
137+
for (byte b : "AAAATTTGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
135138
assertEquals("A4;G1;T4", acc.getFailedFilterPileup());
136139

137140
acc = new Accumulator(1);
138-
for (byte b : "AAAACTTTCGT".getBytes()) acc.addFailedFilterBase(b);
141+
for (byte b : "AAAACTTTCGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
139142
assertEquals("A4;C2;G1;T4", acc.getFailedFilterPileup());
140143

141144
acc = new Accumulator(1);
142-
for (byte b : "AAAACTTTCGTG".getBytes()) acc.addFailedFilterBase(b);
145+
for (byte b : "AAAACTTTCGTG".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
143146
assertEquals("A4;C2;G2;T4", acc.getFailedFilterPileup());
144147
}
145148

@@ -217,7 +220,7 @@ public void getGenotypeRealLife() {
217220
Accumulator acc = new Accumulator(1);
218221
for (int i = 1; i <= 60; i++) acc.addBase((byte) 'G', (byte) 40, false, 1, 1, 2, i);
219222
for (int i = 1; i <= 5; i++) acc.addBase((byte) 'C', (byte) 42, false, 1, 1, 2, i + 61);
220-
for (int i = 1; i <= 1; i++) acc.addBase((byte) 'C', (byte) 42, true, 1, 1, 2, i + 67);
223+
for (int i = 1; i == 1; i++) acc.addBase((byte) 'C', (byte) 42, true, 1, 1, 2, i + 67);
221224

222225
assertEquals("C1[42]5[42];G0[0]60[40]", AccumulatorUtils.getOABS(acc));
223226
/*

0 commit comments

Comments
 (0)