Skip to content

Commit 0bd5461

Browse files
committed
implements +tag2tag --gl-to-pl
(via @wkretzsch) Closes samtools#260
1 parent d30dea2 commit 0bd5461

File tree

4 files changed

+93
-0
lines changed

4 files changed

+93
-0
lines changed

plugins/tag2tag.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ DEALINGS IN THE SOFTWARE. */
3232

3333

3434
#define GP_TO_GL 1
35+
#define GL_TO_PL 2
3536

3637
static int mode = 0, drop_source_tag = 0;
3738
static bcf_hdr_t *in_hdr, *out_hdr;
3839
static float *farr = NULL;
40+
static int32_t *iarr = NULL;
3941
static int mfarr = 0;
4042

4143
const char *about(void)
@@ -55,6 +57,7 @@ const char *usage(void)
5557
"Plugin options:\n"
5658
//todo " --gl-to-gp convert FORMAT/GL to FORMAT/GP\n"
5759
" --gp-to-gl convert FORMAT/GP to FORMAT/GL\n"
60+
" --gl-to-pl convert FORMAT/GL to FORMAT/PL\n"
5861
" -r, --replace drop the source tag\n"
5962
"\n"
6063
"Example:\n"
@@ -77,6 +80,7 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
7780
{
7881
{"replace",0,0,'r'},
7982
{"gp-to-gl",0,0,1},
83+
{"gl-to-pl",0,0,2},
8084
{0,0,0,0}
8185
};
8286
char c;
@@ -85,6 +89,7 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
8589
switch (c)
8690
{
8791
case 1 : mode = GP_TO_GL; break;
92+
case 2 : mode = GL_TO_PL; break;
8893
case 'r': drop_source_tag = 1; break;
8994
case 'h':
9095
case '?':
@@ -98,6 +103,8 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
98103

99104
if ( mode==GP_TO_GL )
100105
init_header(out_hdr,drop_source_tag?"GP":NULL,BCF_HL_FMT,"##FORMAT=<ID=GL,Number=G,Type=Float,Description=\"Genotype Likelihoods\">");
106+
else if ( mode==GL_TO_PL )
107+
init_header(out_hdr,drop_source_tag?"GL":NULL,BCF_HL_FMT,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"Phred scaled genotype likelihoods\">");
101108

102109
return 0;
103110
}
@@ -117,12 +124,39 @@ bcf1_t *process(bcf1_t *rec)
117124
if ( drop_source_tag )
118125
bcf_update_format_float(out_hdr,rec,"GP",NULL,0);
119126
}
127+
else if ( mode==GL_TO_PL )
128+
{
129+
n = bcf_get_format_float(in_hdr,rec,"GL",&farr,&mfarr);
130+
if(n < 0){
131+
fprintf(stderr, "Could not read tag: GL\n");
132+
exit(1);
133+
}
134+
135+
136+
// create extra space to store converted data
137+
iarr = (int32_t*) malloc(n * sizeof(int32_t));
138+
if(!iarr) n = -4;
139+
140+
for (i=0; i<n; i++)
141+
{
142+
if ( bcf_float_is_missing(farr[i]) )
143+
iarr[i] = bcf_int32_missing;
144+
else if ( bcf_float_is_vector_end(farr[i]) )
145+
iarr[i] = bcf_int32_vector_end;
146+
else
147+
iarr[i] = lroundf(-10 * farr[i]);
148+
}
149+
bcf_update_format_int32(out_hdr,rec,"PL",iarr,n);
150+
if ( drop_source_tag )
151+
bcf_update_format_float(out_hdr,rec,"GL",NULL,0);
152+
}
120153
return rec;
121154
}
122155

123156
void destroy(void)
124157
{
125158
free(farr);
159+
free(iarr);
126160
}
127161

128162

test/test.pl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@
150150
test_vcf_plugin($opts,in=>'fixploidy',out=>'fixploidy.out',cmd=>'+fixploidy',args=>'-- -s {PATH}/fixploidy.samples -p {PATH}/fixploidy.ploidy');
151151
test_vcf_plugin($opts,in=>'vcf2sex',out=>'vcf2sex.out',cmd=>'+vcf2sex',args=>'-- -n 5');
152152
test_vcf_plugin($opts,in=>'vcf2sex',out=>'vcf2sex.out',cmd=>'+vcf2sex',args=>'-- -gn 5');
153+
test_vcf_plugin($opts,in=>'view.GL',out=>'view.PL.vcf',cmd=>'+tag2tag',args=>'-- -r --gl-to-pl');
153154
test_vcf_concat($opts,in=>['concat.1.a','concat.1.b'],out=>'concat.1.vcf.out',do_bcf=>0,args=>'');
154155
test_vcf_concat($opts,in=>['concat.1.a','concat.1.b'],out=>'concat.1.bcf.out',do_bcf=>1,args=>'');
155156
test_vcf_concat($opts,in=>['concat.2.a','concat.2.b'],out=>'concat.2.vcf.out',do_bcf=>0,args=>'-a');

test/view.GL.vcf

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
##fileformat=VCFv4.1
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##reference=file:///seq/references/1000Genomes-NCBI37.fasta
4+
##contig=<ID=11,length=135006516>
5+
##contig=<ID=20,length=63025520>
6+
##contig=<ID=X,length=155270560>
7+
##contig=<ID=Y,length=59373566>
8+
##FORMAT=<ID=GL,Number=G,Type=Float,Description="List of Phred-scaled genotype likelihoods">
9+
##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
10+
##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
11+
##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
12+
##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
13+
##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
14+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
15+
11 2343543 . A . 999 PASS . GL 0,-25.5,-25.5 0,-25.5,-25.5 0,-25.5,-25.5
16+
11 5464562 . C T 999 PASS . GL 0,0,0 0,0,0 0,0,0
17+
20 76962 rs6111385 T C 999 PASS . GL -25.5,0,-25.5 -25.5,-25.5,0 -25.5,-25.5,0
18+
20 126310 . ACC A 999 StrandBias;EndDistBias . GL -25.5,0,-13.2 -25.5,0,-13.9 -25.5,-21.3,0
19+
20 138125 rs2298108 G T 999 PASS . GL -13.5,0,-16.3 -14,0,-25.5 -25.5,-19.9,0
20+
20 138148 rs2298109 C T 999 PASS . GL -19.5,0,-25.5 -19.2,0,-25.5 -25.5,-23.5,0
21+
20 271225 . T TTTA,TA 999 StrandBias . GL -15.1,-5.3,-20.3,0,-5.2,-15.9 -25.5,0,-21.3,-25.5,-25.5,-25.5 -25.5,-25.5,-25.5,-25.5,0,-24.1
22+
20 304568 . C T 999 PASS . GL -9.5,0,-25.5 -19.2,0,-25.5 -25.5,-9.5,0
23+
20 326891 . A AC 999 PASS . GL -25.5,0,-13.2 -25.5,0,-13.9 .,.,.
24+
X 2928329 rs62584840 C T 999 PASS . GL 0,-5.6 0,-8.1 -7.3,0,-1.9
25+
X 2933066 rs61746890 G C 999 PASS . GL 0,-25.5 0,-25.5 -25.5,-25.5,-25.5
26+
X 2942109 rs5939407 T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,-15.7,0
27+
X 3048719 . T C 999 PASS . GL 0,-25.5 -25.5,0 -25.5,0,-15.7
28+
Y 8657215 . C A 999 PASS . GL 0,-25.5 -25.5,0 .
29+
Y 10011673 rs78249411 G A 999 MinAB . GL -12.6,-10.1 -9.5,0 .

test/view.PL.vcf

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
##fileformat=VCFv4.1
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##reference=file:///seq/references/1000Genomes-NCBI37.fasta
4+
##contig=<ID=11,length=135006516>
5+
##contig=<ID=20,length=63025520>
6+
##contig=<ID=X,length=155270560>
7+
##contig=<ID=Y,length=59373566>
8+
##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
9+
##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [1e-100]">
10+
##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
11+
##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
12+
##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
13+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred scaled genotype likelihoods">
14+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
15+
11 2343543 . A . 999 PASS . PL 0,255,255 0,255,255 0,255,255
16+
11 5464562 . C T 999 PASS . PL 0,0,0 0,0,0 0,0,0
17+
20 76962 rs6111385 T C 999 PASS . PL 255,0,255 255,255,0 255,255,0
18+
20 126310 . ACC A 999 StrandBias;EndDistBias . PL 255,0,132 255,0,139 255,213,0
19+
20 138125 rs2298108 G T 999 PASS . PL 135,0,163 140,0,255 255,199,0
20+
20 138148 rs2298109 C T 999 PASS . PL 195,0,255 192,0,255 255,235,0
21+
20 271225 . T TTTA,TA 999 StrandBias . PL 151,53,203,0,52,159 255,0,213,255,255,255 255,255,255,255,0,241
22+
20 304568 . C T 999 PASS . PL 95,0,255 192,0,255 255,95,0
23+
20 326891 . A AC 999 PASS . PL 255,0,132 255,0,139 .,.,.
24+
X 2928329 rs62584840 C T 999 PASS . PL 0,56 0,81 73,0,19
25+
X 2933066 rs61746890 G C 999 PASS . PL 0,255 0,255 255,255,255
26+
X 2942109 rs5939407 T C 999 PASS . PL 0,255 255,0 255,157,0
27+
X 3048719 . T C 999 PASS . PL 0,255 255,0 255,0,157
28+
Y 8657215 . C A 999 PASS . PL 0,255 255,0 .
29+
Y 10011673 rs78249411 G A 999 MinAB . PL 126,101 95,0 .

0 commit comments

Comments
 (0)