Skip to content

Commit 0ca17a0

Browse files
pd3mcshane
authored and committed
Handle VCF lines with missing FORMAT=.
For example, this is a valid VCF line: ``` 1 300 . C A . PASS . . . . ``` Previously this would emit a warning saying: `[W::vcf_parse_format] FORMAT '.' is not defined in the header, assuming Type=String` and internally we would have a new `FORMAT=.` tag. This will now be recognised as missing. htslib already writes out such lines when `n_fmt == 0` and `n_sample > 0`. Mixing missing and non-missing FORMAT tags (e.g. `.:GT` or `GT:.:AD`) is not allowed. See conversation in #409.
1 parent bf75336 commit 0ca17a0

File tree

4 files changed

+27
-1
lines changed

4 files changed

+27
-1
lines changed

test/formatmissing-out.vcf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
##fileformat=VCFv4.3
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##contig=<ID=1>
4+
##FORMAT=<ID=S,Number=1,Type=String,Description="Text">
5+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
6+
1 100 a A T . . . . . . .

test/formatmissing.vcf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
##fileformat=VCFv4.3
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##contig=<ID=1>
4+
##FORMAT=<ID=S,Number=1,Type=String,Description="Text">
5+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
6+
1 100 a A T . . . . . . .

test/test.pl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ sub test_vcf_various
311311
cmd => "$$opts{bin}/htsfile -c $$opts{path}/formatcols.vcf");
312312
test_cmd($opts, %args, out => "noroundtrip-out.vcf",
313313
cmd => "$$opts{bin}/htsfile -c $$opts{path}/noroundtrip.vcf");
314+
test_cmd($opts, %args, out => "formatmissing-out.vcf",
315+
cmd => "$$opts{bin}/htsfile -c $$opts{path}/formatmissing.vcf");
314316
}
315317

316318
sub test_rebgzip

vcf.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1618,8 +1618,14 @@ static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p
16181618
return -1;
16191619
}
16201620

1621-
// get format information from the dictionary
16221621
v->n_fmt = 0;
1622+
if ( p[0]=='.' && p[1]==0 ) // FORMAT field is empty "."
1623+
{
1624+
v->n_sample = bcf_hdr_nsamples(h);
1625+
return 0;
1626+
}
1627+
1628+
// get format information from the dictionary
16231629
for (j = 0, t = kstrtok(p, ":", &aux1); t; t = kstrtok(0, 0, &aux1), ++j) {
16241630
if (j >= MAX_N_FMT) {
16251631
v->errcode |= BCF_ERR_LIMITS;
@@ -1630,6 +1636,12 @@ static int vcf_parse_format(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, char *p
16301636
*(char*)aux1.p = 0;
16311637
k = kh_get(vdict, d, t);
16321638
if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_FMT] == 15) {
1639+
if ( t[0]=='.' && t[1]==0 )
1640+
{
1641+
fprintf(stderr, "[E::%s] Invalid FORMAT tag name '.'\n", __func__);
1642+
v->errcode |= BCF_ERR_TAG_INVALID;
1643+
return -1;
1644+
}
16331645
if (hts_verbose >= 2) fprintf(stderr, "[W::%s] FORMAT '%s' is not defined in the header, assuming Type=String\n", __func__, t);
16341646
kstring_t tmp = {0,0,0};
16351647
int l;

0 commit comments

Comments
 (0)