@@ -3336,50 +3336,26 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p
3336
3336
3337
3337
v -> n_info = 0 ;
3338
3338
if (* (q - 1 ) == ';' ) * (q - 1 ) = 0 ;
3339
-
3340
- // Parsing consists of processing key=value; or key; so we only need
3341
- // to track the next = and ; symbols, plus end of string.
3342
- // We could process 1 char at a time, but this is inefficient compared
3343
- // to strchr which can process word by word. Hence even doing two strchrs
3344
- // is quicker than byte by byte processing.
3345
- char * next_equals = strchr (p , '=' );
3346
- char * next_semicolon = strchr (p , ';' );
3347
- r = p ;
3348
- while (* r ) {
3349
- // Look for key=value or just key.
3350
- char * val , * end , * from ;
3351
- key = r ;
3352
- if (next_equals && (!next_semicolon || next_equals < next_semicolon )) {
3353
- // key=value;
3354
- * next_equals = 0 ;
3355
- from = val = next_equals + 1 ;
3356
- // Prefetching d->keys[hash] helps here provided we avoid
3357
- // computing hash twice (needs API change), but not universally.
3358
- // It may be significant for other applications though so it's
3359
- // something to consider for the future.
3360
- } else {
3361
- // key;
3362
- val = NULL ;
3363
- from = key ;
3364
- }
3365
-
3366
- // Update location of next ; and if used =
3367
- if (next_semicolon ) {
3368
- end = next_semicolon ;
3369
- r = end + 1 ;
3370
- next_semicolon = strchr (end + 1 , ';' );
3371
- if (val )
3372
- next_equals = strchr (end , '=' );
3373
- } else {
3374
- // find nul location, starting from key or val.
3375
- r = end = from + strlen (from );
3339
+ for (r = key = p ;; ++ r ) {
3340
+ int c ;
3341
+ char * val , * end ;
3342
+ while (* r > '=' || (* r != ';' && * r != '=' && * r != 0 )) r ++ ;
3343
+ if (v -> n_info == UINT16_MAX ) {
3344
+ hts_log_error ("Too many INFO entries at %s:%" PRIhts_pos ,
3345
+ bcf_seqname_safe (h ,v ), v -> pos + 1 );
3346
+ v -> errcode |= BCF_ERR_LIMITS ;
3347
+ goto fail ;
3376
3348
}
3349
+ val = end = NULL ;
3350
+ c = * r ; * r = 0 ;
3351
+ if (c == '=' ) {
3352
+ val = r + 1 ;
3377
3353
3378
- * end = 0 ;
3379
-
3380
- // We've now got key and val (maybe NULL), so process it
3354
+ for (end = val ; * end != ';' && * end != 0 ; ++ end );
3355
+ c = * end ; * end = 0 ;
3356
+ } else end = r ;
3357
+ if ( !* key ) { if (c == 0 ) break ; r = end ; key = r + 1 ; continue ; } // faulty VCF, ";;" in the INFO
3381
3358
k = kh_get (vdict , d , key );
3382
- // 15 is default (unknown) type. See bcf_idinfo_def at top
3383
3359
if (k == kh_end (d ) || kh_val (d , k ).info [BCF_HL_INFO ] == 15 )
3384
3360
{
3385
3361
hts_log_warning ("INFO '%s' is not defined in the header, assuming Type=String" , key );
@@ -3481,8 +3457,8 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p
3481
3457
} else {
3482
3458
bcf_enc_vint (str , n_val , a_val , -1 );
3483
3459
}
3484
- if (n_val == 1 && (val1 != bcf_int32_missing || is_int64 ) &&
3485
- memcmp (key , "END" , 4 ) == 0 )
3460
+ if (n_val == 1 && (val1 != bcf_int32_missing || is_int64 )
3461
+ && memcmp (key , "END" , 4 ) == 0 )
3486
3462
{
3487
3463
if ( val1 <= v -> pos )
3488
3464
{
@@ -3508,6 +3484,9 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p
3508
3484
bcf_enc_vfloat (str , n_val , val_f );
3509
3485
}
3510
3486
}
3487
+ if (c == 0 ) break ;
3488
+ r = end ;
3489
+ key = r + 1 ;
3511
3490
}
3512
3491
3513
3492
free (a_val );
0 commit comments