Skip to content

Commit 4361a3d

Browse files
committed
Add hts_parse_decimal() and hts_parse_region() flags parameter [DRAFT]
Add the first flag, HTS_PARSE_THOUSANDS_SEP. [IN PROGRESS] Still need to determine whether hts_parse_region() can work with a strend argument, given the possibility of colons in chromosome names.
1 parent 306664a commit 4361a3d

File tree

4 files changed

+49
-29
lines changed

4 files changed

+49
-29
lines changed

hts.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,7 @@ static inline long long push_digit(long long i, char c)
18241824
return 10 * i + digit;
18251825
}
18261826

1827-
long long hts_parse_decimal(const char *str, char **end)
1827+
long long hts_parse_decimal(const char *str, char **strend, int flags)
18281828
{
18291829
long long n = 0;
18301830
int decimals = 0, e = 0, lost = 0;
@@ -1837,7 +1837,7 @@ long long hts_parse_decimal(const char *str, char **end)
18371837
if (*s == '+' || *s == '-') sign = *s++;
18381838
while (*s)
18391839
if (isdigit(*s)) n = push_digit(n, *s++);
1840-
else if (*s == ',') s++;
1840+
else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++;
18411841
else break;
18421842

18431843
if (*s == '.') {
@@ -1860,7 +1860,7 @@ long long hts_parse_decimal(const char *str, char **end)
18601860
fprintf(stderr, "[W::%s] discarding fractional part of %.*s\n",
18611861
__func__, (int)(s - str), str);
18621862

1863-
if (end) *end = (char *) s;
1863+
if (strend) *strend = (char *) s;
18641864
else if (*s && hts_verbose >= 2)
18651865
fprintf(stderr, "[W::%s] ignoring unknown characters after %.*s[%s]\n",
18661866
__func__, (int)(s - str), str, s);
@@ -1869,6 +1869,12 @@ long long hts_parse_decimal(const char *str, char **end)
18691869
}
18701870

18711871
const char *hts_parse_reg(const char *s, int *beg, int *end)
1872+
{
1873+
return hts_parse_region(s, NULL, beg, end, HTS_PARSE_THOUSANDS_SEP);
1874+
}
1875+
1876+
const char *
1877+
hts_parse_region(const char *s, char **strend, int *beg, int *end, int flags)
18721878
{
18731879
char *hyphen;
18741880
const char *colon = strrchr(s, ':');
@@ -1877,11 +1883,12 @@ const char *hts_parse_reg(const char *s, int *beg, int *end)
18771883
return s + strlen(s);
18781884
}
18791885

1880-
*beg = hts_parse_decimal(colon+1, &hyphen) - 1;
1886+
*beg = hts_parse_decimal(colon+1, &hyphen, flags) - 1;
18811887
if (*beg < 0) *beg = 0;
18821888

1889+
// FIXME \0 vs. return NULL
18831890
if (*hyphen == '\0') *end = INT_MAX;
1884-
else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL);
1891+
else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, strend, flags);
18851892
else return NULL;
18861893

18871894
if (*beg >= *end) return NULL;

htslib/hts.h

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -477,27 +477,40 @@ hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx);
477477
int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
478478
uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);
479479

480+
481+
#define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers
482+
480483
/// Parse a numeric string
481-
/** The number may be expressed in scientific notation, and may contain commas
482-
in the integer part (before any decimal point or E notation).
483-
@param str String to be parsed
484-
@param end If non-NULL, set on return to point to the first character
485-
in @a str after those forming the parsed number
484+
/** The number may be expressed in scientific notation and, when
485+
HTS_PARSE_THOUSANDS_SEP is set in @a flags, may contain commas in the integer part (before any decimal point or E notation).
486+
@param str String to be parsed
487+
@param strend If non-NULL, set on return to point to the first character
488+
in @a str after those forming the parsed number
489+
@param flags Or'ed-together combination of HTS_PARSE_* flags
486490
@return Converted value of the parsed number.
487491
488-
When @a end is NULL, a warning will be printed (if hts_verbose is 2
492+
When @a strend is NULL, a warning will be printed (if hts_verbose is 2
489493
or more) if there are any trailing characters after the number.
490494
*/
491-
long long hts_parse_decimal(const char *str, char **end);
495+
long long hts_parse_decimal(const char *str, char **strend, int flags);
496+
497+
/// Equivalent to hts_parse_region(str, NULL, beg, end, HTS_PARSE_THOUSANDS_SEP)
498+
const char *hts_parse_reg(const char *str, int *beg, int *end);
492499

493500
/// Parse a "CHR:START-END"-style region string
494-
/** @param str String to be parsed
495-
@param beg Set on return to the 0-based start of the region
496-
@param end Set on return to the 1-based end of the region
497-
@return Pointer to the colon or '\0' after the reference sequence name,
498-
or NULL if @a str could not be parsed.
501+
/** @param str String to be parsed
502+
@param strend If non-NULL, set on return to point to the first character
503+
in @a str after those forming the parsed region
504+
@param beg Set on return to the 0-based start of the region
505+
@param end Set on return to the 1-based end of the region
506+
@param flags Or'ed-together combination of HTS_PARSE_* flags
507+
@return Pointer to the colon or terminating character after the reference
508+
sequence name, or NULL if @a str could not be parsed.
509+
510+
When @a strend is NULL, a warning will be printed (if hts_verbose is 2
511+
or more) if there are any trailing characters after the region string.
499512
*/
500-
const char *hts_parse_reg(const char *str, int *beg, int *end);
513+
const char *hts_parse_region(const char *str, char **strend, int *beg, int *end, int flags);
501514

502515
hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);
503516
void hts_itr_destroy(hts_itr_t *iter);

regidx.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,11 +297,11 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, reg_t *re
297297
*chr_end = se-1;
298298

299299
ss = se+1;
300-
reg->start = hts_parse_decimal(ss, &se);
300+
reg->start = hts_parse_decimal(ss, &se, 0);
301301
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
302302

303303
ss = se+1;
304-
reg->end = hts_parse_decimal(ss, &se) - 1;
304+
reg->end = hts_parse_decimal(ss, &se, 0) - 1;
305305
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
306306

307307
return 0;
@@ -322,15 +322,15 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, reg_t *re
322322
*chr_end = se-1;
323323

324324
ss = se+1;
325-
reg->start = hts_parse_decimal(ss, &se) - 1;
325+
reg->start = hts_parse_decimal(ss, &se, 0) - 1;
326326
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
327327

328328
if ( !se[0] || !se[1] )
329329
reg->end = reg->start;
330330
else
331331
{
332332
ss = se+1;
333-
reg->end = hts_parse_decimal(ss, &se);
333+
reg->end = hts_parse_decimal(ss, &se, 0);
334334
if ( ss==se ) reg->end = reg->start;
335335
else reg->end--;
336336
}

synced_bcf_reader.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
887887
if ( *ep==':' )
888888
{
889889
sp = ep+1;
890-
from = hts_parse_decimal(sp,(char**)&ep);
890+
from = hts_parse_decimal(sp,(char**)&ep,0);
891891
if ( sp==ep )
892892
{
893893
fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
@@ -906,7 +906,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
906906
}
907907
ep++;
908908
sp = ep;
909-
to = hts_parse_decimal(sp,(char**)&ep);
909+
to = hts_parse_decimal(sp,(char**)&ep,0);
910910
if ( *ep && *ep!=',' )
911911
{
912912
fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
@@ -953,15 +953,15 @@ static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **ch
953953
if ( i<=k ) return -1;
954954
if ( k==l )
955955
{
956-
*from = *to = hts_parse_decimal(ss, &tmp);
956+
*from = *to = hts_parse_decimal(ss, &tmp, 0);
957957
if ( tmp==ss ) return -1;
958958
}
959959
else
960960
{
961961
if ( k==ifrom )
962-
*from = hts_parse_decimal(ss, &tmp);
962+
*from = hts_parse_decimal(ss, &tmp, 0);
963963
else
964-
*to = hts_parse_decimal(ss, &tmp);
964+
*to = hts_parse_decimal(ss, &tmp, 0);
965965
if ( ss==tmp ) return -1;
966966

967967
for (i=k; i<l && *se; i++)
@@ -971,9 +971,9 @@ static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **ch
971971
}
972972
if ( i<l ) return -1;
973973
if ( k==ifrom )
974-
*to = hts_parse_decimal(ss, &tmp);
974+
*to = hts_parse_decimal(ss, &tmp, 0);
975975
else
976-
*from = hts_parse_decimal(ss, &tmp);
976+
*from = hts_parse_decimal(ss, &tmp, 0);
977977
if ( ss==tmp ) return -1;
978978
}
979979

0 commit comments

Comments
 (0)