@@ -31,9 +31,10 @@ typedef struct reglist
31
31
{
32
32
uint32_t n , m ;
33
33
uint64_t * a ;
34
+ int tid ;
34
35
} reglist_t ;
35
36
36
- KHASH_MAP_INIT_STR (reg , reglist_t )
37
+ KHASH_MAP_INIT_INT (reg , reglist_t )
37
38
typedef kh_reg_t reghash_t ;
38
39
39
40
static int compare_uint64 (const void * a , const void * b )
@@ -52,7 +53,7 @@ static void reg_print(reghash_t *h) {
52
53
reglist_t * p ;
53
54
khint_t k ;
54
55
uint32_t i ;
55
- const char * reg ;
56
+ khint32_t key ;
56
57
uint32_t beg , end ;
57
58
58
59
if (!h ) {
@@ -61,16 +62,16 @@ static void reg_print(reghash_t *h) {
61
62
}
62
63
/*
 * Dump every populated bucket of the region hash to stderr: the hash key,
 * the tid stored in the entry, and each interval held in the entry's array.
 */
for (k = kh_begin(h); k < kh_end(h); k++) {
    if (!kh_exist(h, k))
        continue;

    key = kh_key(h, k);

    /* Bind p to this bucket's value BEFORE reading p->tid.  Previously the
     * tid was printed through p ahead of its first assignment, i.e. through
     * an uninitialised pointer on the first populated bucket and through the
     * previous bucket's entry on later ones. */
    p = &kh_val(h, k);
    fprintf(stderr, "Region: key %u tid %d \n", key, p->tid);

    if (p->n == 0) {
        fprintf(stderr, "Region key %u has no intervals!\n", key);
        continue;
    }

    for (i = 0; i < p->n; i++) {
        /* Each array element packs beg in the upper 32 bits and end in the
         * lower 32 bits. */
        beg = (uint32_t)(p->a[i] >> 32);
        end = (uint32_t)(p->a[i]);
        /* i, beg and end are unsigned, so use %u rather than %d. */
        fprintf(stderr, "\tinterval[%u]: %u-%u\n", i, beg, end);
    }
}
@@ -109,7 +110,7 @@ static int reg_compact(reghash_t *h) {
109
110
return count ;
110
111
}
111
112
112
- static int reg_insert (reghash_t * h , char * reg , unsigned int beg , unsigned int end ) {
113
+ static int reg_insert (reghash_t * h , int tid , unsigned int beg , unsigned int end ) {
113
114
114
115
khint_t k ;
115
116
reglist_t * p ;
@@ -118,17 +119,15 @@ static int reg_insert(reghash_t *h, char *reg, unsigned int beg, unsigned int en
118
119
return -1 ;
119
120
120
121
// Put tid in the hash table if not already there
121
- k = kh_get (reg , h , reg ); //looks strange, but only the second reg is the actual region name.
122
+ k = kh_get (reg , h , tid );
122
123
if (k == kh_end (h )) { // absent from the hash table
123
124
int ret ;
124
- char * s = strdup (reg );
125
- if (NULL == s ) return -1 ;
126
- k = kh_put (reg , h , s , & ret );
125
+ k = kh_put (reg , h , tid , & ret );
127
126
if (-1 == ret ) {
128
- free (s );
129
127
return -1 ;
130
128
}
131
129
memset (& kh_val (h , k ), 0 , sizeof (reglist_t ));
130
+ kh_val (h , k ).tid = tid ;
132
131
}
133
132
p = & kh_val (h , k );
134
133
@@ -156,7 +155,6 @@ static void reg_destroy(reghash_t *h) {
156
155
for (k = 0 ; k < kh_end (h ); ++ k ) {
157
156
if (kh_exist (h , k )) {
158
157
free (kh_val (h , k ).a );
159
- free ((char * )kh_key (h , k ));
160
158
}
161
159
}
162
160
kh_destroy (reg , h );
@@ -175,11 +173,10 @@ hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr
175
173
hts_reglist_t * h_reglist = NULL ;
176
174
177
175
khint_t k ;
178
- int i , l_count = 0 ;
176
+ int i , l_count = 0 , tid ;
179
177
uint32_t j ;
180
- char reg [1024 ];
181
178
const char * q ;
182
- int beg , end ;
179
+ int64_t beg , end ;
183
180
184
181
/* First, transform the char array into a hash table */
185
182
h = kh_init (reg );
@@ -189,65 +186,56 @@ hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr
189
186
}
190
187
191
188
for (i = 0 ; i < argc ; i ++ ) {
192
- q = hts_parse_reg (argv [i ], & beg , & end );
193
- if (q ) {
194
- if (q - argv [i ] > sizeof (reg ) - 1 ) {
195
- hts_log_error ("Region name '%s' is too long (bigger than %d)" , argv [i ], (int ) sizeof (reg ) - 1 );
196
- continue ;
197
- }
198
- memcpy (reg , argv [i ], q - argv [i ]);
199
- reg [q - argv [i ]] = 0 ;
189
+ if (!strcmp (argv [i ], "." )) {
190
+ q = argv [i ] + 1 ;
191
+ tid = HTS_IDX_START ; beg = 0 ; end = INT64_MAX ;
192
+ } else if (!strcmp (argv [i ], "*" )) {
193
+ q = argv [i ] + 1 ;
194
+ tid = HTS_IDX_NOCOOR ; beg = 0 ; end = INT64_MAX ;
200
195
} else {
201
- // not parsable as a region, but possibly a sequence named "foo:a"
202
- if (strlen (argv [i ]) > sizeof (reg ) - 1 ) {
203
- hts_log_error ("Region name '%s' is too long (bigger than %d)" , argv [i ], (int ) sizeof (reg ) - 1 );
204
- continue ;
205
- }
206
- strcpy (reg , argv [i ]);
207
- beg = 0 ; end = INT_MAX ;
196
+ q = hts_parse_region (argv [i ], & tid , & beg , & end , getid , hdr ,
197
+ HTS_PARSE_THOUSANDS_SEP );
208
198
}
199
+ if (!q ) {
200
+ // not parsable as a region
201
+ hts_log_warning ("Region '%s' specifies an unknown reference name. Continue anyway" , argv [i ]);
202
+ continue ;
203
+ }
204
+
205
+ if (beg > INT_MAX ) beg = INT_MAX ; // Remove when fully 64-bit compliant
206
+ if (end > INT_MAX ) end = INT_MAX ; // Remove when fully 64-bit compliant
209
207
210
- if (reg_insert (h , reg , beg , end ) != 0 ) {
208
+ if (reg_insert (h , tid , beg , end ) != 0 ) {
211
209
hts_log_error ("Error when inserting region='%s' in the bed hash table at address=%p" , argv [i ], (void * ) h );
212
210
goto fail ;
213
211
}
214
212
}
215
213
216
214
* r_count = reg_compact (h );
217
215
if (!* r_count )
218
- return NULL ;
216
+ goto fail ;
219
217
220
218
/* Transform the hash table into a list */
221
219
h_reglist = (hts_reglist_t * )calloc (* r_count , sizeof (hts_reglist_t ));
222
220
if (!h_reglist )
223
- return NULL ;
221
+ goto fail ;
224
222
225
223
for (k = kh_begin (h ); k < kh_end (h ) && l_count < * r_count ; k ++ ) {
226
224
if (!kh_exist (h ,k ) || !(p = & kh_val (h ,k )))
227
225
continue ;
228
226
229
- char * reg_name = (char * )kh_key (h ,k );
230
- if (!strcmp (reg_name , "." )) {
231
- h_reglist [l_count ].tid = HTS_IDX_START ;
232
- } else if (!strcmp (reg_name , "*" )) {
233
- h_reglist [l_count ].tid = HTS_IDX_NOCOOR ;
234
- } else {
235
- h_reglist [l_count ].tid = getid (hdr , reg_name );
236
- if (h_reglist [l_count ].tid < 0 )
237
- hts_log_warning ("Region '%s' specifies an unknown reference name. Continue anyway" , reg_name );
238
- }
239
-
240
- h_reglist [l_count ].intervals = (hts_pair32_t * )calloc (p -> n , sizeof (hts_pair32_t ));
227
+ h_reglist [l_count ].tid = p -> tid ;
228
+ h_reglist [l_count ].intervals = calloc (p -> n , sizeof (h_reglist [l_count ].intervals [0 ]));
241
229
if (!(h_reglist [l_count ].intervals )) {
242
- hts_log_error ("Could not allocate memory for intervals for region='%s'" , kh_key ( h , k ) );
230
+ hts_log_error ("Could not allocate memory for intervals" );
243
231
goto fail ;
244
232
}
245
233
h_reglist [l_count ].count = p -> n ;
246
234
h_reglist [l_count ].max_end = 0 ;
247
235
248
236
for (j = 0 ; j < p -> n ; j ++ ) {
249
237
h_reglist [l_count ].intervals [j ].beg = (uint32_t )(p -> a [j ]>>32 );
250
- h_reglist [l_count ].intervals [j ].end = (uint32_t )(p -> a [j ]);
238
+ h_reglist [l_count ].intervals [j ].end = (uint32_t )(p -> a [j ] & 0xffffffffU );
251
239
252
240
if (h_reglist [l_count ].intervals [j ].end > h_reglist [l_count ].max_end )
253
241
h_reglist [l_count ].max_end = h_reglist [l_count ].intervals [j ].end ;
0 commit comments