Skip to content

Commit 0b0b743

Browse files
committed
Add in-memory I/O
1 parent 19c1894 commit 0b0b743

File tree

4 files changed

+360
-0
lines changed

4 files changed

+360
-0
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ LIBHTS_OBJS = \
148148
faidx.o \
149149
hfile.o \
150150
hfile_net.o \
151+
hfile_mem.o \
151152
hts.o \
152153
md5.o \
153154
probaln.o \
@@ -279,6 +280,7 @@ knetfile.o knetfile.pico: knetfile.c config.h $(htslib_knetfile_h)
279280
hfile.o hfile.pico: hfile.c config.h $(htslib_hfile_h) $(hfile_internal_h) $(hts_internal_h) $(htslib_khash_h)
280281
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hts_internal_h) $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
281282
hfile_net.o hfile_net.pico: hfile_net.c config.h $(hfile_internal_h) $(htslib_knetfile_h)
283+
# hfile_mem.c includes htslib/hfile.h, htslib/hfile_mem.h and hfile_internal.h
# (it does not use knetfile), so those are the headers that must trigger a rebuild.
hfile_mem.o hfile_mem.pico: hfile_mem.c config.h $(htslib_hfile_h) htslib/hfile_mem.h $(hfile_internal_h)
282284
hts.o hts.pico: hts.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) version.h $(hts_internal_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h)
283285
vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_khash_h) $(htslib_kseq_h)
284286
sam.o sam.pico: sam.c config.h $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h)

hfile.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,8 @@ static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *),
714714
return 0;
715715
}
716716

717+
extern int hfile_plugin_init_mem(struct hFILE_plugin *self);
718+
717719
static void load_hfile_plugins()
718720
{
719721
static const struct hFILE_scheme_handler
@@ -726,6 +728,7 @@ static void load_hfile_plugins()
726728
hfile_add_scheme_handler("data", &data);
727729
hfile_add_scheme_handler("file", &file);
728730
init_add_plugin(NULL, hfile_plugin_init_net, "knetfile");
731+
init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
729732

730733
#ifdef ENABLE_PLUGINS
731734
struct hts_path_itr path;

hfile_mem.c

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
/* The MIT License
2+
3+
Copyright (c) 2016 Illumina Cambridge Ltd.
4+
5+
Author: Peter Krusche <[email protected]>
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in
15+
all copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
THE SOFTWARE.
24+
*/
25+
26+
#include "htslib/hfile.h"
27+
#include "htslib/hfile_mem.h"
28+
#include "hfile_internal.h"
29+
30+
#include <stdio.h>
31+
#include <string.h>
32+
#include <stdlib.h>
33+
#include <malloc.h>
34+
#include <stdint.h>
35+
36+
37+
/* Callback consulted by hopen_mem() to resolve "@name" handles to
 * caller-provided buffers.  NULL until a callback has been installed. */
static buffer_lookup_fn hfile_mem_lookup_buffer = NULL;

/* Install fn as the buffer lookup callback shared by all later opens.
 * Passing NULL removes the current callback. */
void hfile_mem_set_lookup_function(buffer_lookup_fn fn)
{
    hfile_mem_lookup_buffer = fn;
}
42+
43+
44+
typedef struct
45+
{
46+
hFILE base;
47+
char *filename;
48+
char *mode;
49+
size_t buffer_size;
50+
size_t used_size;
51+
off_t offset;
52+
uint8_t *buffer;
53+
int buffer_is_mine;
54+
int write_flag;
55+
} hFILE_mem;
56+
57+
58+
/*
59+
* Implementation
60+
*/
61+
62+
63+
/*
 * Backend read callback: copy up to nbytes from the in-memory image,
 * starting at the current offset, into buffer and advance the offset.
 *
 * Returns the number of bytes copied, or 0 when the offset is at or beyond
 * the end of the valid data (end of file).
 */
static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_mem *fp = (hFILE_mem *) fpv;
    size_t pos, remaining, to_read;

    /* Only the first used_size bytes hold data; buffer_size is the allocated
     * capacity and can be larger for handles opened for writing.  The
     * position is validated BEFORE the subtraction below so the unsigned
     * difference can never wrap around into a huge read length. */
    if (fp->buffer == NULL || fp->offset < 0 ||
        (uintmax_t) fp->offset >= (uintmax_t) fp->used_size)
    {
        return 0;
    }

    pos = (size_t) fp->offset;
    remaining = fp->used_size - pos;
    to_read = remaining < nbytes ? remaining : nbytes;

    memcpy(buffer, fp->buffer + pos, to_read);
    fp->offset += (off_t) to_read;
    return (ssize_t) to_read;
}
77+
78+
/*
 * Backend write callback: copy nbytes from buffer into the in-memory image
 * at the current offset, growing the allocation (in 1 KiB steps) as needed.
 *
 * Only buffers allocated by this module may be written; buffers obtained
 * through the lookup callback are read-only.
 *
 * Returns nbytes on success, or -1 if the buffer is not writable, the file
 * position is invalid, or memory could not be allocated.
 */
static ssize_t mem_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_mem *fp = (hFILE_mem *) fpv;
    /* filename is NULL for "@" buffers; never hand NULL to %s */
    const char *name = fp->filename ? fp->filename : "(memory buffer)";
    size_t pos, end;

    if (!fp->buffer_is_mine)
    {
        fprintf(stderr, "[E::mem_file] Cannot write to %s -- I don't own the buffer and can only read.\n",
                name);
        return -1;
    }

    /* Reject negative positions and position + nbytes overflowing size_t. */
    if (fp->offset < 0 || (uintmax_t) fp->offset > (uintmax_t) (SIZE_MAX - nbytes))
    {
        fprintf(stderr, "[E::mem_file] Cannot write to %s -- invalid file position.\n", name);
        return -1;
    }
    pos = (size_t) fp->offset;
    end = pos + nbytes;

    /* Compare in unsigned arithmetic: a seek past the current capacity must
     * trigger a reallocation rather than slip past a signed/unsigned test. */
    if (end > fp->buffer_size)
    {
        size_t new_buffer_size;
        void *tmp;

        if (end > SIZE_MAX - 1023)
        {
            fprintf(stderr, "[E::mem_file] Cannot write to %s -- failed to allocate memory for %zu bytes.\n",
                    name, end);
            return -1;
        }
        /* round up to the next multiple of 1024 */
        new_buffer_size = (end + 1023) & ~(size_t) 1023;
        tmp = realloc(fp->buffer, new_buffer_size);
        if (!tmp)
        {
            /* fp->buffer is still valid and unchanged here */
            fprintf(stderr, "[E::mem_file] Cannot write to %s -- failed to allocate memory for %zu bytes.\n",
                    name, new_buffer_size);
            return -1;
        }
        fp->buffer = tmp;
        fp->buffer_size = new_buffer_size;
    }

    /* Writing after a seek beyond the end leaves a hole; zero-fill it so the
     * image never exposes uninitialised heap memory. */
    if (pos > fp->used_size)
    {
        memset(fp->buffer + fp->used_size, 0, pos - fp->used_size);
    }

    fp->write_flag = 1;
    if (nbytes > 0)
    {
        memcpy(fp->buffer + pos, buffer, nbytes);
    }
    fp->offset = (off_t) end;
    if (end > fp->used_size)
    {
        fp->used_size = end;
    }
    return (ssize_t) nbytes;
}
115+
116+
/*
 * Backend seek callback: move the read/write position.
 *
 * whence is SEEK_SET, SEEK_CUR or SEEK_END; SEEK_END is relative to the end
 * of the valid data (used_size), not to the allocated capacity.  Positions
 * beyond the end of the data are accepted.
 *
 * Returns the new position, or -1 (leaving the position unchanged, errno not
 * set) for an unknown whence or a resulting position before the start.
 */
static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_mem *fp = (hFILE_mem *) fpv;
    off_t origin;

    switch (whence)
    {
    case SEEK_SET:
        origin = 0;
        break;
    case SEEK_CUR:
        origin = fp->offset;
        break;
    case SEEK_END:
        origin = (off_t) fp->used_size;
        break;
    default:
        return -1;
    }

    /* origin is expected to be >= 0, so origin + offset cannot overflow for a
     * negative offset; refuse anything that would land before the start. */
    if (origin < 0 || (offset < 0 && offset < -origin))
    {
        return -1;
    }

    fp->offset = origin + offset;
    return fp->offset;
}
136+
137+
/* Backend flush callback: mem_write() stores straight into the memory
 * buffer, so nothing is pending here.  Always returns 0. */
static int mem_flush(hFILE *fpv)
{
    (void) fpv;   /* unused */
    return 0;
}
141+
142+
/* Backend close callback: release the strings owned by the handle and, when
 * the data buffer was allocated by this module, the buffer too.  Buffers
 * supplied through the lookup callback are left alone.  Always returns 0. */
static int mem_close(hFILE *fpv)
{
    hFILE_mem *mem = (hFILE_mem *) fpv;

    /* free(NULL) is a no-op, so no NULL guards are needed */
    free(mem->filename);
    free(mem->mode);
    if (mem->buffer_is_mine)
    {
        free(mem->buffer);
    }
    return 0;
}
159+
160+
/* Callback table tying the functions above to hFILE; its address also
 * identifies memory-backed handles in hfile_mem_get_buffer(). */
static const struct hFILE_backend mem_backend = {
    mem_read,
    mem_write,
    mem_seek,
    mem_flush,
    mem_close
};
163+
164+
hFILE *hopen_mem(const char *filename, const char *mode)
165+
{
166+
hFILE_mem *fp;
167+
FILE *fpr;
168+
size_t len;
169+
170+
const char *realfilename = strchr(filename, ':') + 1;
171+
if(!realfilename)
172+
{
173+
realfilename = filename;
174+
}
175+
fp = (hFILE_mem *) hfile_init(sizeof(hFILE_mem), mode, 0);
176+
if(!fp)
177+
{
178+
fprintf(stderr, "[E::mem_file] Cannot allocate memory for opening %s.\n", filename);
179+
return NULL;
180+
}
181+
182+
fp->base.backend = &mem_backend;
183+
fp->buffer = NULL;
184+
fp->buffer_size = 0;
185+
fp->used_size = 0;
186+
fp->write_flag = 0;
187+
fp->offset = 0;
188+
fp->mode = strdup(mode);
189+
fp->buffer_is_mine = 0;
190+
191+
if(realfilename[0] == '@')
192+
{
193+
if(hfile_mem_lookup_buffer == NULL)
194+
{
195+
fprintf(stderr, "[E::mem_file] Cannot open %s for reading: no lookup function is set.\n", filename);
196+
free(fp);
197+
return NULL;
198+
}
199+
++realfilename;
200+
fp->filename = NULL;
201+
if(hfile_mem_lookup_buffer(realfilename, (void**)&fp->buffer, &fp->buffer_size))
202+
{
203+
free(fp);
204+
return NULL;
205+
}
206+
207+
// fprintf(stderr, "[E::mem_file] Successfully opened %s for reading\n", filename);
208+
fp->used_size = fp->buffer_size;
209+
}
210+
else
211+
{
212+
fp->filename = strdup(realfilename);
213+
214+
if(strchr(mode, 'r'))
215+
{
216+
fpr = fopen(realfilename, mode);
217+
if(!fpr)
218+
{
219+
// fprintf(stderr, "[E::mem_file] Cannot open %s for reading.\n", filename);
220+
// don't write an error, this happens all the time when htslib tries to open a
221+
// csi file that doesn't exist
222+
free(fp);
223+
return NULL;
224+
}
225+
fseek(fpr, 0, SEEK_END);
226+
len = ftell(fpr);
227+
fseek(fpr, 0, SEEK_SET);
228+
fp->buffer = malloc(len);
229+
fp->buffer_is_mine = 1;
230+
if(!fp->buffer)
231+
{
232+
fprintf(stderr, "[E::mem_file] Cannot allocate memory to read %s.\n", filename);
233+
free(fp);
234+
fclose(fpr);
235+
return NULL;
236+
}
237+
if(fread(fp->buffer, 1, len, fpr) != len)
238+
{
239+
fprintf(stderr, "[E::mem_file] cannot read all of %s.\n", filename);
240+
free(fp);
241+
fclose(fpr);
242+
return NULL;
243+
}
244+
fp->buffer_size = len;
245+
fp->used_size = len;
246+
fclose(fpr);
247+
}
248+
else
249+
{
250+
fp->buffer = malloc(1024);
251+
fp->buffer_size = 1024;
252+
fp->buffer_is_mine = 1;
253+
}
254+
}
255+
return &fp->base;
256+
}
257+
258+
/*
 * Expose the memory image behind a handle opened by hopen_mem().
 *
 * On success *buffer points at the data and *length is the number of valid
 * bytes.  The memory still belongs to the handle: the pointer can be
 * invalidated by a later write (the buffer may be reallocated) and by
 * closing the handle.
 *
 * Returns 0 on success, -1 if an argument is NULL, the handle does not use
 * the memory backend, or it has no buffer.
 */
int hfile_mem_get_buffer(hFILE * file, void ** buffer, size_t * length)
{
    hFILE_mem *fp;

    if(file == NULL || buffer == NULL || length == NULL)
    {
        return -1;
    }
    /* the backend table's address identifies our handles; only then is the
     * cast to hFILE_mem below valid */
    if(file->backend != &mem_backend)
    {
        fprintf(stderr, "[E::mem_file] Cannot return buffer for hFILE that does not use the memory backend.\n");
        return -1;
    }

    fp = (hFILE_mem *) file;
    if(fp->buffer == NULL)
    {
        return -1;
    }
    *buffer = fp->buffer;
    *length = fp->used_size;
    return 0;
}
278+
279+
int hfile_plugin_init_mem(struct hFILE_plugin *self)
280+
{
281+
// mem files are declared remote so they work with a tabix index
282+
static const struct hFILE_scheme_handler handler =
283+
{hopen_mem, hfile_always_remote, "mem", 0};
284+
self->name = "mem";
285+
hfile_add_scheme_handler("mem", &handler);
286+
return 0;
287+
}
288+

htslib/hfile_mem.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/* The MIT License
2+
3+
Copyright (c) 2016 Illumina Cambridge Ltd.
4+
5+
Author: Peter Krusche <[email protected]>
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in
15+
all copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
THE SOFTWARE.
24+
*/
25+
26+
#ifndef HTSLIB_HFILE_MEM_H
#define HTSLIB_HFILE_MEM_H

#include <stddef.h>

#include "hfile.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Buffer lookup callback. Given a file name, returns a buffer and size.
 *
 * When hopen_mem is called to open a file with a name that starts with '@',
 * it will use such a function to obtain a buffer pointer. This allows us to
 * feed arbitrary memory blocks into htslib for decompression / parsing.
 * The buffer remains owned by the caller: it is never written to or freed
 * by the memory backend.
 *
 * @param name the file / internal handle name (without the leading '@').
 * @param buffer void pointer that will receive the buffer
 * @param length size_t pointer that will receive the length of the data pointed to in buffer
 *
 * @return 0 if a buffer was found; any non-zero value makes the open fail
 */
typedef int (*buffer_lookup_fn)(const char * name, void** buffer, size_t * length);

/**
 * Set buffer lookup function for memory files.
 * @param fn function of type buffer_lookup_fn
 */
extern void hfile_mem_set_lookup_function(buffer_lookup_fn fn);

/**
 * Get buffer for a hfile
 * @param file the file to use. This should be a hFILE that was opened using hopen_mem
 * @param buffer void pointer that will receive the buffer
 * @param length size_t pointer that will receive the length of the data pointed to in buffer
 *
 * @return 0 if successful, -1 otherwise
 */
extern int hfile_mem_get_buffer(hFILE * file, void ** buffer, size_t * length);

#ifdef __cplusplus
}
#endif

#endif //HTSLIB_HFILE_MEM_H

0 commit comments

Comments
 (0)