Skip to content

Commit 1c3ae1f

Browse files
committed
int_vector_mapper now supports RAM-files
1 parent d6c0f5a commit 1c3ae1f

13 files changed

+530
-316
lines changed

include/sdsl/config.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
#include <map>
66
#include <string>
77

8+
// SDSL_UNUSED: expands to the `__attribute__ ((unused))` annotation unless
// MSVC_COMPILER is defined, in which case it expands to nothing.
#ifndef MSVC_COMPILER
9+
#define SDSL_UNUSED __attribute__ ((unused))
10+
#else
11+
#define SDSL_UNUSED
12+
#endif
13+
814
namespace sdsl
915
{
1016
namespace conf // namespace for library constant
@@ -35,6 +41,7 @@ enum byte_sa_algo_type {LIBDIVSUFSORT, SE_SAIS};
3541
//! Helper class for construction process
3642
struct cache_config {
3743
bool delete_files; // Flag which indicates if all files which were created
44+
bool delete_data; // Flag which indicates if the original data can be deleted
3845
// during construction should be deleted.
3946
std::string dir; // Directory for temporary files.
4047
std::string id; // Identifier is part of temporary file names. If

include/sdsl/int_vector.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ class int_vector
590590
}
591591

592592
//! Read the size and int_width of an int_vector
593-
static void read_header(int_vector_size_type& size, int_width_type& int_width, std::istream& in)
593+
static size_t read_header(int_vector_size_type& size, int_width_type& int_width, std::istream& in)
594594
{
595595
uint64_t width_and_size = 0;
596596
read_member(width_and_size, in);
@@ -604,6 +604,7 @@ class int_vector
604604
std::cerr << " was specified as " << (size_type)read_int_width << std::endl;
605605
std::cerr << "Length is " << size << " bits" << std::endl;
606606
}
607+
return sizeof(width_and_size);
607608
}
608609

609610
//! Write the size and int_width of an int_vector

include/sdsl/int_vector_mapper.hpp

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class int_vector_mapper
2020
typedef typename int_vector<t_width>::value_type value_type;
2121
typedef typename int_vector<t_width>::size_type size_type;
2222
typedef typename int_vector<t_width>::int_width_type width_type;
23+
static constexpr uint8_t fixed_int_width = t_width;
2324
public:
2425
const size_type append_block_size = 1000000;
2526
private:
@@ -38,31 +39,31 @@ class int_vector_mapper
3839
~int_vector_mapper()
3940
{
4041
if (m_mapped_data) {
41-
if (t_mode&std::ios_base::out) { // write was possible
42-
if (m_data_offset) {
43-
// update size in the on disk representation and
44-
// truncate if necessary
45-
uint64_t* size_in_file = (uint64_t*)m_mapped_data;
46-
if (*size_in_file != m_wrapper.m_size) {
47-
*size_in_file = m_wrapper.m_size;
48-
}
49-
if (t_width==0) {
50-
// if size is variable and we map a sdsl vector
51-
// we might have to update the stored width
52-
uint8_t stored_width = m_mapped_data[8];
53-
if (stored_width != m_wrapper.m_width) {
54-
m_mapped_data[8] = m_wrapper.m_width;
55-
}
56-
}
57-
}
58-
}
59-
60-
auto ret = memory_manager::mem_unmap(m_mapped_data,m_file_size_bytes);
42+
auto ret = memory_manager::mem_unmap(m_fd,m_mapped_data,m_file_size_bytes);
6143
if (ret != 0) {
6244
std::cerr << "int_vector_mapper: error unmapping file mapping'"
6345
<< m_file_name << "': " << ret << std::endl;
6446
}
6547

48+
if (t_mode&std::ios_base::out) { // write was possible
49+
if (m_data_offset) { // if the file is not a plain file
50+
// set std::ios::in to not truncate the file
51+
osfstream out(m_file_name, std::ios::in);
52+
if ( out ) {
53+
out.seekp(0, std::ios::beg);
54+
int_vector<t_width>::write_header(m_wrapper.m_size,
55+
m_wrapper.m_width,
56+
out);
57+
58+
// out.seekp(0, std::ios::end);
59+
} else {
60+
throw std::runtime_error("int_vector_mapper: \
61+
could not open file for header update");
62+
}
63+
}
64+
}
65+
66+
6667
if (t_mode&std::ios_base::out) {
6768
// do we have to truncate?
6869
size_type current_bit_size = m_wrapper.m_size;
@@ -95,6 +96,7 @@ class int_vector_mapper
9596
m_wrapper.m_data = nullptr;
9697
m_wrapper.m_size = 0;
9798
}
99+
98100
int_vector_mapper(int_vector_mapper&& ivm)
99101
{
100102
m_wrapper.m_data = ivm.m_wrapper.m_data;
@@ -107,6 +109,7 @@ class int_vector_mapper
107109
ivm.m_mapped_data = nullptr;
108110
ivm.m_fd = -1;
109111
}
112+
110113
int_vector_mapper& operator=(int_vector_mapper&& ivm)
111114
{
112115
m_wrapper.m_data = ivm.m_wrapper.m_data;
@@ -120,32 +123,35 @@ class int_vector_mapper
120123
ivm.m_fd = -1;
121124
return (*this);
122125
}
126+
123127
//! Map the file whose name is obtained from cache_file_name(key, config).
/*! Delegates to the filename constructor with only the file name given, so
 *  that constructor's defaults apply (is_plain = false, delete_on_close = false).
 */
int_vector_mapper(const std::string& key,const cache_config& config)
124128
: int_vector_mapper(cache_file_name(key, config)) {}
125129

126130

127131
int_vector_mapper(const std::string filename,
128132
bool is_plain = false,
129133
bool delete_on_close = false) :
134+
m_data_offset(0),
130135
m_file_name(filename), m_delete_on_close(delete_on_close)
131136
{
132137
size_type size_in_bits = 0;
133138
uint8_t int_width = t_width;
134139
{
135-
std::ifstream f(filename,std::ifstream::binary);
140+
isfstream f(filename,std::ifstream::binary);
136141
if (!f.is_open()) {
137142
throw std::runtime_error(
138-
"int_vector_mapper: file does not exist.");
143+
"int_vector_mapper: file "+
144+
m_file_name +
145+
" does not exist.");
139146
}
140147
if (!is_plain) {
141-
int_vector<t_width>::read_header(size_in_bits, int_width, f);
148+
m_data_offset = int_vector<t_width>::read_header(size_in_bits, int_width, f);
142149
}
143150
}
151+
144152
m_file_size_bytes = util::file_size(m_file_name);
145153

146-
if (!is_plain) {
147-
m_data_offset = 8;
148-
} else {
154+
if (is_plain) {
149155
if (8 != t_width and 16 != t_width and 32 != t_width and 64 != t_width) {
150156
throw std::runtime_error("int_vector_mapper: plain vector can "
151157
"only be of width 8, 16, 32, 64.");
@@ -158,7 +164,6 @@ class int_vector_mapper
158164
}
159165
}
160166
size_in_bits = m_file_size_bytes * 8;
161-
m_data_offset = 0;
162167
}
163168

164169
// open backend file depending on mode
@@ -170,6 +175,7 @@ class int_vector_mapper
170175
throw std::runtime_error(open_error);
171176
}
172177

178+
173179
// prepare for mmap
174180
m_wrapper.width(int_width);
175181
// mmap data
@@ -203,7 +209,7 @@ class int_vector_mapper
203209
size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3;
204210
if (m_file_size_bytes != new_size_in_bytes + m_data_offset) {
205211
if (m_mapped_data) {
206-
auto ret = memory_manager::mem_unmap(m_mapped_data,m_file_size_bytes);
212+
auto ret = memory_manager::mem_unmap(m_fd,m_mapped_data,m_file_size_bytes);
207213
if (ret != 0) {
208214
std::cerr << "int_vector_mapper: error unmapping file mapping'"
209215
<< m_file_name << "': " << ret << std::endl;
@@ -349,7 +355,7 @@ class temp_file_buffer
349355
throw std::runtime_error("could not create temporary file.");
350356
}
351357
#else
352-
sprintf(tmp_file_name, "%s/tmp_mapper_file_%lu_XXXXXX.sdsl",dir.c_str(),util::pid());
358+
sprintf(tmp_file_name, "%s/tmp_mapper_file_%llu_XXXXXX.sdsl",dir.c_str(),util::pid());
353359
int fd = mkstemps(tmp_file_name,5);
354360
if (fd == -1) {
355361
throw std::runtime_error("could not create temporary file.");
@@ -386,7 +392,7 @@ class temp_file_buffer
386392

387393
// creates empty int_vector<> that will not be deleted
388394
template <uint8_t t_width = 0>
389-
class write_out_buffer
395+
class write_out_mapper
390396
{
391397
public:
392398
static int_vector_mapper<t_width> create(const std::string& key,cache_config& config)
@@ -403,6 +409,15 @@ class write_out_buffer
403409
store_to_file(tmp_vector,file_name);
404410
return int_vector_mapper<t_width,std::ios_base::out|std::ios_base::in>(file_name,false,false);
405411
}
412+
//! Initialise `file_name` with an empty int_vector, map it for reading and writing, and resize the mapping.
/*! \param file_name  File that the empty vector is stored to and that is mapped afterwards.
 *  \param size       Value forwarded unchanged to int_vector_mapper::resize().
 *  \param int_width  Width passed to the temporary int_vector; defaults to t_width.
 *  \return The mapper opened on `file_name` (is_plain = false, delete_on_close = false).
 *  NOTE(review): the local mapper is declared with an explicit out|in mode while the
 *  return type uses int_vector_mapper's default mode; presumably they are the same type — confirm.
 */
static int_vector_mapper<t_width> create(const std::string& file_name,size_t size, uint8_t int_width = t_width)
413+
{
414+
// Store a zero-length int_vector of the requested width first, so the file exists before it is mapped.
415+
int_vector<t_width> tmp_vector(0,0,int_width);
416+
store_to_file(tmp_vector,file_name);
417+
int_vector_mapper<t_width,std::ios_base::out|std::ios_base::in> mapper(file_name,false,false); // non-plain, not deleted on close
418+
mapper.resize(size);
419+
return mapper;
420+
}
406421
};
407422

408423
template<std::ios_base::openmode t_mode = std::ios_base::out|std::ios_base::in>

0 commit comments

Comments
 (0)