diff --git a/CDB_File.pm b/CDB_File.pm index 69b54a2..269e01d 100644 --- a/CDB_File.pm +++ b/CDB_File.pm @@ -16,10 +16,12 @@ CDB_File - Perl extension for access to cdb databases =head1 SYNOPSIS use CDB_File; - $c = tie(%h, 'CDB_File', 'file.cdb') or die "tie failed: $!\n"; - # If accessing a utf8 stored CDB_File - $c = tie(%h, 'CDB_File', 'file.cdb', utf8 => 1) or die "tie failed: $!\n"; + # If accessing a bytes/Latin-1 CDB file: + $c = tie(%h, 'CDB_File', 'file.cdb', string_mode => 'latin1') or die "tie failed: $!\n"; + + # If accessing a utf8 stored CDB file: + $c = tie(%h, 'CDB_File', 'file.cdb', string_mode => 'utf8') or die "tie failed: $!\n"; $fh = $c->handle; sysseek $fh, $c->datapos, 0 or die ...; @@ -27,7 +29,7 @@ CDB_File - Perl extension for access to cdb databases undef $c; untie %h; - $t = CDB_File->new('t.cdb', "t.$$") or die ...; + $t = CDB_File->new('t.cdb', "t.$$", string_mode => 'latin1') or die ...; $t->insert('key', 'value'); $t->finish; @@ -36,10 +38,11 @@ CDB_File - Perl extension for access to cdb databases or use CDB_File 'create'; - create %t, $file, "$file.$$"; + create %t, $file, "$file.$$", string_mode => 'latin1'; + + # If you want to store the data UTF-8 encoded: + create %t, $file, "$file.$$", string_mode => 'utf8'; - # If you want to store the data in utf8 mode. - create %t, $file, "$file.$$", utf8 => 1; =head1 DESCRIPTION B is a module which provides a Perl interface to Dan @@ -86,25 +89,54 @@ C<$final> containing the contents of C<%t>. As before, C<$tmp> must name a temporary file which can be atomically renamed to C<$final>. C may be imported. -=head2 UTF8 support. - -When CDB_File was created in 1997 (prior even to Perl 5.6), Perl SVs -didn't really deal with UTF8. In order to properly store mixed -bytes and utf8 data in the file, we would normally need to store a bit -for each string which clarifies the encoding of the key / values. -This would be useful since Perl hash keys are downgraded to bytes when -possible so as to normalize the hash key access regardless of encoding. - -The CDB_File format is used outside of Perl and so must maintain file -format compatibility with those systems. As a result this module provides -a utf8 mode which must be enabled at database generation and then later -at read. Keys will always be stored as UTF8 strings which is the opposite -of how Perl stores the strings. This approach had to be taken to assure no -data corruption happened due to accidentally downgraded SVs before they -are stored or on retrieval. - -You can enable utf8 mode by passing C 1> to B, B, -or B. All returned SVs while in this mode will be encoded in utf8. +=head2 String Modes + +When CDB_File was created in 1997 (prior even to Perl 5.6), Perl strings +were simple byte strings. It thus made sense, when exporting strings, +simply to save the Perl interpreter’s internal string representation. + +In modern perls, though, strings are ordered arrays of code points. +Perl doesn’t store those code points in a predictable internal encoding; +thus, if we use the old behavior of exporting Perl’s internal +representation, we’ll have unpredictable results. + +Sadly, this status quo must remain our default behavior; however, newer +code should fix the situation by passing a C parameter +to C, C, or C with one of the following values: + +=over + +=item * C - Similar to legacy behavior, but all strings are +saved and imported as Latin-1. Any attempt to save a string that contains +a code point that Latin-1 can’t accommodate—i.e., a code point that exceeds +255—will trigger an exception. + +Likewise, any lookup on a string that includes a >255 code point will +trigger an exception. + +This is suitable for “byte strings”, i.e., strings whose code points +represent raw octets. This is the default state for Perl strings, and +it’s also what you’ll have if you’ve encoded your strings for output +prior to sending them to CDB_File. + +=item * C - All strings are stored as UTF-8. Additionally, when +reading a CDB file, all strings are also I as UTF-8. Any strings +in the CDB file that may not be valid UTF-8 will trigger an exception. +This is suitable for decoded strings that you have I encoded prior to +sending them to CDB_File. + +=item * C - Just like C, but this skips the UTF-8 +validity check. This can be marginally faster than C, but if any +strings are invalid UTF-8 then Perl’s internals may be corrupted. Avoid +this mode unless you trust what you’re loading. + +=back + +You can also pass a C of C to indicate the legacy +behavior explicitly. + +Note that the above applies to hash keys as well as to values. + This feature is not available below 5.14 due to lack of Perl macro support. B read/write of databases not stored in utf8 mode will often be diff --git a/CDB_File.xs b/CDB_File.xs index 4bf6a1e..ae39c45 100644 --- a/CDB_File.xs +++ b/CDB_File.xs @@ -72,7 +72,14 @@ EINVAL. */ #endif #if PERL_VERSION_LE(5,13,7) - #define CDB_FILE_HAS_UTF8_HASH_MACROS + #define CDB_FILE_LACKS_UTF8_HASH_MACROS +#endif + +#ifdef CDB_FILE_LACKS_UTF8_HASH_MACROS + #define CROAK_IF_NO_UTF8_SUPPORT \ + croak("utf8 CDB_Files are not supported below Perl 5.14"); +#else + #define CROAK_IF_NO_UTF8_SUPPORT #endif #if defined(SV_COW_REFCNT_MAX) @@ -101,6 +108,15 @@ struct t_string_finder { }; typedef struct t_string_finder string_finder; +typedef enum { + STRING_MODE_SV, + STRING_MODE_UTF8, + STRING_MODE_LATIN1, + STRING_MODE_UTF8_NAIVE, +} string_mode_t; + +#define STRING_MODE_FLAG_UTF8 1 + struct t_cdb { PerlIO *fh; /* */ @@ -109,7 +125,7 @@ struct t_cdb { #endif U32 end; /* If non zero, the file offset of the first byte of hash tables. */ - bool is_utf8; /* will we be reading in utf8 encoded data? If so we'll set SvUTF8 = true; */ + string_mode_t string_mode; /* The output mode for strings */ string_finder curkey; /* While iterating: the current key; */ STRLEN curkey_allocated; U32 curpos; /* the file offset of the current record. */ @@ -126,6 +142,8 @@ struct t_cdb { typedef struct t_cdb cdb; +#define CDB_IS_UTF8(cdb) (cdb->string_mode & STRING_MODE_FLAG_UTF8) + #define CDB_HPLIST 1000 struct cdb_hp { U32 h; U32 p; }; @@ -138,7 +156,7 @@ struct cdb_hplist { struct t_cdb_make { PerlIO *f; /* Handle of file being created. */ - bool is_utf8; /* Coerce the PV to utf8 before writing out the data? */ + string_mode_t string_mode; /* The output mode for strings */ char *fn; /* Final name of file. */ char *fntemp; /* Temporary name of file. */ char final[2048]; @@ -163,6 +181,22 @@ static void readerror() { croak("Read of CDB_File failed: %s", Strerror(errno)); static void nomem() { croak("Out of memory!"); } +static inline void _reconcile_new_sv_with_utf8 (cdb *c, SV *sv) { + switch (c->string_mode) { + case STRING_MODE_UTF8: + if ( !sv_utf8_decode(sv) ) + croak("Invalid UTF-8 sequence detected!"); + break; + + case STRING_MODE_UTF8_NAIVE: + SvUTF8_on(sv); + break; + + default: + break; + } +} + static inline SV * sv_from_datapos(cdb *c, STRLEN len) { SV *sv; char *buf; @@ -170,14 +204,15 @@ static inline SV * sv_from_datapos(cdb *c, STRLEN len) { sv = newSV(len + 1 + CDB_CAN_COW); SvPOK_on(sv); CDB_DO_COW(sv); - if(c->is_utf8) - SvUTF8_on(sv); + buf = SvPVX(sv); if (cdb_read(c, buf, len, cdb_datapos(c)) == -1) readerror(); buf[len] = '\0'; SvCUR_set(sv, len); + _reconcile_new_sv_with_utf8(c, sv); + return sv; } @@ -186,8 +221,8 @@ static inline SV * sv_from_curkey (cdb *c) { sv = newSV(c->curkey.len + 1 + CDB_CAN_COW); sv_setpvn(sv, c->curkey.pv, c->curkey.len); CDB_DO_COW(sv); - if(c->is_utf8) - SvUTF8_on(sv); + + _reconcile_new_sv_with_utf8(c, sv); return sv; } @@ -336,9 +371,10 @@ static bool cdb_key_eq (string_finder *left, string_finder *right) { static int match(cdb *c, string_finder *to_find, U32 pos) { string_finder nextkey; + nextkey.is_utf8 = CDB_IS_UTF8(c); + #ifdef HASMMAP /* We don't have to allocate any memory if we're using mmap. */ - nextkey.is_utf8 = c->is_utf8; SET_FINDER_LEN(nextkey, to_find->len); nextkey.pv = cdb_map_addr(c, to_find->len, pos); return cdb_key_eq(&nextkey, to_find); @@ -348,7 +384,6 @@ static int match(cdb *c, string_finder *to_find, U32 pos) { int len; char static_buffer[CDB_MATCH_BUFFER]; - nextkey.is_utf8 = c->is_utf8; SET_FINDER_LEN(nextkey, to_find->len); len = nextkey.len; @@ -513,6 +548,58 @@ static void iter_end(cdb *c) { } } +#define DOWNGRADE_SV_IF_NEEDED(cdbthing, sv) \ + if ((cdbthing->string_mode == STRING_MODE_LATIN1) && SvUTF8(sv) && !sv_utf8_downgrade(sv, TRUE)) { \ + croak("Wide character given; Latin-1 mode requires all code points to be 0-255!"); \ + } + +static inline void _string_finder_init( cdb *c, SV *k, string_finder *to_find ) { + DOWNGRADE_SV_IF_NEEDED(c, k); + + to_find->pv = CDB_IS_UTF8(c) ? SvPVutf8(k, to_find->len) : SvPV(k, to_find->len); + to_find->hash = 0; + to_find->is_utf8 = CDB_IS_UTF8(c); +} + +static string_mode_t _parse_string_mode( const char *option_key, const char *option_value ) { + string_mode_t string_mode = STRING_MODE_SV; + + if(strEQ("string_mode", option_key)) { + if (option_value == NULL) { + croak("Need value for “string_mode”!"); + } + else if (strEQ("utf8", option_value)) { + CROAK_IF_NO_UTF8_SUPPORT; + string_mode = STRING_MODE_UTF8; + } + else if (strEQ("latin1", option_value)) { + string_mode = STRING_MODE_LATIN1; + } + else if (strEQ("utf8_naive", option_value)) { + CROAK_IF_NO_UTF8_SUPPORT; + string_mode = STRING_MODE_UTF8_NAIVE; + } + else if (!strEQ("sv", option_value)) { + croak("Bad “string_mode”: %s", option_value); + } + } + + return string_mode; +} + +#define CDB_SVPV_AFTER_POSSIBLE_DOWNGRADE(cdb_maybe_make, sv, len) ( \ + CDB_IS_UTF8(cdb_maybe_make) \ + ? SvPVutf8(sv, len) \ + : (cdb_maybe_make->string_mode == STRING_MODE_LATIN1) \ + ? SvPVbyte(sv, len) \ + : SvPV(sv, len) \ +) + +static char *_cdb_svpv( cdb_make *this, SV* sv, STRLEN* len ) { + DOWNGRADE_SV_IF_NEEDED(this, sv); + return CDB_SVPV_AFTER_POSSIBLE_DOWNGRADE(this, sv, *len); +} + typedef PerlIO * InputStream; MODULE = CDB_File PACKAGE = CDB_File PREFIX = cdb_ @@ -560,27 +647,20 @@ cdb_datapos(db) RETVAL cdb * -cdb_TIEHASH(CLASS, filename, option_key="", is_utf8=FALSE) +cdb_TIEHASH(CLASS, filename, option_key="", const char *option_value=NULL) char *CLASS char *filename char *option_key - bool is_utf8 PREINIT: PerlIO *f; - bool utf8_chosen = FALSE; CODE: - if(strlen(option_key) == 4 && strnEQ("utf8", option_key, 4) && is_utf8 ) -#ifdef CDB_FILE_HAS_UTF8_HASH_MACROS - croak("utf8 CDB_Files are not supported below Perl 5.14"); -#else - utf8_chosen = TRUE; -#endif + string_mode_t string_mode = _parse_string_mode(option_key, option_value); Newxz(RETVAL, 1, cdb); RETVAL->fh = f = PerlIO_open(filename, "rb"); - RETVAL->is_utf8 = utf8_chosen; + RETVAL->string_mode = string_mode; if (!f) XSRETURN_NO; @@ -618,12 +698,12 @@ cdb_FETCH(this, k) CODE: if (!SvOK(k)) { + + /* Should this not mimic Perl and at least warn? */ XSRETURN_UNDEF; } - to_find.pv = this->is_utf8 ? SvPVutf8(k, to_find.len) : SvPV(k, to_find.len); - to_find.hash = 0; - to_find.is_utf8 = this->is_utf8 && SvUTF8(k); + _string_finder_init( this, k, &to_find ); /* Already advanced to the key we need. */ if (this->end && cdb_key_eq(&this->curkey, &to_find)) { @@ -707,16 +787,18 @@ cdb_multi_get(this, k) CODE: if (!SvOK(k)) { + + /* Should this not mimic Perl and at least warn? */ XSRETURN_UNDEF; } + cdb_findstart(this); + + _string_finder_init( this, k, &to_find); + RETVAL = newAV(); sv_2mortal((SV *)RETVAL); - to_find.pv = this->is_utf8 ? SvPVutf8(k, to_find.len) : SvPV(k, to_find.len); - to_find.hash = 0; - to_find.is_utf8 = SvUTF8(k); - for (;;) { found = cdb_findnext(this, &to_find); if ((found != 0) && (found != 1)) @@ -746,9 +828,7 @@ cdb_EXISTS(this, k) XSRETURN_NO; } - to_find.pv = this->is_utf8 ? SvPVutf8(k, to_find.len) : SvPV(k, to_find.len); - to_find.hash = 0; - to_find.is_utf8 = SvUTF8(k); + _string_finder_init( this, k, &to_find ); RETVAL = cdb_find(this, &to_find); if (RETVAL != 0 && RETVAL != 1) @@ -809,9 +889,7 @@ cdb_NEXTKEY(this, k) XSRETURN_UNDEF; } - to_find.pv = this->is_utf8 ? SvPVutf8(k, to_find.len) : SvPV(k, to_find.len); - to_find.hash = 0; - to_find.is_utf8 = SvUTF8(k); + _string_finder_init( this, k, &to_find ); /* Sometimes NEXTKEY gets called before FIRSTKEY if the hash * gets re-tied so we call iter_start() anyway here */ @@ -831,28 +909,22 @@ cdb_NEXTKEY(this, k) RETVAL cdb_make * -cdb_new(CLASS, fn, fntemp, option_key="", is_utf8=FALSE) +cdb_new(CLASS, fn, fntemp, option_key="", const char* option_value=NULL) char * CLASS char * fn char * fntemp char * option_key - bool is_utf8; PREINIT: cdb_make *cdbmake; bool utf8_chosen = FALSE; CODE: - if(strlen(option_key) == 4 && strnEQ("utf8", option_key, 4) && is_utf8 ) -#ifdef CDB_FILE_HAS_UTF8_HASH_MACROS - croak("utf8 CDB_Files are not supported below Perl 5.14"); -#else - utf8_chosen = TRUE; -#endif + string_mode_t string_mode = _parse_string_mode(option_key, option_value); Newxz(cdbmake, 1, cdb_make); cdbmake->f = PerlIO_open(fntemp, "wb"); - cdbmake->is_utf8 = utf8_chosen; + cdbmake->string_mode = string_mode; if (!cdbmake->f) XSRETURN_UNDEF; @@ -896,15 +968,12 @@ cdbmaker_insert(this, ...) PREINIT: char *kp, *vp, packbuf[8]; int x; - bool is_utf8; STRLEN klen, vlen; U32 h; SV *k; SV *v; PPCODE: - is_utf8 = this->is_utf8; - for (x = 1; x < items; x += 2) { k = ST(x); v = ST(x+1); @@ -919,8 +988,8 @@ cdbmaker_insert(this, ...) v = sv_2mortal(newSVpv("", 0)); } - kp = is_utf8 ? SvPVutf8(k, klen) : SvPV(k, klen); - vp = is_utf8 ? SvPVutf8(v, vlen) : SvPV(v, vlen); + kp = _cdb_svpv( this, k, &klen ); + vp = _cdb_svpv( this, v, &vlen ); uint32_pack(packbuf, klen); uint32_pack(packbuf + 4, vlen); diff --git a/Changelog b/Changelog index 09f8169..cbd797f 100644 --- a/Changelog +++ b/Changelog @@ -1,5 +1,11 @@ Revision history for Perl extension CDB_File. +1.06 +- Change course w/ utf8 mode: instead, provide a string_mode that allows + strings to be considered either Latin-1 strings or character strings. + This breaks compatibility with anything that adopted the utf8 flag added + in 1.04, but since that was so recent this seems an acceptable risk. + 1.05 - Todd Rinaldo 2020-12-11 - Additional tests for known corner cases. - Use Perl_warn not warn in XS. diff --git a/t/clear.t b/t/clear.t index 800ba04..6a3a7b0 100644 --- a/t/clear.t +++ b/t/clear.t @@ -18,11 +18,11 @@ use CDB_File; my ( $db, $db_tmp ) = get_db_file_pair(1); my %a = qw(one Hello two Goodbye); -eval { CDB_File::create( %a, $db->filename, $db_tmp->filename, 'utf8' => 1 ) or die "Failed to create cdb: $!" }; +eval { CDB_File::create( %a, $db->filename, $db_tmp->filename, string_mode => 'utf8' ) or die "Failed to create cdb: $!" }; is( "$@", '', "Create cdb" ); # Test that good file works. -tie( my %h, "CDB_File", $db->filename, 'utf8' => 0 ) and pass("Test that good file works"); +tie( my %h, "CDB_File", $db->filename, string_mode => 'sv' ) and pass("Test that good file works"); like exception { delete $h{'one'} }, qr{^\QModification of a CDB_File attempted at t/clear.t\E}, "Test dies if you try to delete a key in a tied hash"; like exception { $h{'one'} = 5 }, qr{^\QModification of a CDB_File attempted at t/clear.t\E}, "Test dies if you try to modify a key in a tied hash"; diff --git a/t/empty.t b/t/empty.t index 04dff73..3eb0b7b 100644 --- a/t/empty.t +++ b/t/empty.t @@ -16,7 +16,7 @@ my ( $db, $db_tmp ) = get_db_file_pair(1); my $db_file = $db->filename; eval { - my $t = CDB_File->new( $db_file, $db_tmp->filename, utf8 => 0 ) or die "Failed to create cdb: $!"; + my $t = CDB_File->new( $db_file, $db_tmp->filename, string_mode => 'sv' ) or die "Failed to create cdb: $!"; $t->finish; }; diff --git a/t/latin1.t b/t/latin1.t new file mode 100644 index 0000000..5653dde --- /dev/null +++ b/t/latin1.t @@ -0,0 +1,71 @@ +#!perl + +use strict; +use warnings; + +use FindBin; +use lib "$FindBin::Bin/lib"; +use Helpers; # Local helper routines used by the test suite. + +use Test::More; + +plan tests => 6; + +use CDB_File; + +my $good = join( q<>, map { chr } 0 .. 255 ); + +my %a = qw(one Hello two Goodbye); +$a{'good'} = $good; + +$a{'good2'} = $good; +utf8::upgrade($a{'good2'}); + +{ + my ( $db, $db_tmp ) = get_db_file_pair(1); + + eval { CDB_File::create( %a, $db->filename, $db_tmp->filename, string_mode => 'latin1' ) or die "Failed to create cdb: $!" }; + is( "$@", '', "Create cdb OK when contents are all bytes" ); +} + +my $bad = chr( 256 ); +$a{'bad'} = $bad; + +{ + my ( $db, $db_tmp ) = get_db_file_pair(1); + + eval { CDB_File::create( %a, $db->filename, $db_tmp->filename, string_mode => 'latin1' ) or die "Failed to create cdb: $!" }; + my $err = $@; + isnt( $err, q<>, 'An error happens if we try to store a >255 code point.' ); +} + +{ + my ( $db, $db_tmp ) = get_db_file_pair(1); + + CDB_File::create( my %a, $db->filename, $db_tmp->filename ); + + my %h; + + # Test that good file works. + ok( + tie( %h, "CDB_File", $db->filename, string_mode => 'latin1' ), + 'tie() succeeds', + ); + + eval { $h{'bad'} = $bad }; + my $err = $@; + + isnt( $err, q<>, 'An error happens if we try to store a >255 code point in a tied hash.' ); + + eval { my $foo = $h{$bad} }; + $err = $@; + + isnt( $err, q<>, 'An error happens if we try to look up a value whose key contains a >255 code point.' ); + + eval { exists $h{$bad} }; + $err = $@; + + isnt( $err, q<>, 'An error happens if we try to existence-check a key contains a >255 code point.' ); +} + +1; diff --git a/t/not_utf8.t b/t/not_utf8.t index 25ebe4e..5226571 100644 --- a/t/not_utf8.t +++ b/t/not_utf8.t @@ -26,7 +26,7 @@ utf8::upgrade($leon); my %a = qw(one Hello two Goodbye); eval { - my $t = CDB_File->new( $db->filename, $db_tmp->filename, utf8 => 0 ) or die "Failed to create cdb: $!"; + my $t = CDB_File->new( $db->filename, $db_tmp->filename, string_mode => 'sv' ) or die "Failed to create cdb: $!"; $t->insert(%a); $t->insert( $avar, $leon ); $t->insert( $latin_avar, 12345 ); @@ -37,7 +37,7 @@ is( "$@", '', "Create cdb" ); my %h; # Test that good file works. -tie( %h, "CDB_File", $db->filename, 'utf8' => 0 ) and pass("Test that good file works"); +tie( %h, "CDB_File", $db->filename, string_mode => 'sv' ) and pass("Test that good file works"); is $h{$avar}, $leon_not_encoded_but_not_utf8, "Access a utf8 key and get back the utf8 sequence but without the utf8 flag."; is( utf8::is_utf8( $h{$avar} ), '', "\$latin_avar is does not have the utf8 flag on." ); is $h{$latin_avar}, 12345, "Access of the latin1 key is not normalized so we get the alternate value."; diff --git a/t/undef.t b/t/undef.t index 583e128..36b1b97 100644 --- a/t/undef.t +++ b/t/undef.t @@ -34,7 +34,7 @@ use CDB_File; eval { note "Test undef insert"; my ( $db, $db_tmp ) = get_db_file_pair(1); - my $t = CDB_File->new( $db->filename, $db_tmp->filename, utf8 => 0 ) or die "Failed to create cdb: $!"; + my $t = CDB_File->new( $db->filename, $db_tmp->filename, string_mode => 'sv' ) or die "Failed to create cdb: $!"; like( warning { $t->insert( "efg", undef ) }, qr/^undef values cannot be stored in CDB_File\. Storing an empty string instead at /, "Undef values are warned." ); like( warning { $t->insert( undef, "abcd" ) }, qr{^Use of uninitialized value in hash key at }, "undef keys get a warnings too." ); diff --git a/t/utf8.t b/t/utf8.t index 2fc5538..3bd7d8f 100644 --- a/t/utf8.t +++ b/t/utf8.t @@ -26,14 +26,14 @@ utf8::upgrade($leon); my %a = qw(one Hello two Goodbye); $a{$avar} = $leon; -eval { CDB_File::create( %a, $db->filename, $db_tmp->filename, 'utf8' => 1 ) or die "Failed to create cdb: $!" }; +eval { CDB_File::create( %a, $db->filename, $db_tmp->filename, string_mode => 'utf8' ) or die "Failed to create cdb: $!" }; is( "$@", '', "Create cdb" ); my %h; # Test that good file works. -tie( %h, "CDB_File", $db->filename, 'utf8' => 1 ) and pass("Test that good file works"); -is $h{$avar}, $leon, "Access a utf8 key"; +tie( %h, "CDB_File", $db->filename, string_mode => 'utf8' ) and pass("Test that good file works"); +is $h{$avar}, $leon, "Access a utf8 key" or diag explain; is $h{$latin_avar}, $leon, "Access a utf8 key using its latin1 record."; is( utf8::is_utf8($latin_avar), '', "\$latin_avar is not converted to utf8" ); diff --git a/t/utf8_invalid.t b/t/utf8_invalid.t new file mode 100644 index 0000000..8889121 --- /dev/null +++ b/t/utf8_invalid.t @@ -0,0 +1,43 @@ +#!perl + +use strict; +use warnings; + +use FindBin; +use lib "$FindBin::Bin/lib"; +use Helpers; # Local helper routines used by the test suite. + +use Test::More; + +plan( skip_all => "utf8 macro support requires > 5.13.7" ) if $] < '5.013007'; +plan tests => 3; + +use CDB_File; + +my ( $db, $db_tmp ) = get_db_file_pair(1); + +my %data = ( nonutf8 => "\xff\xfe" ); + +CDB_File::create( %data, $db->filename, $db_tmp->filename, string_mode => 'latin1' ); + +{ + my %h; + tie %h, "CDB_File", $db->filename, string_mode => 'utf8'; + + eval { my $foo = $h{'nonutf8'} }; + my $err = $@; + + like $err, qri, '“utf8” mode rejects invalid UTF-8.'; +} + +{ + my %h; + tie %h, "CDB_File", $db->filename, string_mode => 'utf8_naive'; + + my $foo; + eval { $foo = $h{'nonutf8'} }; + my $err = $@; + + is( $err, q<>, '“utf8_naive” mode accepts invalid UTF-8.' ); + ok( !utf8::valid($foo), '.. and SV is invalid' ); +}