Skip to content

Commit 82e8e65

Browse files
committed
add StaticCoding
1 parent a0ca5b9 commit 82e8e65

File tree

1 file changed

+79
-7
lines changed

1 file changed

+79
-7
lines changed

aces.go

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"io"
88
"math"
99
"math/big"
10+
"strings"
1011
)
1112

1213
// size of the buffers used by BitReader and BitWriter.
@@ -40,6 +41,7 @@ func NewBitReader(chunkLen uint8, in io.Reader) (*BitReader, error) {
4041

4142
// NewBitReaderSize is like NewBitReader but allows setting the internal buffer size
4243
func NewBitReaderSize(chunkLen uint8, in io.Reader, bufSize int) (*BitReader, error) {
44+
fmt.Println("bufSize", bufSize)
4345
// bufSize % chunkLen == 0 so that we never have to read across the buffer boundary
4446
br := &BitReader{chunkLen: chunkLen, in: in, bufSize: bufSize - bufSize%int(chunkLen)}
4547
br.buf = make([]byte, br.bufSize)
@@ -178,19 +180,31 @@ type Coding interface {
178180
// This is because most encoders interpret data as a number and use a base conversion algorithm to convert it to the
179181
// character set. For non-power-of-2 charsets, this requires all data to be read before encoding, which is not possible
180182
// with streams. To enable stream encoding for non-power-of-2 charsets, Aces converts a default of 4 bytes (adjustable
181-
// with Coding.SetByteChunkSize) of data at a time, which is not the same as converting the base of the entire data.
183+
// with Coding.SetByteChunkSize) of data at a time, which is not the same as converting the base of the entire data. If
184+
// stream encoding is not necessary, use StaticCoding instead: given the base58 character set, for example, it
185+
// produces the same output as a base58-specific encoder.
182186
func NewCoding(charset []rune) (Coding, error) {
187+
if err := checkSet(charset); err != nil {
188+
return nil, err
189+
}
190+
if len(charset)&(len(charset)-1) == 0 && len(charset) < 256 { // is power of 2?
191+
return newTwoCoding(charset)
192+
}
193+
return newAnyCoding(charset)
194+
}
195+
196+
// checkSet validates a character set. It returns an error when charset holds
// two or fewer runes, or when any rune occurs more than once (the error names
// the first repeated rune encountered); otherwise it returns nil.
func checkSet(charset []rune) error {
	if !(len(charset) > 2) {
		return errors.New("charset length must be greater than 2")
	}
	// Track the runes visited so far; a second sighting means a duplicate.
	visited := make(map[rune]struct{}, len(charset))
	for i := 0; i < len(charset); i++ {
		char := charset[i]
		if _, dup := visited[char]; dup {
			return errors.New("charset contains duplicates: '" + string(char) + "'")
		}
		visited[char] = struct{}{}
	}
	return nil
}
195209

196210
// twoCoding is for character sets of a length that is a power of 2.
@@ -209,7 +223,7 @@ func newTwoCoding(charset []rune) (*twoCoding, error) {
209223
"\n want: a power of 2 (nearest is", 1<<numOfBits, "which is", math.Abs(float64(len(charset)-1<<numOfBits)), "away)"),
210224
)
211225
}
212-
return &twoCoding{charset: charset, numOfBits: numOfBits}, nil
226+
return &twoCoding{charset: charset, numOfBits: numOfBits, bufSize: defaultBufSize}, nil
213227
}
214228

215229
func (c *twoCoding) SetByteChunkSize(_ int) {}
@@ -442,3 +456,61 @@ func fromBase(enc []rune, set []rune) (*big.Int, error) {
442456
}
443457
return result, nil
444458
}
459+
460+
// StaticCoding encodes and decodes complete, in-memory inputs using a fixed
// character set. Values are created with NewStaticCoding.
type StaticCoding struct {
461+
charset []rune // runes used as output digits; charset[0] represents a leading null byte
462+
maxRunesPerByte float64 // 8 / log2(len(charset)); used by Encode to size its output buffer
463+
}
464+
465+
// NewStaticCoding creates a StaticCoding with the given character set, which must be a set of unique runes.
466+
// StaticCoding differs from Coding in that it does not accept streamed input, but instead requires the entire input to
467+
// be provided at once. So, StaticCoding is not recommended for very large inputs. It encodes by changing the
468+
// mathematical base of the input (interpreted as a binary number) to the length of the charset. Each null byte at the
469+
// beginning of the input are encoded as the first character in the charset.
470+
//
471+
// For example,
472+
//
473+
// NewStaticCoding([]rune("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"))
474+
//
475+
// creates the base58 encoding scheme compatible with Bitcoin's implementation.
476+
func NewStaticCoding(charset []rune) (*StaticCoding, error) {
477+
if err := checkSet(charset); err != nil {
478+
return nil, err
479+
}
480+
// calculate what maximum multiplier a charset of this length increases data size by
481+
maxRunesPerByte := 8 / math.Log2(float64(len(charset)))
482+
return &StaticCoding{charset, maxRunesPerByte}, nil
483+
}
484+
485+
func (c *StaticCoding) Encode(data []byte) (string, error) {
486+
nullBytes := 0
487+
for i := range data {
488+
if data[i] == 0 {
489+
nullBytes++
490+
} else {
491+
break
492+
}
493+
}
494+
return strings.Repeat(string(c.charset[0]), nullBytes) + string(toBase(
495+
bytesToInt(data[nullBytes:]),
496+
make([]rune, 0, int(math.Ceil(c.maxRunesPerByte*float64(len(data))))),
497+
c.charset,
498+
)), nil
499+
}
500+
501+
func (c *StaticCoding) Decode(data string) ([]byte, error) {
502+
dataR := []rune(data)
503+
nullBytes := 0
504+
for i := range dataR {
505+
if dataR[i] == c.charset[0] {
506+
nullBytes++
507+
} else {
508+
break
509+
}
510+
}
511+
bigNum, err := fromBase(dataR, c.charset)
512+
if err != nil {
513+
return nil, err
514+
}
515+
return append(make([]byte, nullBytes), bigNum.Bytes()...), nil
516+
}

0 commit comments

Comments
 (0)