Skip to content

Commit 5148fd0

Browse files
author
Gilad Chase
committed
feat(ops): add get, SliceIndex and impl for ByteSpan
Implementation notes for ByteSpan: 1. When a slice ends before a word boundary (its last word isn't a full 31 bytes long), its last word is copied into the remainder word. Rationale: this is consistent with `ByteArray`'s `pending word`, and allows slices of full bytes31 that include an end_suffix to be shifted right without allocating a new array. 2. When a slice includes a start offset, the offset is applied lazily, only upon converting (`into`) into a `ByteArray`; otherwise it is only recorded in the `first_char_start_offset` field.
1 parent 887650a commit 5148fd0

File tree

5 files changed

+258
-10
lines changed

5 files changed

+258
-10
lines changed

corelib/src/byte_array.cairo

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ use crate::cmp::min;
5555
use crate::integer::{U32TryIntoNonZero, u128_safe_divmod};
5656
#[feature("bounded-int-utils")]
5757
use crate::internal::bounded_int::{self, BoundedInt, downcast, upcast};
58+
use crate::num::traits::CheckedSub;
5859
#[allow(unused_imports)]
5960
use crate::serde::Serde;
6061
use crate::traits::{Into, TryInto};
@@ -837,6 +838,16 @@ pub impl ByteSpanImpl of ByteSpanTrait {
837838
ba.append_from_parts(self.data, self.remainder_word, upcast(self.remainder_len));
838839
ba
839840
}
841+
842+
/// Gets the element(s) at the given index.
843+
/// Accepts ranges (returns Option<ByteSpan>), and (to-be-implemented) single indices (returns
844+
/// Option<u8>).
845+
#[feature("corelib-get-trait")]
846+
fn get<I, impl TGet: crate::ops::Get<ByteSpan, I>, +Drop<I>>(
847+
self: @ByteSpan, index: I,
848+
) -> Option<TGet::Output> {
849+
TGet::get(self, index)
850+
}
840851
}
841852

842853
impl ByteSpanDefault of Default<ByteSpan> {
@@ -847,6 +858,59 @@ impl ByteSpanDefault of Default<ByteSpan> {
847858
}
848859
}
849860

861+
862+
impl ByteSpanGetRange of crate::ops::Get<ByteSpan, crate::ops::Range<usize>> {
863+
type Output = ByteSpan;
864+
865+
/// Returns a slice for the given range `[start, end)`.
866+
/// If the range is empty (`start == end`): returns the default (empty) span.
867+
/// If out of bounds: returns `None`.
868+
fn get(self: @ByteSpan, index: crate::ops::Range<usize>) -> Option<ByteSpan> {
869+
let range = index;
870+
let len = (range.end).checked_sub(range.start)?;
871+
if len == 0 {
872+
return Some(Default::default());
873+
}
874+
if range.end > self.len() {
875+
return None;
876+
}
877+
878+
let abs_start = range.start + upcast(*self.first_char_start_offset);
879+
let (start_word, start_offset) = DivRem::div_rem(abs_start, BYTES_IN_BYTES31_NONZERO);
880+
let (end_word, end_offset) = DivRem::div_rem(abs_start + len, BYTES_IN_BYTES31_NONZERO);
881+
let data_len = self.data.len();
882+
883+
let remainder_with_end_offset_trimmed = if end_word < data_len {
884+
let word = (*self.data[end_word]).into();
885+
shift_right(word, BYTES_IN_BYTES31, BYTES_IN_BYTES31 - end_offset)
886+
} else {
887+
let remainder_len = upcast(*self.remainder_len);
888+
shift_right(*self.remainder_word, remainder_len, remainder_len - end_offset)
889+
};
890+
891+
Some(
892+
ByteSpan {
893+
data: self.data.slice(start_word, min(end_word, data_len) - start_word),
894+
first_char_start_offset: downcast(start_offset).unwrap(),
895+
remainder_word: remainder_with_end_offset_trimmed,
896+
remainder_len: downcast(end_offset).unwrap(),
897+
},
898+
)
899+
}
900+
}
901+
902+
impl ByteSpanGetRangeInclusive of crate::ops::Get<ByteSpan, crate::ops::RangeInclusive<usize>> {
903+
type Output = ByteSpan;
904+
905+
/// Returns a slice for the given range `[start, end]`.
906+
/// If the range is empty (`start == end + 1`): returns the default (empty) span.
907+
/// If out of bounds: returns `None`.
908+
fn get(self: @ByteSpan, index: crate::ops::RangeInclusive<usize>) -> Option<ByteSpan> {
909+
let end_exclusive = crate::num::traits::CheckedAdd::checked_add(index.end, 1)?;
910+
ByteSpanTrait::get(self, index.start..end_exclusive)
911+
}
912+
}
913+
850914
/// Trait for types that can be converted into a `ByteSpan`.
851915
#[unstable(feature: "byte-span")]
852916
pub trait ToByteSpanTrait<C> {
@@ -874,6 +938,21 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
874938
}
875939
}
876940

941+
/// Shifts a word right by `n_bytes`.
942+
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
943+
/// performance.
944+
///
945+
/// Note: this function assumes that:
946+
/// 1. `word` is validly convertible to a `bytes31` which has no more than `word_len` bytes of data.
947+
/// 2. `n_bytes <= word_len`.
948+
/// 3. `word_len <= BYTES_IN_BYTES31`.
949+
/// If these assumptions are not met, it can corrupt the result. Thus, this should be a
950+
/// private function. We could add masking/assertions but it would be more expensive.
951+
fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
952+
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
953+
after_shift_right
954+
}
955+
877956
mod helpers {
878957
use core::num::traits::Bounded;
879958
use crate::bytes_31::BYTES_IN_BYTES31;

corelib/src/ops.cairo

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,8 @@ pub use range::{
7373
// `RangeOp` and `RangeInclusiveOp` are used internally by the compiler.
7474
#[allow(unused_imports)]
7575
use range::{RangeInclusiveOp, RangeOp};
76+
77+
#[unstable(feature: "corelib-get-trait")]
78+
pub mod get;
79+
#[feature("corelib-get-trait")]
80+
pub use get::Get;

corelib/src/ops/get.cairo

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/// A trait for fallible indexing operations with different index types.
2+
///
3+
/// Unlike [`IndexView`] and [`Index`] which panic on out-of-bounds access, `Get`
4+
/// returns an `Option`, providing safe indexing operations. This trait enables containers
5+
/// to support multiple index types (e.g., `Range<usize>`, `RangeInclusive<usize>`,
6+
/// or `usize`) through a unified interface.
7+
///
8+
/// [`IndexView`]: crate::ops::IndexView
9+
/// [`Index`]: crate::ops::Index
10+
///
11+
/// # Examples
12+
///
13+
/// The following example shows how `ByteSpan` implements `Get` for both `Range<usize>`
14+
/// and `RangeInclusive<usize>`, enabling safe slicing operations that return `None` when
15+
/// out of bounds.
16+
///
17+
/// ```
18+
/// use core::byte_array::{ByteSpan, ByteSpanTrait};
19+
///
20+
/// let ba: ByteArray = "hello";
21+
/// let span = ba.span();
22+
///
23+
/// // Using Range<usize>.
24+
/// let slice = span.get(1..4).unwrap();
25+
/// assert_eq!(slice.to_byte_array(), "ell");
26+
///
27+
/// // Using RangeInclusive<usize>.
28+
/// let slice = span.get(1..=3).unwrap();
29+
/// assert_eq!(slice.to_byte_array(), "ell");
30+
///
31+
/// // Out of bounds returns None.
32+
/// assert!(span.get(10..20).is_none());
33+
/// ```
34+
// TODO(giladchase): add examples for `usize` once supported.
35+
#[unstable(feature: "corelib-get-trait")]
36+
pub trait Get<C, I> {
37+
/// The returned type after indexing.
38+
type Output;
39+
40+
/// Returns the output at this index, if in bounds.
41+
fn get(self: @C, index: I) -> Option<Self::Output>;
42+
}

corelib/src/ops/index.cairo

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
//! * [`IndexView`] - For snapshot-based access
77
//! * [`Index`] - For reference-based access
88
//!
9+
//! For safe indexing operations that return `Option`, see the unstable [`Get`] trait
10+
//! in the [`get`] module.
11+
//!
912
//! # When to use which trait
1013
//!
1114
//! - Use [`IndexView`] when the collection can be accessed in a read-only context and is not
@@ -17,6 +20,8 @@
1720
//! Only one of these traits should be implemented for any given type, not both.
1821
//!
1922
//! [`Felt252Dict`]: core::dict::Felt252Dict
23+
//! [`Get`]: crate::ops::get::Get
24+
//! [`get`]: crate::ops::get
2025

2126
#[feature("deprecated-index-traits")]
2227
use crate::traits::{Index as DeprecatedIndex, IndexView as DeprecatedIndexView};

corelib/src/test/byte_array_test.cairo

Lines changed: 127 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#[feature("byte-span")]
2-
use crate::byte_array::{ByteSpanTrait, ToByteSpanTrait};
2+
use crate::byte_array::{ByteSpan, ByteSpanTrait, ToByteSpanTrait};
3+
use crate::num::traits::Bounded;
34
use crate::test::test_utils::{assert_eq, assert_ne};
45

56
#[test]
@@ -508,39 +509,63 @@ fn test_from_collect() {
508509
assert_eq!(ba, "hello");
509510
}
510511

511-
// TODO(giladchase): add dedicated is_empty test once we have `slice`.
512512
#[test]
513513
fn test_span_len() {
514-
// Test simple happy flow --- value is included in the last word.
515514
// TODO(giladchase): add short string test here once supported cast into span.
516515
let ba: ByteArray = "A";
517516
let span = ba.span();
518517
assert_eq!(span.len(), 1);
519518
assert!(!span.is_empty());
520519

520+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
521+
let span = ba_31.span();
522+
assert_eq!(span.len(), 31, "wrong span len");
523+
assert!(!span.is_empty());
524+
521525
// Test empty.
522526
let empty_ba: ByteArray = "";
523527
let empty_span = empty_ba.span();
524528
assert_eq!(empty_span.len(), 0);
525529
assert!(empty_span.is_empty());
526530

527-
// TODO(giladchase): Add start-offset using slice once supported.
528531
// First word in the array, second in last word.
529532
let two_byte31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
530-
let mut single_span = two_byte31.span();
531-
assert_eq!(single_span.len(), 33, "len error with start offset");
533+
let mut single_span = two_byte31.span().get(1..=32).unwrap();
534+
assert_eq!(single_span.len(), 32, "len error with start offset");
532535
assert!(!single_span.is_empty());
533536

534-
// TODO(giladchase): Add start-offset using slice once supported.
535537
// First word in the array, second in the array, third in last word.
536538
let three_bytes31: ByteArray =
537539
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 chars.
538-
let mut three_span = three_bytes31.span();
539-
assert_eq!(three_span.len(), 64, "len error with size-3 bytearray");
540+
let mut three_span = three_bytes31.span().get(1..64).unwrap();
541+
assert_eq!(three_span.len(), 63, "len error with size-3 bytearray");
540542
assert!(!three_span.is_empty());
541543
// TODO(giladchase): use `ByteSpan::PartialEq` to check that a consuming slice == Default.
542544
}
543545

546+
#[test]
547+
fn test_span_slice_is_empty() {
548+
let ba: ByteArray = "hello";
549+
let span = ba.span();
550+
551+
let empty = span.get(2..2).unwrap();
552+
assert_eq!(empty.len(), 0);
553+
assert!(empty.is_empty());
554+
assert_eq!(empty.to_byte_array(), "");
555+
556+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
557+
let span = ba_31.span();
558+
assert!(span.get(30..30).unwrap().is_empty());
559+
assert!(span.get(31..31).unwrap().is_empty());
560+
assert!(!span.get(15..30).unwrap().is_empty());
561+
562+
let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
563+
let span = ba_30.span();
564+
assert!(span.get(29..29).unwrap().is_empty());
565+
assert!(span.get(30..30).unwrap().is_empty());
566+
assert!(!span.get(15..29).unwrap().is_empty());
567+
}
568+
544569
#[test]
545570
fn test_span_copy() {
546571
let ba: ByteArray = "12";
@@ -561,6 +586,85 @@ fn test_span_copy() {
561586
assert_eq!(ba, span.to_byte_array());
562587
}
563588

589+
#[test]
590+
fn test_span_slice_empty() {
591+
let ba: ByteArray = "hello";
592+
let span = ba.span();
593+
594+
let empty = span.get(2..2).unwrap();
595+
assert_eq!(empty.len(), 0);
596+
assert!(empty.is_empty());
597+
assert_eq!(empty.to_byte_array(), "");
598+
}
599+
600+
// TODO(giladchase): replace assert+is_none with assert_eq when we have PartialEq.
601+
#[test]
602+
fn test_span_slice_out_of_bounds() {
603+
let ba: ByteArray = "hello";
604+
let span = ba.span();
605+
606+
assert!(span.get(3..=7).is_none(), "end out of bounds");
607+
assert!(span.get(6..=6).is_none(), "start out of bounds (inclusive)");
608+
609+
assert!(
610+
span.get(2..4).unwrap().get((Bounded::<usize>::MAX - 1)..Bounded::<usize>::MAX).is_none(),
611+
"start offset overflow",
612+
);
613+
assert!(
614+
span.get(2..=3).unwrap().get((Bounded::<usize>::MAX - 1)..Bounded::<usize>::MAX).is_none(),
615+
"start offset overflow (first get inclusive)",
616+
);
617+
assert!(
618+
span.get(2..4).unwrap().get((Bounded::<usize>::MAX - 1)..=Bounded::<usize>::MAX).is_none(),
619+
"start offset overflow (second get inclusive)",
620+
);
621+
assert!(
622+
span.get(2..=3).unwrap().get((Bounded::<usize>::MAX - 1)..=Bounded::<usize>::MAX).is_none(),
623+
"start offset overflow (both gets inclusive)",
624+
);
625+
assert!(span.get(Bounded::<usize>::MAX..0).is_none(), "backwards range");
626+
627+
let empty_string: ByteArray = "";
628+
assert!(empty_string.span().get(0..2).is_none(), "empty slice is sliceable");
629+
}
630+
631+
#[test]
632+
fn test_span_slice_under_31_bytes() {
633+
// Word entirely in remainder word.
634+
let ba: ByteArray = "abcde";
635+
let span = ba.span();
636+
let tba = |ba: ByteSpan| ba.to_byte_array();
637+
638+
assert_eq!(span.get(0..=2).map(tba), Some("abc"));
639+
assert_eq!(span.get(2..4).map(tba), Some("cd"));
640+
assert_eq!(span.get(4..=4).map(tba), Some("e"));
641+
}
642+
#[test]
643+
fn test_span_slice_exactly_31_bytes() {
644+
// 1 full data word, empty last_word.
645+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
646+
let span = ba_31.span();
647+
648+
assert_eq!(span.len(), 31);
649+
assert_eq!(span.get(0..31).unwrap().to_byte_array(), ba_31);
650+
assert_eq!(span.get(10..=19).unwrap().to_byte_array(), "KLMNOPQRST");
651+
}
652+
653+
#[test]
654+
fn test_span_slice_positions() {
655+
// Two full bytes31 + remainder with 2 bytes.
656+
let ba_64: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$";
657+
let span = ba_64.span();
658+
let tba = |ba: ByteSpan| ba.to_byte_array();
659+
660+
assert_eq!(span.get(10..=39).map(tba), Some("KLMNOPQRSTUVWXYZabcdefghijklmn"));
661+
assert_eq!(
662+
span.get(5..64).map(tba),
663+
Some("FGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"),
664+
);
665+
assert_eq!(span.get(29..49).map(tba), Some("defghijklmnopqrstuvw"));
666+
}
667+
564668
#[test]
565669
fn test_span_to_bytearray() {
566670
let empty_ba: ByteArray = "";
@@ -578,5 +682,18 @@ fn test_span_to_bytearray() {
578682
let even_larger_ba: ByteArray =
579683
"abcdeFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 bytes
580684
assert_eq!(even_larger_ba.span().to_byte_array(), even_larger_ba);
581-
// TODO(giladchase): test with slice.
685+
}
686+
687+
#[test]
688+
fn test_span_multiple_start_offset_slicing() {
689+
let ba_6: ByteArray = "abcdef";
690+
let span = ba_6.span();
691+
692+
let slice1_inc = span.get(1..=5).unwrap();
693+
let slice2_inc = slice1_inc.get(1..=4).unwrap();
694+
let slice3_inc = slice2_inc.get(1..=3).unwrap();
695+
696+
assert_eq!(slice1_inc.to_byte_array(), "bcdef");
697+
assert_eq!(slice2_inc.to_byte_array(), "cdef");
698+
assert_eq!(slice3_inc.to_byte_array(), "def");
582699
}

0 commit comments

Comments
 (0)