Skip to content

Commit de87aa8

Browse files
authored
Merge pull request #209 from Rust-Data-Science/wip-rand-choice
Wip rand choice
2 parents 0e4e0bd + bd57415 commit de87aa8

File tree

12 files changed

+129
-8
lines changed

12 files changed

+129
-8
lines changed

requirements/common.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# A-Z
2-
maturin==0.12.6
2+
maturin==0.13.7

tests/test_constructors.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,28 @@ def test_rand(
104104
assert arr1.not_equal(arr2).all()
105105
assert len(arr1) == len(arr2)
106106
assert len(arr1) == kwargs["size"]
107+
108+
109+
@pytest.mark.parametrize(
    "kwargs",
    [
        # The numeric dtypes all draw from the same integer population.
        *(
            {"obj": list(range(1000)), "size": 1000, "dtype": numeric_dtype}
            for numeric_dtype in (
                "float",
                "float32",
                "float64",
                "int",
                "int32",
                "int64",
            )
        ),
        {"obj": [True, False], "size": 1000, "dtype": "bool"},
        {"obj": ["foo", "bar"], "size": 1000, "dtype": "string"},
    ],
)
def test_choices(kwargs: dict) -> None:
    """Draw twice with ``ul.choices`` and check both results.

    Every drawn element must come from the input sequence, the two draws
    must differ in at least one position, and both results must have the
    requested size and a dtype name starting with the requested dtype.
    """
    population = kwargs["obj"]
    first = ul.choices(**kwargs)
    second = ul.choices(**kwargs)

    # Each draw may only contain elements of the source sequence.
    for drawn in (first, second):
        assert all(item in population for item in drawn.to_list())

    # Two independent random draws should not be element-wise identical.
    assert first.not_equal(second).any()

    assert len(first) == len(second)
    assert len(first) == kwargs["size"]

    for drawn in (first, second):
        assert drawn.dtype.startswith(kwargs["dtype"])

ulist/python/ulist/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .constructor import arange, cycle, from_seq, random, repeat # noqa:F401
1+
from .constructor import arange, choices, cycle, from_seq, random, repeat # noqa:F401, E501
22
from .control_flow import select # noqa:F401
33
from .core import UltraFastList # noqa:F401
44
from .io import read_csv # noqa:F401

ulist/python/ulist/constructor.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,47 @@ def arange(
6767
"Parameter dtype should be 'int', 'int32' or 'int64'!")
6868

6969

70+
def choices(obj: Sequence, size: int, dtype: str) -> UltraFastList:
    """Build a ulist by randomly picking elements from a sequence until
    the requested size is reached.

    Args:
        obj (Sequence):
            Sequence object such as list, tuple and range.
        size (int):
            Size of the new ulist.
        dtype (str):
            The type of the output ulist. 'int', 'int32', 'int64',
            'float', 'float32', 'float64', 'bool' or 'string'.

    Raises:
        ValueError:
            Parameter dtype should be 'int', 'int32', 'int64',
            'float', 'float32', 'float64', 'bool' or 'string'!

    Returns:
        UltraFastList: A ulist object.
    """
    # 'int' and 'float' are aliases for the 64-bit variants.
    if dtype in ("int", "int64"):
        return UltraFastList(IntegerList64.choices(obj, size))
    if dtype == "int32":
        return UltraFastList(IntegerList32.choices(obj, size))
    if dtype in ("float", "float64"):
        return UltraFastList(FloatList64.choices(obj, size))
    if dtype == "float32":
        return UltraFastList(FloatList32.choices(obj, size))
    if dtype == "bool":
        return UltraFastList(BooleanList.choices(obj, size))
    if dtype == "string":
        return UltraFastList(StringList.choices(obj, size))
    raise ValueError(
        "Parameter dtype should be 'int', 'int32', 'int64', "
        "'float', 'float32', 'float64', 'bool' or 'string'!"
    )
109+
110+
70111
def cycle(obj: Sequence, size: int, dtype: str) -> UltraFastList:
71112
"""Repeats a sequence endlessly until the size is met.
72113

ulist/python/ulist/ulist.pyi

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class BooleanList:
1717
def as_int32(self) -> IntegerList32: ...
1818
def as_int64(self) -> IntegerList64: ...
1919
def as_str(self) -> StringList: ...
20+
@staticmethod
21+
def choices(obj: Sequence[bool], size: int) -> BooleanList: ...
2022
def copy(self) -> BooleanList: ...
2123
def count_na(self) -> int: ...
2224
def counter(self) -> Dict[Optional[bool], int]: ...
@@ -60,6 +62,8 @@ class FloatList32:
6062
def as_int32(self) -> IntegerList32: ...
6163
def as_int64(self) -> IntegerList64: ...
6264
def as_str(self) -> StringList: ...
65+
@staticmethod
66+
def choices(obj: Sequence[float], size: int) -> FloatList32: ...
6367
def copy(self) -> FloatList32: ...
6468
def count_na(self) -> int: ...
6569
@staticmethod
@@ -119,6 +123,8 @@ class FloatList64:
119123
def as_int32(self) -> IntegerList32: ...
120124
def as_int64(self) -> IntegerList64: ...
121125
def as_str(self) -> StringList: ...
126+
@staticmethod
127+
def choices(obj: Sequence[float], size: int) -> FloatList64: ...
122128
def copy(self) -> FloatList64: ...
123129
def count_na(self) -> int: ...
124130
@staticmethod
@@ -178,6 +184,8 @@ class IntegerList32:
178184
def as_float64(self) -> FloatList64: ...
179185
def as_int64(self) -> IntegerList64: ...
180186
def as_str(self) -> StringList: ...
187+
@staticmethod
188+
def choices(obj: Sequence[int], size: int) -> IntegerList32: ...
181189
def copy(self) -> IntegerList32: ...
182190
def count_na(self) -> int: ...
183191
def counter(self) -> Dict[Optional[int], int]: ...
@@ -236,6 +244,8 @@ class IntegerList64:
236244
def as_float64(self) -> FloatList64: ...
237245
def as_int32(self) -> IntegerList32: ...
238246
def as_str(self) -> StringList: ...
247+
@staticmethod
248+
def choices(obj: Sequence[int], size: int) -> IntegerList64: ...
239249
def copy(self) -> IntegerList64: ...
240250
def count_na(self) -> int: ...
241251
def counter(self) -> Dict[Optional[int], int]: ...
@@ -290,6 +300,8 @@ class StringList:
290300
def as_float64(self) -> FloatList64: ...
291301
def as_int32(self) -> IntegerList32: ...
292302
def as_int64(self) -> IntegerList64: ...
303+
@staticmethod
304+
def choices(obj: Sequence[str], size: int) -> StringList: ...
293305
def contains(self, elem: str) -> BooleanList: ...
294306
def copy(self) -> StringList: ...
295307
def count_na(self) -> int: ...

ulist/src/base.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ use crate::index::IndexList;
33
use pyo3::exceptions::PyIndexError;
44
use pyo3::exceptions::PyRuntimeError;
55
use pyo3::PyResult;
6+
use rand::distributions::Uniform;
7+
use rand::Rng;
68
use std::cell::Ref;
79
use std::cell::RefMut;
810
use std::collections::HashSet;
@@ -108,6 +110,17 @@ where
108110
}
109111
}
110112

113+
fn choices(vec: &[T], size: usize) -> Self {
114+
let n = vec.len();
115+
let dist: Uniform<usize> = Uniform::from(0..n);
116+
let v = rand::thread_rng()
117+
.sample_iter(dist)
118+
.map(|x| unsafe { vec.get_unchecked(x).clone() })
119+
.take(size)
120+
.collect();
121+
List::_new(v, HashSet::new())
122+
}
123+
111124
fn copy(&self) -> Self {
112125
let hset = self.na_indexes().clone();
113126
List::_new(self.values().clone(), hset)
@@ -232,7 +245,7 @@ where
232245
let n = self.size();
233246
let mut vec = self.values_mut();
234247
for i in 0..n {
235-
let ptr = unsafe{ vec.get_unchecked_mut(i)};
248+
let ptr = unsafe { vec.get_unchecked_mut(i) };
236249
if *ptr == old {
237250
*ptr = self.na_value();
238251
self.na_indexes_mut().insert(i);
@@ -265,7 +278,7 @@ where
265278
return Err(PyIndexError::new_err("Index out of range!"));
266279
}
267280
let mut vec = self.values_mut();
268-
let ptr = unsafe{vec.get_unchecked_mut(index)};
281+
let ptr = unsafe { vec.get_unchecked_mut(index) };
269282
if let Some(i) = elem {
270283
*ptr = i;
271284
self.na_indexes_mut().remove(&index);

ulist/src/boolean.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,11 @@ impl BooleanList {
137137
NonFloatList::counter(self)
138138
}
139139

140+
#[staticmethod]
141+
pub fn choices(vec: Vec<bool>, size: usize) -> Self {
142+
List::choices(&vec, size)
143+
}
144+
140145
#[staticmethod]
141146
pub fn cycle(vec: Vec<bool>, size: usize) -> Self {
142147
List::cycle(&vec, size)

ulist/src/floatings/float32.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ impl FloatList32 {
7979
AsStringList::as_str(self)
8080
}
8181

82+
#[staticmethod]
83+
pub fn choices(vec: Vec<f32>, size: usize) -> Self {
84+
List::choices(&vec, size)
85+
}
86+
8287
pub fn copy(&self) -> Self {
8388
List::copy(self)
8489
}
@@ -192,8 +197,8 @@ impl FloatList32 {
192197

193198
#[staticmethod]
194199
fn random(size: usize) -> Self {
195-
let range: Uniform<f32> = Uniform::from(0.0..1.0);
196-
let v: Vec<f32> = rand::thread_rng().sample_iter(&range).take(size).collect();
200+
let dist: Uniform<f32> = Uniform::from(0.0..1.0);
201+
let v: Vec<f32> = rand::thread_rng().sample_iter(&dist).take(size).collect();
197202
FloatList32::new(v, HashSet::new())
198203
}
199204

ulist/src/floatings/float64.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ impl FloatList64 {
7979
AsStringList::as_str(self)
8080
}
8181

82+
#[staticmethod]
83+
pub fn choices(vec: Vec<f64>, size: usize) -> Self {
84+
List::choices(&vec, size)
85+
}
86+
8287
pub fn copy(&self) -> Self {
8388
List::copy(self)
8489
}
@@ -192,8 +197,8 @@ impl FloatList64 {
192197

193198
#[staticmethod]
194199
fn random(size: usize) -> Self {
195-
let range: Uniform<f64> = Uniform::from(0.0..1.0);
196-
let v: Vec<f64> = rand::thread_rng().sample_iter(&range).take(size).collect();
200+
let dist: Uniform<f64> = Uniform::from(0.0..1.0);
201+
let v: Vec<f64> = rand::thread_rng().sample_iter(&dist).take(size).collect();
197202
FloatList64::new(v, HashSet::new())
198203
}
199204

ulist/src/integers/int32.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ impl IntegerList32 {
7979
AsStringList::as_str(self)
8080
}
8181

82+
#[staticmethod]
83+
pub fn choices(vec: Vec<i32>, size: usize) -> Self {
84+
List::choices(&vec, size)
85+
}
86+
8287
pub fn copy(&self) -> Self {
8388
List::copy(self)
8489
}

0 commit comments

Comments
 (0)