Skip to content

Commit 6daed20

Browse files
committed
Improved example [skip ci]
1 parent ed1a1a7 commit 6daed20

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

examples/disco/src/main.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::error::Error;
66
use std::fs::File;
77
use std::io::{BufRead, BufReader};
88
use std::path::Path;
9+
use std::string::FromUtf8Error;
910

1011
fn main() -> Result<(), Box<dyn Error>> {
1112
// https://grouplens.org/datasets/movielens/100k/
@@ -72,12 +73,7 @@ fn load_movielens(path: &Path) -> Result<Dataset<i32, String>, Box<dyn Error>> {
7273
let rdr = BufReader::new(movies_file);
7374
for line in rdr.split(b'\n') {
7475
let line = line?;
75-
// convert encoding to UTF-8
76-
let line = String::from_utf8(
77-
line.into_iter()
78-
.flat_map(|v| if v < 128 { vec![v] } else { vec![195, v - 64] })
79-
.collect(),
80-
)?;
76+
let line = convert_to_utf8(&line)?;
8177
let mut row = line.split('|');
8278
let id = row.next().unwrap().to_string();
8379
let name = row.next().unwrap().to_string();
@@ -99,3 +95,17 @@ fn load_movielens(path: &Path) -> Result<Dataset<i32, String>, Box<dyn Error>> {
9995

10096
Ok(data)
10197
}
98+
99+
// ISO-8859-1 to UTF-8
100+
/// Decodes an ISO-8859-1 (Latin-1) byte slice into a UTF-8 `String`.
///
/// Every ISO-8859-1 byte value is, by definition, the Unicode code point with
/// the same numeric value (U+0000..=U+00FF), so each byte is converted with
/// `char::from` and the resulting chars are collected into a `String`.
///
/// The previous hand-rolled encoding emitted `0xC3, v - 64` for every byte
/// `>= 0x80`. That is only correct for 0xC0..=0xFF; bytes in 0x80..=0xBF
/// need the lead byte 0xC2 and were turned into invalid UTF-8, making the
/// function fail on legitimate input such as `©` (0xA9) or `½` (0xBD).
///
/// # Errors
///
/// This implementation never returns `Err`: every byte maps to a valid
/// Unicode scalar value. The `Result` return type is kept so existing
/// callers that use `?` keep compiling unchanged.
fn convert_to_utf8(s: &[u8]) -> Result<String, FromUtf8Error> {
    // `char::from(u8)` maps 0x00..=0xFF to U+0000..=U+00FF, which is exactly
    // the ISO-8859-1 decoding; `String` handles the UTF-8 encoding.
    Ok(s.iter().copied().map(char::from).collect())
}

0 commit comments

Comments
 (0)