about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Jeffrey Yasskin <[email protected]> 2010-07-25 00:36:03 -0700
committer Graydon Hoare <[email protected]> 2010-08-06 17:29:21 -0700
commit 3f6e8ffe64b57b0eaba6812208e94500422ca40c (patch)
tree bb6ed1f9b5ce9603c999c195d0754057e633599a /src
parent Add an int->str conversion function. (diff)
download rust-3f6e8ffe64b57b0eaba6812208e94500422ca40c.tar.xz
rust-3f6e8ffe64b57b0eaba6812208e94500422ca40c.zip
Implement _str.len() to return the number of bytes, rename it to byte_len(),
and add a test.
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile 1
-rw-r--r-- src/lib/_str.rs 15
-rw-r--r-- src/rt/rust_builtin.cpp 6
-rw-r--r-- src/test/run-pass/str-lib.rs 16
4 files changed, 33 insertions, 5 deletions
diff --git a/src/Makefile b/src/Makefile
index 792eeec0..1d79a467 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -506,6 +506,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \
str-append.rs \
str-concat.rs \
str-idx.rs \
+ str-lib.rs \
tag.rs \
tail-cps.rs \
tail-direct.rs \
diff --git a/src/lib/_str.rs b/src/lib/_str.rs
index 7d1a2dbd..a607c7d5 100644
--- a/src/lib/_str.rs
+++ b/src/lib/_str.rs
@@ -3,7 +3,7 @@ import rustrt.sbuf;
native "rust" mod rustrt {
type sbuf;
fn str_buf(str s) -> sbuf;
- fn str_len(str s) -> uint;
+ fn str_byte_len(str s) -> uint;
fn str_alloc(uint n_bytes) -> str;
fn refcount[T](str s) -> uint;
}
@@ -13,7 +13,7 @@ fn is_utf8(vec[u8] v) -> bool {
}
fn is_ascii(str s) -> bool {
- let uint i = len(s);
+ let uint i = byte_len(s);
while (i > 0u) {
i -= 1u;
if ((s.(i) & 0x80u8) != 0u8) {
@@ -27,8 +27,13 @@ fn alloc(uint n_bytes) -> str {
ret rustrt.str_alloc(n_bytes);
}
-fn len(str s) -> uint {
- ret rustrt.str_len(s);
+// Returns the number of bytes (a.k.a. UTF-8 code units) in s.
+// Contrast with a function that would return the number of code
+// points (char's), combining character sequences, words, etc. See
+// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a
+// way to implement those.
+fn byte_len(str s) -> uint {
+ ret rustrt.str_byte_len(s);
}
fn buf(str s) -> sbuf {
@@ -39,5 +44,5 @@ fn bytes(&str s) -> vec[u8] {
fn ith(str s, uint i) -> u8 {
ret s.(i);
}
- ret _vec.init_fn[u8](bind ith(s, _), _str.len(s));
+ ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s));
}
diff --git a/src/rt/rust_builtin.cpp b/src/rt/rust_builtin.cpp
index 657109c6..d8d9b8d6 100644
--- a/src/rt/rust_builtin.cpp
+++ b/src/rt/rust_builtin.cpp
@@ -115,6 +115,12 @@ str_buf(rust_task *task, rust_str *s)
return (char const *)&s->data[0];
}
+extern "C" CDECL size_t
+str_byte_len(rust_task *task, rust_str *s)
+{
+ return s->fill - 1; // -1 for the '\0' terminator.
+}
+
extern "C" CDECL void *
vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset)
{
diff --git a/src/test/run-pass/str-lib.rs b/src/test/run-pass/str-lib.rs
new file mode 100644
index 00000000..585f9b8d
--- /dev/null
+++ b/src/test/run-pass/str-lib.rs
@@ -0,0 +1,16 @@
+use std;
+import std._str;
+
+fn test_bytes_len() {
+ check (_str.byte_len("") == 0u);
+ check (_str.byte_len("hello world") == 11u);
+ check (_str.byte_len("\x63") == 1u);
+ check (_str.byte_len("\xa2") == 2u);
+ check (_str.byte_len("\u03c0") == 2u);
+ check (_str.byte_len("\u2620") == 3u);
+ check (_str.byte_len("\U0001d11e") == 4u);
+}
+
+fn main() {
+ test_bytes_len();
+}