From 718c0b5963e6513337e4fee003b34423397c2d14 Mon Sep 17 00:00:00 2001 From: Roy Frostig Date: Wed, 4 Aug 2010 23:09:25 -0700 Subject: Add to std._io some formatter/type-specific-writer mechanism. Make a few type-specific buffered writers as wrappers of buf_writer. --- src/lib/_io.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib/_str.rs | 19 +++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'src/lib') diff --git a/src/lib/_io.rs b/src/lib/_io.rs index 142f808a..b0b0c313 100644 --- a/src/lib/_io.rs +++ b/src/lib/_io.rs @@ -1,3 +1,7 @@ +import std.os; +import std._str; +import std._vec; + type buf_reader = unsafe obj { fn read() -> vec[u8]; }; @@ -107,3 +111,50 @@ fn new_buf_writer(str path, vec[fileflag] flags) -> buf_writer { } ret fd_buf_writer(fd); } + +type formatter[T] = fn(&T x) -> vec[u8]; + +type writer[T] = unsafe obj { fn write(&T x); }; + +fn mk_writer[T](str path, + vec[fileflag] flags, + &formatter[T] fmt) + -> writer[T] +{ + unsafe obj w[T](buf_writer out, formatter[T] fmt) { + fn write(&T x) { + out.write(fmt(x)); + } + } + ret w[T](new_buf_writer(path, flags), fmt); +} + +/* TODO: int_writer, uint_writer, ... */ + +fn str_writer(str path, vec[fileflag] flags) -> writer[str] { + auto fmt = _str.bytes; // FIXME (issue #90) + ret mk_writer[str](path, flags, fmt); +} + +fn vec_writer[T](str path, + vec[fileflag] flags, + &formatter[T] inner) + -> writer[vec[T]] +{ + fn fmt[T](&vec[T] v, &formatter[T] inner) -> vec[u8] { + let vec[u8] res = _str.bytes("vec("); + auto first = true; + for (T x in v) { + if (!first) { + res += _str.bytes(", "); + } else { + first = false; + } + res += inner(x); + } + res += _str.bytes(")\n"); + ret res; + } + + ret mk_writer[vec[T]](path, flags, bind fmt[T](_, inner)); +} diff --git a/src/lib/_str.rs b/src/lib/_str.rs index 062d8bf1..8eed9a38 100644 --- a/src/lib/_str.rs +++ b/src/lib/_str.rs @@ -12,6 +12,18 @@ fn is_utf8(vec[u8] v) -> bool { fail; // FIXME } +fn is_ascii(str s) -> bool { + let uint i = len(s); + while (i > 0u) { + i -= 1u; + // FIXME (issue #94) + if ((s.(i as int) & 0x80u8) != 0u8) { + ret false; + } + } + ret true; +} + fn alloc(uint n_bytes) -> str { ret rustrt.str_alloc(n_bytes); } @@ -23,3 +35,10 @@ fn len(str s) -> uint { fn buf(str s) -> sbuf { ret rustrt.str_buf(s); } + +fn bytes(&str s) -> vec[u8] { + fn ith(str s, uint i) -> u8 { + ret s.(i as int); // FIXME (issue #94) + } + ret _vec.init_fn[u8](bind ith(s, _), _str.len(s)); +} -- cgit v1.2.3 From 935b4347e286b0022ae6f38b2875df6f05c55fa3 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Thu, 5 Aug 2010 10:10:39 -0700 Subject: Mop up workarounds in stdlib no longer required as issue #93 is closed. --- src/lib/_str.rs | 5 ++--- src/lib/deque.rs | 18 ++++++------------ src/lib/map.rs | 12 +++--------- 3 files changed, 11 insertions(+), 24 deletions(-) (limited to 'src/lib') diff --git a/src/lib/_str.rs b/src/lib/_str.rs index 8eed9a38..7d1a2dbd 100644 --- a/src/lib/_str.rs +++ b/src/lib/_str.rs @@ -16,8 +16,7 @@ fn is_ascii(str s) -> bool { let uint i = len(s); while (i > 0u) { i -= 1u; - // FIXME (issue #94) - if ((s.(i as int) & 0x80u8) != 0u8) { + if ((s.(i) & 0x80u8) != 0u8) { ret false; } } @@ -38,7 +37,7 @@ fn buf(str s) -> sbuf { fn bytes(&str s) -> vec[u8] { fn ith(str s, uint i) -> u8 { - ret s.(i as int); // FIXME (issue #94) + ret s.(i); } ret _vec.init_fn[u8](bind ith(s, _), _str.len(s)); } diff --git a/src/lib/deque.rs b/src/lib/deque.rs index bd42d7cb..bf7acb53 100644 --- a/src/lib/deque.rs +++ b/src/lib/deque.rs @@ -36,7 +36,7 @@ fn create[T]() -> t[T] { fn fill[T](uint i, uint nelts, uint lo, &vec[cell[T]] old) -> cell[T] { if (i < nelts) { - ret old.(((lo + i) % nelts) as int); + ret old.((lo + i) % nelts); } else { ret util.none[T](); } @@ -47,14 +47,8 @@ fn create[T]() -> t[T] { ret _vec.init_fn[cell[T]](copy_op, nalloc); } - /** - * FIXME (issue #94): We're converting to int every time we index into the - * vec, but we really want to index with the lo and hi uints that we have - * around. - */ - fn get[T](&vec[cell[T]] elts, uint i) -> T { - alt (elts.(i as int)) { + alt (elts.(i)) { case (util.some[T](t)) { ret t; } case (_) { fail; } } @@ -82,7 +76,7 @@ fn create[T]() -> t[T] { hi = nelts; } - elts.(lo as int) = util.some[T](t); + elts.(lo) = util.some[T](t); nelts += 1u; } @@ -93,7 +87,7 @@ fn create[T]() -> t[T] { hi = nelts; } - elts.(hi as int) = util.some[T](t); + elts.(hi) = util.some[T](t); hi = (hi + 1u) % _vec.len[cell[T]](elts); nelts += 1u; } @@ -104,7 +98,7 @@ fn create[T]() -> t[T] { */ fn pop_front() -> T { let T t = get[T](elts, lo); - elts.(lo as int) = util.none[T](); + elts.(lo) = util.none[T](); lo = (lo + 1u) % _vec.len[cell[T]](elts); ret t; } @@ -117,7 +111,7 @@ fn create[T]() -> t[T] { } let T t = get[T](elts, hi); - elts.(hi as int) = util.none[T](); + elts.(hi) = util.none[T](); ret t; } diff --git a/src/lib/map.rs b/src/lib/map.rs index f9574176..ff7b4411 100644 --- a/src/lib/map.rs +++ b/src/lib/map.rs @@ -75,8 +75,7 @@ fn mk_hashmap[K, V](&hashfn[K] hasher, &eqfn[K] eqer) -> hashmap[K, V] { { let uint i = 0u; while (i < nbkts) { - // FIXME (issue #94): as in find_common() - let int j = (hash[K](hasher, nbkts, key, i)) as int; + let uint j = (hash[K](hasher, nbkts, key, i)); alt (bkts.(j)) { case (some[K, V](k, _)) { if (eqer(key, k)) { @@ -103,8 +102,7 @@ fn mk_hashmap[K, V](&hashfn[K] hasher, &eqfn[K] eqer) -> hashmap[K, V] { { let uint i = 0u; while (i < nbkts) { - // FIXME (issue #94): Pending bugfix, remove uint coercion. - let int j = (hash[K](hasher, nbkts, key, i)) as int; + let uint j = (hash[K](hasher, nbkts, key, i)); alt (bkts.(j)) { case (some[K, V](k, v)) { if (eqer(key, k)) { @@ -149,9 +147,6 @@ fn mk_hashmap[K, V](&hashfn[K] hasher, &eqfn[K] eqer) -> hashmap[K, V] { if (!util.rational_leq(load, lf)) { let uint nnewbkts = _int.next_power_of_two(nbkts + 1u); - // FIXME (issue #94): Enforce our workaround to issue #94. - check ((nnewbkts as int) > 0); - let vec[mutable bucket[K, V]] newbkts = make_buckets[K, V](nnewbkts); rehash[K, V](hasher, eqer, bkts, nbkts, newbkts, nnewbkts); } @@ -183,8 +178,7 @@ fn mk_hashmap[K, V](&hashfn[K] hasher, &eqfn[K] eqer) -> hashmap[K, V] { fn remove(&K key) -> util.option[V] { let uint i = 0u; while (i < nbkts) { - // FIXME (issue #94): as in find_common() - let int j = (hash[K](hasher, nbkts, key, i)) as int; + let uint j = (hash[K](hasher, nbkts, key, i)); alt (bkts.(j)) { case (some[K, V](_, val)) { bkts.(j) = deleted[K, V](); -- cgit v1.2.3 From 80a1cd3d1e5e39db00a68ad6c1dc5686b775a4ad Mon Sep 17 00:00:00 2001 From: Roy Frostig Date: Fri, 6 Aug 2010 15:48:23 -0700 Subject: Redo yesterday's buf_writer-wrapper in a less silly and convoluted way. Add integer stringifying functions to _int module. --- src/lib/_int.rs | 44 +++++++++++++++++++++++++++++++++++++++++++ src/lib/_io.rs | 58 +++++++++++++++------------------------------------------ 2 files changed, 59 insertions(+), 43 deletions(-) (limited to 'src/lib') diff --git a/src/lib/_int.rs b/src/lib/_int.rs index 9b756675..03017259 100644 --- a/src/lib/_int.rs +++ b/src/lib/_int.rs @@ -44,3 +44,47 @@ fn next_power_of_two(uint n) -> uint { } ret tmp + 1u; } + +fn uto_string(mutable uint n, uint radix) -> str +{ + check (0u < radix && radix <= 16u); + fn digit(uint n) -> str { + alt (n) { + case (0u) { ret "0"; } + case (1u) { ret "1"; } + case (2u) { ret "2"; } + case (3u) { ret "3"; } + case (4u) { ret "4"; } + case (5u) { ret "5"; } + case (6u) { ret "6"; } + case (7u) { ret "7"; } + case (8u) { ret "8"; } + case (9u) { ret "9"; } + case (10u) { ret "A"; } + case (11u) { ret "B"; } + case (12u) { ret "C"; } + case (13u) { ret "D"; } + case (14u) { ret "E"; } + case (15u) { ret "F"; } + } + } + + if (n == 0u) { ret "0"; } + + let str s = ""; + while (n > 0u) { + s = digit(n % radix) + s; + n /= radix; + } + ret s; +} + +fn to_string(mutable int n, uint radix) -> str +{ + check (0u < radix && radix <= 16u); + if (n < 0) { + ret "-" + uto_string((-n) as uint, radix); + } else { + ret uto_string(n as uint, radix); + } +} diff --git a/src/lib/_io.rs b/src/lib/_io.rs index b0b0c313..dbd60e63 100644 --- a/src/lib/_io.rs +++ b/src/lib/_io.rs @@ -112,49 +112,21 @@ fn new_buf_writer(str path, vec[fileflag] flags) -> buf_writer { ret fd_buf_writer(fd); } -type formatter[T] = fn(&T x) -> vec[u8]; - -type writer[T] = unsafe obj { fn write(&T x); }; - -fn mk_writer[T](str path, - vec[fileflag] flags, - &formatter[T] fmt) - -> writer[T] -{ - unsafe obj w[T](buf_writer out, formatter[T] fmt) { - fn write(&T x) { - out.write(fmt(x)); - } - } - ret w[T](new_buf_writer(path, flags), fmt); -} - -/* TODO: int_writer, uint_writer, ... */ - -fn str_writer(str path, vec[fileflag] flags) -> writer[str] { - auto fmt = _str.bytes; // FIXME (issue #90) - ret mk_writer[str](path, flags, fmt); -} - -fn vec_writer[T](str path, - vec[fileflag] flags, - &formatter[T] inner) - -> writer[vec[T]] +type writer = + unsafe obj { + fn write_str(str s); + fn write_int(int n); + fn write_uint(uint n); + }; + +fn file_writer(str path, + vec[fileflag] flags) + -> writer { - fn fmt[T](&vec[T] v, &formatter[T] inner) -> vec[u8] { - let vec[u8] res = _str.bytes("vec("); - auto first = true; - for (T x in v) { - if (!first) { - res += _str.bytes(", "); - } else { - first = false; - } - res += inner(x); - } - res += _str.bytes(")\n"); - ret res; + unsafe obj fw(buf_writer out) { + fn write_str(str s) { out.write(_str.bytes(s)); } + fn write_int(int n) { out.write(_str.bytes(_int.to_string(n, 10u))); } + fn write_uint(uint n) { out.write(_str.bytes(_int.uto_string(n, 10u))); } } - - ret mk_writer[vec[T]](path, flags, bind fmt[T](_, inner)); + ret fw(new_buf_writer(path, flags)); } -- cgit v1.2.3 From 581a95a804f77259153c030d39f861282b468612 Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Sat, 24 Jul 2010 16:01:34 -0700 Subject: Add an int->str conversion function. The test currently fails because string equality isn't implemented. --- src/lib/_int.rs | 20 ++++++++++---------- src/lib/_io.rs | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/lib') diff --git a/src/lib/_int.rs b/src/lib/_int.rs index 03017259..e76c2bf5 100644 --- a/src/lib/_int.rs +++ b/src/lib/_int.rs @@ -45,7 +45,7 @@ fn next_power_of_two(uint n) -> uint { ret tmp + 1u; } -fn uto_string(mutable uint n, uint radix) -> str +fn uto_str(mutable uint n, uint radix) -> str { check (0u < radix && radix <= 16u); fn digit(uint n) -> str { @@ -60,12 +60,12 @@ fn uto_string(mutable uint n, uint radix) -> str case (7u) { ret "7"; } case (8u) { ret "8"; } case (9u) { ret "9"; } - case (10u) { ret "A"; } - case (11u) { ret "B"; } - case (12u) { ret "C"; } - case (13u) { ret "D"; } - case (14u) { ret "E"; } - case (15u) { ret "F"; } + case (10u) { ret "a"; } + case (11u) { ret "b"; } + case (12u) { ret "c"; } + case (13u) { ret "d"; } + case (14u) { ret "e"; } + case (15u) { ret "f"; } } } @@ -79,12 +79,12 @@ fn uto_string(mutable uint n, uint radix) -> str ret s; } -fn to_string(mutable int n, uint radix) -> str +fn to_str(mutable int n, uint radix) -> str { check (0u < radix && radix <= 16u); if (n < 0) { - ret "-" + uto_string((-n) as uint, radix); + ret "-" + uto_str((-n) as uint, radix); } else { - ret uto_string(n as uint, radix); + ret uto_str(n as uint, radix); } } diff --git a/src/lib/_io.rs b/src/lib/_io.rs index dbd60e63..93d06d41 100644 --- a/src/lib/_io.rs +++ b/src/lib/_io.rs @@ -125,8 +125,8 @@ fn file_writer(str path, { unsafe obj fw(buf_writer out) { fn write_str(str s) { out.write(_str.bytes(s)); } - fn write_int(int n) { out.write(_str.bytes(_int.to_string(n, 10u))); } - fn write_uint(uint n) { out.write(_str.bytes(_int.uto_string(n, 10u))); } + fn write_int(int n) { out.write(_str.bytes(_int.to_str(n, 10u))); } + fn write_uint(uint n) { out.write(_str.bytes(_int.uto_str(n, 10u))); } } ret fw(new_buf_writer(path, flags)); } -- cgit v1.2.3 From 3f6e8ffe64b57b0eaba6812208e94500422ca40c Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Sun, 25 Jul 2010 00:36:03 -0700 Subject: Implement _str.len() to return the number of bytes, rename it to byte_len(), and add a test. --- src/lib/_str.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src/lib') diff --git a/src/lib/_str.rs b/src/lib/_str.rs index 7d1a2dbd..a607c7d5 100644 --- a/src/lib/_str.rs +++ b/src/lib/_str.rs @@ -3,7 +3,7 @@ import rustrt.sbuf; native "rust" mod rustrt { type sbuf; fn str_buf(str s) -> sbuf; - fn str_len(str s) -> uint; + fn str_byte_len(str s) -> uint; fn str_alloc(uint n_bytes) -> str; fn refcount[T](str s) -> uint; } @@ -13,7 +13,7 @@ fn is_utf8(vec[u8] v) -> bool { } fn is_ascii(str s) -> bool { - let uint i = len(s); + let uint i = byte_len(s); while (i > 0u) { i -= 1u; if ((s.(i) & 0x80u8) != 0u8) { @@ -27,8 +27,13 @@ fn alloc(uint n_bytes) -> str { ret rustrt.str_alloc(n_bytes); } -fn len(str s) -> uint { - ret rustrt.str_len(s); +// Returns the number of bytes (a.k.a. UTF-8 code units) in s. +// Contrast with a function that would return the number of code +// points (char's), combining character sequences, words, etc. See +// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a +// way to implement those. +fn byte_len(str s) -> uint { + ret rustrt.str_byte_len(s); } fn buf(str s) -> sbuf { @@ -39,5 +44,5 @@ fn bytes(&str s) -> vec[u8] { fn ith(str s, uint i) -> u8 { ret s.(i); } - ret _vec.init_fn[u8](bind ith(s, _), _str.len(s)); + ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s)); } -- cgit v1.2.3