about | summary | refs | log | tree | commit | diff
path: root/src/lib/io.rs
diff options
context:
space:
mode:
author: Marijn Haverbeke <[email protected]> 2011-03-24 12:11:32 +0100
committer: Graydon Hoare <[email protected]> 2011-03-25 08:22:52 -0700
commit: a0455144774de6c9dc0ff0e87fe4352f8a70cac3 (patch)
tree: a487499a7e61e5fbda47d93eba806fb02373d1ed /src/lib/io.rs
parent: fix pretty-printer (diff)
download: rust-a0455144774de6c9dc0ff0e87fe4352f8a70cac3.tar.xz
          rust-a0455144774de6c9dc0ff0e87fe4352f8a70cac3.zip
Start making the standard-lib utf-8 aware
Finally implements _str.is_utf8, adds from_chars, from_char, to_chars, char_at, char_len, (push|pop|shift|unshift)_char. Also, proper character I/O for streams.
Diffstat (limited to 'src/lib/io.rs')
-rw-r--r-- src/lib/io.rs 91
1 files changed, 57 insertions, 34 deletions
diff --git a/src/lib/io.rs b/src/lib/io.rs
index dea15a27..39399aaa 100644
--- a/src/lib/io.rs
+++ b/src/lib/io.rs
@@ -7,16 +7,16 @@ native "rust" mod rustrt {
// Reading
-// TODO This is all buffered. We might need an unbuffered variant as well
+// FIXME This is all buffered. We might need an unbuffered variant as well
tag seek_style {seek_set; seek_end; seek_cur;}
type reader =
state obj {
- impure fn read_byte() -> u8;
+ impure fn read_byte() -> int;
+ impure fn unread_byte(int byte);
impure fn read_bytes(uint len) -> vec[u8];
- impure fn read_char() -> int;
- impure fn unread_char(int i);
+ impure fn read_char() -> char;
impure fn eof() -> bool;
impure fn read_line() -> str;
impure fn read_c_str() -> str;
@@ -24,7 +24,7 @@ type reader =
impure fn read_le_int(uint size) -> int;
impure fn seek(int offset, seek_style whence);
- impure fn tell() -> uint; // TODO: eventually u64
+ impure fn tell() -> uint; // FIXME: eventually u64
};
fn convert_whence(seek_style whence) -> int {
@@ -36,8 +36,11 @@ fn convert_whence(seek_style whence) -> int {
}
state obj FILE_reader(os.libc.FILE f, bool must_close) {
- impure fn read_byte() -> u8 {
- ret os.libc.fgetc(f) as u8;
+ impure fn read_byte() -> int {
+ ret os.libc.fgetc(f);
+ }
+ impure fn unread_byte(int byte) {
+ os.libc.ungetc(byte, f);
}
impure fn read_bytes(uint len) -> vec[u8] {
auto buf = _vec.alloc[u8](len);
@@ -45,12 +48,26 @@ state obj FILE_reader(os.libc.FILE f, bool must_close) {
_vec.len_set[u8](buf, read);
ret buf;
}
- impure fn read_char() -> int {
- ret os.libc.fgetc(f);
- }
- impure fn unread_char(int ch) {
- os.libc.ungetc(ch, f);
- }
+ impure fn read_char() -> char {
+ auto c0 = os.libc.fgetc(f);
+ if (c0 == -1) {ret -1 as char;} // FIXME will this stay valid?
+ auto b0 = c0 as u8;
+ auto w = _str.utf8_char_width(b0);
+ check(w > 0u);
+ if (w == 1u) {ret b0 as char;}
+ auto val = 0u;
+ while (w > 1u) {
+ w -= 1u;
+ auto next = os.libc.fgetc(f);
+ check(next > -1);
+ check(next & 0xc0 == 0x80);
+ val <<= 6u;
+ val += (next & 0x3f) as uint;
+ }
+ // See _str.char_at
+ val += ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u);
+ ret val as char;
+ }
impure fn eof() -> bool {
auto ch = os.libc.fgetc(f);
if (ch == -1) {ret true;}
@@ -58,25 +75,27 @@ state obj FILE_reader(os.libc.FILE f, bool must_close) {
ret false;
}
impure fn read_line() -> str {
- auto buf = "";
- while (true) {
- auto ch = os.libc.fgetc(f);
- if (ch == -1) { ret buf; }
- if (ch == 10) { ret buf; }
- buf += _str.unsafe_from_bytes(vec(ch as u8));
- }
- ret buf;
+ let vec[u8] buf = vec();
+ // No break yet in rustc
+ auto go_on = true;
+ while (go_on) {
+ auto ch = os.libc.fgetc(f);
+ if (ch == -1 || ch == 10) {go_on = false;}
+ else {_vec.push[u8](buf, ch as u8);}
+ }
+ ret _str.unsafe_from_bytes(buf);
}
impure fn read_c_str() -> str {
- auto buf = "";
- while (true) {
+ let vec[u8] buf = vec();
+ auto go_on = true;
+ while (go_on) {
auto ch = os.libc.fgetc(f);
- if (ch < 1) { ret buf; }
- buf += _str.unsafe_from_bytes(vec(ch as u8));
+ if (ch < 1) {go_on = false;}
+ else {_vec.push[u8](buf, ch as u8);}
}
- ret buf;
+ ret _str.unsafe_from_bytes(buf);
}
- // TODO deal with eof?
+ // FIXME deal with eof?
impure fn read_le_uint(uint size) -> uint {
auto val = 0u;
auto pos = 0u;
@@ -95,7 +114,7 @@ state obj FILE_reader(os.libc.FILE f, bool must_close) {
pos += 8u;
size -= 1u;
}
- ret val as int; // TODO does that work?
+ ret val as int;
}
impure fn seek(int offset, seek_style whence) {
check(os.libc.fseek(f, offset, convert_whence(whence)) == 0);
@@ -123,8 +142,6 @@ fn file_reader(str path) -> reader {
// Writing
-// TODO This is all unbuffered. We might need a buffered variant as well
-
tag fileflag {
append;
create;
@@ -136,7 +153,7 @@ type buf_writer = state obj {
fn write(vec[u8] v);
fn seek(int offset, seek_style whence);
- fn tell() -> uint; // TODO: eventually u64
+ fn tell() -> uint; // FIXME: eventually u64
};
state obj FILE_writer(os.libc.FILE f, bool must_close) {
@@ -224,7 +241,10 @@ fn file_buf_writer(str path, vec[fileflag] flags) -> buf_writer {
type writer =
state obj {
fn get_buf_writer() -> buf_writer;
+ // write_str will continue to do utf-8 output only. an alternative
+ // function will be provided for general encoded string output
impure fn write_str(str s);
+ impure fn write_char(char ch);
impure fn write_int(int n);
impure fn write_uint(uint n);
impure fn write_bytes(vec[u8] bytes);
@@ -249,6 +269,10 @@ state obj new_writer(buf_writer out) {
impure fn write_str(str s) {
out.write(_str.bytes(s));
}
+ impure fn write_char(char ch) {
+ // FIXME needlessly consy
+ out.write(_str.bytes(_str.from_char(ch)));
+ }
impure fn write_int(int n) {
out.write(_str.bytes(_int.to_str(n, 10u)));
}
@@ -275,7 +299,7 @@ fn file_writer(str path, vec[fileflag] flags) -> writer {
ret new_writer(file_buf_writer(path, flags));
}
-// TODO: fileflags
+// FIXME: fileflags
fn buffered_file_buf_writer(str path) -> buf_writer {
auto f = os.libc.fopen(_str.buf(path), _str.buf("w"));
if (f as uint == 0u) {
@@ -300,7 +324,7 @@ type byte_buf = @rec(mutable vec[mutable u8] buf, mutable uint pos);
state obj byte_buf_writer(byte_buf buf) {
fn write(vec[u8] v) {
- // TODO: optimize
+ // FIXME: optimize
auto vlen = _vec.len[u8](v);
auto vpos = 0u;
while (vpos < vlen) {
@@ -336,7 +360,6 @@ state obj byte_buf_writer(byte_buf buf) {
fn tell() -> uint { ret buf.pos; }
}
-// TODO awkward! it's not possible to implement a writer with an extra method
fn string_writer() -> str_writer {
// FIXME: yikes, this is bad. Needs fixing of mutable syntax.
let vec[mutable u8] b = vec(mutable 0u8);