diff options
| author | Marijn Haverbeke <[email protected]> | 2011-05-12 17:24:54 +0200 |
|---|---|---|
| committer | Marijn Haverbeke <[email protected]> | 2011-05-12 21:30:44 +0200 |
| commit | 3816e57fd2a8ab19e4ac6d4b3ddd5b49d5973ff2 (patch) | |
| tree | 508982ed2f789aedd89eebd529343d9dc88b8e01 /src/lib/extfmt.rs | |
| parent | Transitional change to make extfmt output lowercase module name (diff) | |
| download | rust-3816e57fd2a8ab19e4ac6d4b3ddd5b49d5973ff2.tar.xz rust-3816e57fd2a8ab19e4ac6d4b3ddd5b49d5973ff2.zip | |
Downcase std modules again, move to :: for module dereferencing
This should be a snapshot transition.
Diffstat (limited to 'src/lib/extfmt.rs')
| -rw-r--r-- | src/lib/extfmt.rs | 568 |
1 files changed, 568 insertions, 0 deletions
diff --git a/src/lib/extfmt.rs b/src/lib/extfmt.rs new file mode 100644 index 00000000..c5691fe9 --- /dev/null +++ b/src/lib/extfmt.rs @@ -0,0 +1,568 @@ +/* The 'fmt' extension is modeled on the posix printf system. + * + * A posix conversion ostensibly looks like this: + * + * %[parameter][flags][width][.precision][length]type + * + * Given the different numeric type bestiary we have, we omit the 'length' + * parameter and support slightly different conversions for 'type': + * + * %[parameter][flags][width][.precision]type + * + * we also only support translating-to-rust a tiny subset of the possible + * combinations at the moment. + */ + +import option::none; +import option::some; + +/* + * We have a CT (compile-time) module that parses format strings into a + * sequence of conversions. From those conversions AST fragments are built + * that call into properly-typed functions in the RT (run-time) module. Each + * of those run-time conversion functions accepts another conversion + * description that specifies how to format its output. + * + * The building of the AST is currently done in a module inside the compiler, + * but should migrate over here as the plugin interface is defined. 
+ */ + +// Functions used by the fmt extension at compile time +mod CT { + tag signedness { + signed; + unsigned; + } + + tag caseness { + case_upper; + case_lower; + } + + tag ty { + ty_bool; + ty_str; + ty_char; + ty_int(signedness); + ty_bits; + ty_hex(caseness); + ty_octal; + // FIXME: More types + } + + tag flag { + flag_left_justify; + flag_left_zero_pad; + flag_space_for_sign; + flag_sign_always; + flag_alternate; + } + + tag count { + count_is(int); + count_is_param(int); + count_is_next_param; + count_implied; + } + + // A formatted conversion from an expression to a string + type conv = rec(option::t[int] param, + vec[flag] flags, + count width, + count precision, + ty ty); + + // A fragment of the output sequence + tag piece { + piece_string(str); + piece_conv(conv); + } + + fn parse_fmt_string(str s) -> vec[piece] { + let vec[piece] pieces = vec(); + auto lim = _str::byte_len(s); + auto buf = ""; + + fn flush_buf(str buf, &vec[piece] pieces) -> str { + if (_str::byte_len(buf) > 0u) { + auto piece = piece_string(buf); + pieces += vec(piece); + } + ret ""; + } + + auto i = 0u; + while (i < lim) { + auto curr = _str::substr(s, i, 1u); + if (_str::eq(curr, "%")) { + i += 1u; + if (i >= lim) { + log_err "unterminated conversion at end of string"; + fail; + } + auto curr2 = _str::substr(s, i, 1u); + if (_str::eq(curr2, "%")) { + i += 1u; + } else { + buf = flush_buf(buf, pieces); + auto res = parse_conversion(s, i, lim); + pieces += vec(res._0); + i = res._1; + } + } else { + buf += curr; + i += 1u; + } + } + buf = flush_buf(buf, pieces); + ret pieces; + } + + fn peek_num(str s, uint i, uint lim) -> option::t[tup(uint, uint)] { + if (i >= lim) { + ret none[tup(uint, uint)]; + } + + auto c = s.(i); + if (!('0' as u8 <= c && c <= '9' as u8)) { + ret option::none[tup(uint, uint)]; + } + + auto n = (c - ('0' as u8)) as uint; + alt (peek_num(s, i + 1u, lim)) { + case (none[tup(uint, uint)]) { + ret some[tup(uint, uint)](tup(n, i + 1u)); + } + case (some[tup(uint, 
uint)](?next)) { + auto m = next._0; + auto j = next._1; + ret some[tup(uint, uint)](tup(n * 10u + m, j)); + } + } + + } + + fn parse_conversion(str s, uint i, uint lim) -> tup(piece, uint) { + auto parm = parse_parameter(s, i, lim); + auto flags = parse_flags(s, parm._1, lim); + auto width = parse_count(s, flags._1, lim); + auto prec = parse_precision(s, width._1, lim); + auto ty = parse_type(s, prec._1, lim); + ret tup(piece_conv(rec(param = parm._0, + flags = flags._0, + width = width._0, + precision = prec._0, + ty = ty._0)), + ty._1); + } + + fn parse_parameter(str s, uint i, uint lim) -> tup(option::t[int], uint) { + if (i >= lim) { + ret tup(none[int], i); + } + + auto num = peek_num(s, i, lim); + alt (num) { + case (none[tup(uint, uint)]) { + ret tup(none[int], i); + } + case (some[tup(uint, uint)](?t)) { + auto n = t._0; + auto j = t._1; + if (j < lim && s.(j) == '$' as u8) { + ret tup(some[int](n as int), j + 1u); + } + else { + ret tup(none[int], i); + } + } + } + } + + fn parse_flags(str s, uint i, uint lim) -> tup(vec[flag], uint) { + let vec[flag] noflags = vec(); + + if (i >= lim) { + ret tup(noflags, i); + } + + fn more_(flag f, str s, uint i, uint lim) -> tup(vec[flag], uint) { + auto next = parse_flags(s, i + 1u, lim); + auto rest = next._0; + auto j = next._1; + let vec[flag] curr = vec(f); + ret tup(curr + rest, j); + } + + auto more = bind more_(_, s, i, lim); + + auto f = s.(i); + if (f == ('-' as u8)) { + ret more(flag_left_justify); + } else if (f == ('0' as u8)) { + ret more(flag_left_zero_pad); + } else if (f == (' ' as u8)) { + ret more(flag_space_for_sign); + } else if (f == ('+' as u8)) { + ret more(flag_sign_always); + } else if (f == ('#' as u8)) { + ret more(flag_alternate); + } else { + ret tup(noflags, i); + } + } + + fn parse_count(str s, uint i, uint lim) -> tup(count, uint) { + if (i >= lim) { + ret tup(count_implied, i); + } + + if (s.(i) == ('*' as u8)) { + auto param = parse_parameter(s, i + 1u, lim); + auto j = param._1; + 
alt (param._0) { + case (none[int]) { + ret tup(count_is_next_param, j); + } + case (some[int](?n)) { + ret tup(count_is_param(n), j); + } + } + } else { + auto num = peek_num(s, i, lim); + alt (num) { + case (none[tup(uint, uint)]) { + ret tup(count_implied, i); + } + case (some[tup(uint, uint)](?num)) { + ret tup(count_is(num._0 as int), num._1); + } + } + } + } + + fn parse_precision(str s, uint i, uint lim) -> tup(count, uint) { + if (i >= lim) { + ret tup(count_implied, i); + } + + if (s.(i) == '.' as u8) { + auto count = parse_count(s, i + 1u, lim); + // If there were no digits specified, i.e. the precision + // was ".", then the precision is 0 + alt (count._0) { + case (count_implied) { + ret tup(count_is(0), count._1); + } + case (_) { + ret count; + } + } + } else { + ret tup(count_implied, i); + } + } + + fn parse_type(str s, uint i, uint lim) -> tup(ty, uint) { + if (i >= lim) { + log_err "missing type in conversion"; + fail; + } + + auto t; + auto tstr = _str::substr(s, i, 1u); + if (_str::eq(tstr, "b")) { + t = ty_bool; + } else if (_str::eq(tstr, "s")) { + t = ty_str; + } else if (_str::eq(tstr, "c")) { + t = ty_char; + } else if (_str::eq(tstr, "d") + || _str::eq(tstr, "i")) { + // TODO: Do we really want two signed types here? + // How important is it to be printf compatible? + t = ty_int(signed); + } else if (_str::eq(tstr, "u")) { + t = ty_int(unsigned); + } else if (_str::eq(tstr, "x")) { + t = ty_hex(case_lower); + } else if (_str::eq(tstr, "X")) { + t = ty_hex(case_upper); + } else if (_str::eq(tstr, "t")) { + t = ty_bits; + } else if (_str::eq(tstr, "o")) { + t = ty_octal; + } else { + log_err "unknown type in conversion"; + fail; + } + + ret tup(t, i + 1u); + } +} + +// Functions used by the fmt extension at runtime. For now there are a lot of +// decisions made a runtime. If it proves worthwhile then some of these +// conditions can be evaluated at compile-time. For now though it's cleaner to +// implement it this way, I think. 
// Each conv_* function below turns one value into its formatted string,
// guided by a conv record describing flags, width, precision and type.
mod RT {

    // Formatting flags carried in a conv record
    tag flag {
        flag_left_justify;
        flag_left_zero_pad;
        flag_space_for_sign;
        flag_sign_always;
        flag_alternate;
        // FIXME: This is a hack to avoid creating 0-length vec exprs,
        // which have some difficulty typechecking currently. See
        // comments in front::extfmt::make_flags
        flag_none;
    }

    // A width or precision: either an explicit number or not specified
    tag count {
        count_is(int);
        count_implied;
    }

    // Selects the radix (and letter case for hex) used by conv_uint
    tag ty {
        ty_default;
        ty_bits;
        ty_hex_upper;
        ty_hex_lower;
        ty_octal;
    }

    // FIXME: May not want to use a vector here for flags;
    // instead just use a bool per flag
    type conv = rec(vec[flag] flags,
                    count width,
                    count precision,
                    ty ty);

    // Format a signed integer in base 10 (cv.ty is not consulted). A
    // non-negative value gets a leading "+" when flag_sign_always is set,
    // otherwise a leading " " when flag_space_for_sign is set.
    fn conv_int(&conv cv, int i) -> str {
        auto radix = 10u;
        auto prec = get_int_precision(cv);
        auto s = int_to_str_prec(i, radix, prec);
        if (0 <= i) {
            if (have_flag(cv.flags, flag_sign_always)) {
                s = "+" + s;
            } else if (have_flag(cv.flags, flag_space_for_sign)) {
                s = " " + s;
            }
        }
        ret pad(cv, s, pad_signed);
    }

    // Format an unsigned integer in the radix selected by cv.ty:
    // 10 (default), 16 (hex, upper-cased for ty_hex_upper), 2 or 8
    fn conv_uint(&conv cv, uint u) -> str {
        auto prec = get_int_precision(cv);
        auto res;
        alt (cv.ty) {
            case (ty_default) {
                res = uint_to_str_prec(u, 10u, prec);
            }
            case (ty_hex_lower) {
                res = uint_to_str_prec(u, 16u, prec);
            }
            case (ty_hex_upper) {
                res = _str::to_upper(uint_to_str_prec(u, 16u, prec));
            }
            case (ty_bits) {
                res = uint_to_str_prec(u, 2u, prec);
            }
            case (ty_octal) {
                res = uint_to_str_prec(u, 8u, prec);
            }
        }
        ret pad(cv, res, pad_unsigned);
    }

    // Format a boolean as "true" or "false"
    fn conv_bool(&conv cv, bool b) -> str {
        auto s;
        if (b) {
            s = "true";
        } else {
            s = "false";
        }
        // run the boolean conversion through the string conversion logic,
        // giving it the same rules for precision, etc.
        ret conv_str(cv, s);
    }

    // Format a single character; only width padding applies, never zeros
    fn conv_char(&conv cv, char c) -> str {
        ret pad(cv, _str::from_char(c), pad_nozero);
    }

    // Format a string, truncating it to the precision (if one was given)
    // before padding it to the width
    fn conv_str(&conv cv, str s) -> str {
        auto unpadded = s;
        alt (cv.precision) {
            case (count_implied) {
            }
            case (count_is(?max)) {
                // For strings, precision is the maximum characters displayed
                if (max as uint < _str::char_len(s)) {
                    // FIXME: substr works on bytes, not chars!
                    unpadded = _str::substr(s, 0u, max as uint);
                }
            }
        }
        ret pad(cv, unpadded, pad_nozero);
    }

    // Convert an int to string with minimum number of digits. If precision is
    // 0 and num is 0 then the result is the empty string. A negative num is
    // rendered as "-" followed by the digits of its negation.
    fn int_to_str_prec(int num, uint radix, uint prec) -> str {
        if (num < 0) {
            ret "-" + uint_to_str_prec((-num) as uint, radix, prec);
        } else {
            ret uint_to_str_prec(num as uint, radix, prec);
        }
    }

    // Convert a uint to string with a minimum number of digits. If precision
    // is 0 and num is 0 then the result is the empty string. Could move this
    // to _uint: but it doesn't seem all that useful.
    fn uint_to_str_prec(uint num, uint radix, uint prec) -> str {
        auto s;

        if (prec == 0u && num == 0u) {
            s = "";
        } else {
            s = _uint::to_str(num, radix);
            auto len = _str::char_len(s);
            if (len < prec) {
                // Left-fill with zeros up to the requested digit count
                auto diff = prec - len;
                auto pad = str_init_elt('0', diff);
                s = pad + s;
            }
        }

        ret s;
    }

    // The minimum digit count for integer conversions: the explicit
    // precision if there is one, otherwise 1
    fn get_int_precision(&conv cv) -> uint {
        alt (cv.precision) {
            case (count_is(?c)) {
                ret c as uint;
            }
            case (count_implied) {
                ret 1u;
            }
        }
    }

    // Build a string consisting of n_elts copies of c, with c narrowed to a
    // single byte.
    // FIXME: This might be useful in _str: but needs to be utf8 safe first
    fn str_init_elt(char c, uint n_elts) -> str {
        auto svec = _vec::init_elt[u8](c as u8, n_elts);
        // FIXME: Using unsafe_from_bytes because rustboot
        // can't figure out the is_utf8 predicate on from_bytes?
        ret _str::unsafe_from_bytes(svec);
    }

    // How pad may treat its input: signed and unsigned numbers are eligible
    // for zero padding (signed ones may also carry a leading sign
    // character); pad_nozero input is only ever padded with spaces
    tag pad_mode {
        pad_signed;
        pad_unsigned;
        pad_nozero;
    }

    // Pad s out to the width given in cv. The checks run in this order:
    //   1. no width, or s already at least that wide: s is returned as is;
    //   2. flag_left_justify: spaces are appended on the right;
    //   3. otherwise padding goes on the left, using '0' only when the mode
    //      allows it, flag_left_zero_pad is set and no precision was given;
    //      for signed values the zeros go after a leading sign character.
    fn pad(&conv cv, str s, pad_mode mode) -> str {
        auto uwidth;
        alt (cv.width) {
            case (count_implied) {
                ret s;
            }
            case (count_is(?width)) {
                // FIXME: Maybe width should be uint
                uwidth = width as uint;
            }
        }

        auto strlen = _str::char_len(s);
        if (uwidth <= strlen) {
            ret s;
        }

        auto padchar = ' ';
        auto diff = uwidth - strlen;
        if (have_flag(cv.flags, flag_left_justify)) {
            auto padstr = str_init_elt(padchar, diff);
            ret s + padstr;
        }

        auto might_zero_pad = false;
        auto signed = false;

        alt (mode) {
            case (pad_nozero) {
                // fallthrough
            }
            case (pad_signed) {
                might_zero_pad = true;
                signed = true;
            }
            case (pad_unsigned) {
                might_zero_pad = true;
            }
        }

        // True when cv carries an explicit precision
        fn have_precision(&conv cv) -> bool {
            alt (cv.precision) {
                case (count_implied) {
                    ret false;
                }
                case (_) {
                    ret true;
                }
            }
        }

        auto zero_padding = false;
        if (might_zero_pad
            && have_flag(cv.flags, flag_left_zero_pad)
            && !have_precision(cv)) {

            padchar = '0';
            zero_padding = true;
        }

        auto padstr = str_init_elt(padchar, diff);

        // This is completely heinous. If we have a signed value then
        // potentially rip apart the intermediate result and insert some
        // zeros. It may make sense to convert zero padding to a precision
        // instead.
        if (signed
            && zero_padding
            && _str::byte_len(s) > 0u) {

            auto head = s.(0);
            if (head == '+' as u8
                || head == '-' as u8
                || head == ' ' as u8) {

                // Keep the sign character in front and put the zeros
                // between it and the digits
                auto headstr = _str::unsafe_from_bytes(vec(head));
                auto bytelen = _str::byte_len(s);
                auto numpart = _str::substr(s, 1u, bytelen - 1u);
                ret headstr + padstr + numpart;
            }
        }
        ret padstr + s;
    }

    // Linear search: true if f occurs anywhere in flags
    fn have_flag(vec[flag] flags, flag f) -> bool {
        for (flag candidate in flags) {
            if (candidate == f) {
                ret true;
            }
        }
        ret false;
    }
}

// Local Variables:
// mode: rust;
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// compile-command: "make -k -C .. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
// End: