diff options
| author | Graydon Hoare <[email protected]> | 2011-04-29 15:26:28 +0000 |
|---|---|---|
| committer | Graydon Hoare <[email protected]> | 2011-04-29 15:26:28 +0000 |
| commit | a2f68b2d585f0b467f0911d162f3cb9bc7d1ad14 (patch) | |
| tree | d91937cb020dd2e69aa94bbe987b4851cf9c1d4f | |
| parent | rustc: Fix vec append glue for strings. Add a test case. (diff) | |
| download | rust-a2f68b2d585f0b467f0911d162f3cb9bc7d1ad14.tar.xz rust-a2f68b2d585f0b467f0911d162f3cb9bc7d1ad14.zip | |
Intern metadata while writing, shrink stage1 from 12mb to 5.7mb.
| -rw-r--r-- | src/comp/front/creader.rs | 65 | ||||
| -rw-r--r-- | src/comp/middle/metadata.rs | 208 | ||||
| -rw-r--r-- | src/comp/middle/trans.rs | 15 | ||||
| -rw-r--r-- | src/comp/middle/ty.rs | 30 |
4 files changed, 224 insertions, 94 deletions
diff --git a/src/comp/front/creader.rs b/src/comp/front/creader.rs index 3ecf3058..f48d47c9 100644 --- a/src/comp/front/creader.rs +++ b/src/comp/front/creader.rs @@ -43,34 +43,29 @@ tag resolve_result { // Compact string representation for ty.t values. API ty_str & parse_from_str. // (The second has to be authed pure.) Extra parameters are for converting -// to/from def_ids in the string rep. Whatever format you choose should not +// to/from def_ids in the data buffer. Whatever format you choose should not // contain pipe characters. // Callback to translate defs to strs or back. type str_def = fn(str) -> ast.def_id; -type pstate = rec(str rep, mutable uint pos, uint len, ty.ctxt tcx); +type pstate = rec(vec[u8] data, int crate, + mutable uint pos, uint len, ty.ctxt tcx); fn peek(@pstate st) -> u8 { - if (st.pos < st.len) {ret st.rep.(st.pos) as u8;} - else {ret ' ' as u8;} + ret st.data.(st.pos); } fn next(@pstate st) -> u8 { - if (st.pos >= st.len) {fail;} - auto ch = st.rep.(st.pos); + auto ch = st.data.(st.pos); st.pos = st.pos + 1u; - ret ch as u8; + ret ch; } -fn parse_ty_str(str rep, str_def sd, ty.ctxt tcx) -> ty.t { - auto len = _str.byte_len(rep); - auto st = @rec(rep=rep, mutable pos=0u, len=len, tcx=tcx); +fn parse_ty_data(vec[u8] data, int crate_num, uint pos, uint len, + str_def sd, ty.ctxt tcx) -> ty.t { + auto st = @rec(data=data, crate=crate_num, + mutable pos=pos, len=len, tcx=tcx); auto result = parse_ty(st, sd); - if (st.pos != len) { - log_err "parse_ty_str: incomplete parse, stopped at byte " - + _uint.to_str(st.pos, 10u) + " of " - + _uint.to_str(len, 10u) + " in str '" + rep + "'"; - } ret result; } @@ -178,6 +173,26 @@ fn parse_ty(@pstate st, str_def sd) -> ty.t { case ('X') { ret ty.mk_var(st.tcx, parse_int(st)); } case ('E') { ret ty.mk_native(st.tcx); } case ('Y') { ret ty.mk_type(st.tcx); } + case ('#') { + auto pos = parse_hex(st); + check (next(st) as char == ':'); + auto len = parse_hex(st); + check (next(st) as char == '#'); + alt (st.tcx.rcache.find(tup(st.crate,pos,len))) { + case (some[ty.t](?tt)) { ret tt; } + case (none[ty.t]) { + auto ps = @rec(pos=pos, len=len with *st); + auto tt = parse_ty(ps, sd); + st.tcx.rcache.insert(tup(st.crate,pos,len), tt); + ret tt; + } + } + } + case (?c) { + log_err "unexpected char in type string: "; + log_err c; + fail; + } } } @@ -212,6 +227,23 @@ fn parse_int(@pstate st) -> int { ret n; } +fn parse_hex(@pstate st) -> uint { + auto n = 0u; + while (true) { + auto cur = peek(st) as char; + if ((cur < '0' || cur > '9') && + (cur < 'a' || cur > 'f')) {break;} + st.pos = st.pos + 1u; + n *= 16u; + if ('0' <= cur && cur <= '9') { + n += (cur as uint) - ('0' as uint); + } else { + n += (10u + (cur as uint) - ('a' as uint)); + } + } + ret n; +} + fn parse_ty_fn(@pstate st, str_def sd) -> tup(vec[ty.arg], ty.t) { check(next(st) as char == '['); let vec[ty.arg] inputs = vec(); @@ -343,7 +375,8 @@ fn item_type(&ebml.doc item, int this_cnum, ty.ctxt tcx) -> ty.t { auto tp = ebml.get_doc(item, metadata.tag_items_data_item_type); auto s = _str.unsafe_from_bytes(ebml.doc_data(tp)); - ret parse_ty_str(s, bind parse_external_def_id(this_cnum, _), tcx); + ret parse_ty_data(item.data, this_cnum, tp.start, tp.end - tp.start, + bind parse_external_def_id(this_cnum, _), tcx); } fn item_ty_param_count(&ebml.doc item, int this_cnum) -> uint { diff --git a/src/comp/middle/metadata.rs b/src/comp/middle/metadata.rs index bd1d5253..0cfe09b7 100644 --- a/src/comp/middle/metadata.rs +++ b/src/comp/middle/metadata.rs @@ -1,9 +1,12 @@ import std._str; import std._uint; import std._vec; +import std.map.hashmap; import std.ebml; import std.io; import std.option; +import std.option.some; +import std.option.none; import front.ast; import middle.fold; @@ -48,119 +51,185 @@ const uint tag_index_table = 0x15u; // Extra parameters are for converting to/from def_ids in the string rep. // Whatever format you choose should not contain pipe characters. +type ty_abbrev = rec(uint pos, uint len, str s); + mod Encode { type ctxt = rec( - fn(ast.def_id) -> str ds, // Callback to translate defs to strs. - ty.ctxt tcx // The type context. + fn(ast.def_id) -> str ds, // Def -> str Callback. + ty.ctxt tcx, // The type context. + bool use_abbrevs, + hashmap[ty.t, ty_abbrev] abbrevs // Type abbrevs. ); fn ty_str(@ctxt cx, ty.t t) -> str { - ret sty_str(cx, ty.struct(cx.tcx, t)); + check (! cx.use_abbrevs); + auto sw = io.string_writer(); + enc_ty(sw.get_writer(), cx, t); + ret sw.get_str(); + } + + fn enc_ty(io.writer w, @ctxt cx, ty.t t) { + if (cx.use_abbrevs) { + alt (cx.abbrevs.find(t)) { + case (some[ty_abbrev](?a)) { + w.write_str(a.s); + ret; + } + case (none[ty_abbrev]) { + auto pos = w.get_buf_writer().tell(); + auto ss = enc_sty(w, cx, ty.struct(cx.tcx, t)); + auto end = w.get_buf_writer().tell(); + auto len = end-pos; + fn estimate_sz(uint u) -> uint { + auto n = u; + auto len = 0u; + while (n != 0u) { + len += 1u; + n = n >> 4u; + } + ret len; + } + auto abbrev_len = + 3u + estimate_sz(pos) + estimate_sz(len); + + if (abbrev_len < len) { + // I.e. it's actually an abbreviation. + auto s = ("#" + + _uint.to_str(pos, 16u) + ":" + + _uint.to_str(len, 16u) + "#"); + auto a = rec(pos=pos, len=len, s=s); + cx.abbrevs.insert(t, a); + } + ret; + } + } + } + enc_sty(w, cx, ty.struct(cx.tcx, t)); } - fn mt_str(@ctxt cx, &ty.mt mt) -> str { - auto mut_str; + fn enc_mt(io.writer w, @ctxt cx, &ty.mt mt) { alt (mt.mut) { - case (ast.imm) { mut_str = ""; } - case (ast.mut) { mut_str = "m"; } - case (ast.maybe_mut) { mut_str = "?"; } + case (ast.imm) { } + case (ast.mut) { w.write_char('m'); } + case (ast.maybe_mut) { w.write_char('?'); } } - ret mut_str + ty_str(cx, mt.ty); + enc_ty(w, cx, mt.ty); } - fn sty_str(@ctxt cx, ty.sty st) -> str { + fn enc_sty(io.writer w, @ctxt cx, ty.sty st) { alt (st) { - case (ty.ty_nil) {ret "n";} - case (ty.ty_bool) {ret "b";} - case (ty.ty_int) {ret "i";} - case (ty.ty_uint) {ret "u";} - case (ty.ty_float) {ret "l";} + case (ty.ty_nil) { w.write_char('n'); } + case (ty.ty_bool) { w.write_char('b'); } + case (ty.ty_int) { w.write_char('i'); } + case (ty.ty_uint) { w.write_char('u'); } + case (ty.ty_float) { w.write_char('l'); } case (ty.ty_machine(?mach)) { alt (mach) { - case (common.ty_u8) {ret "Mb";} - case (common.ty_u16) {ret "Mw";} - case (common.ty_u32) {ret "Ml";} - case (common.ty_u64) {ret "Md";} - case (common.ty_i8) {ret "MB";} - case (common.ty_i16) {ret "MW";} - case (common.ty_i32) {ret "ML";} - case (common.ty_i64) {ret "MD";} - case (common.ty_f32) {ret "Mf";} - case (common.ty_f64) {ret "MF";} + case (common.ty_u8) { w.write_str("Mb"); } + case (common.ty_u16) { w.write_str("Mw"); } + case (common.ty_u32) { w.write_str("Ml"); } + case (common.ty_u64) { w.write_str("Md"); } + case (common.ty_i8) { w.write_str("MB"); } + case (common.ty_i16) { w.write_str("MW"); } + case (common.ty_i32) { w.write_str("ML"); } + case (common.ty_i64) { w.write_str("MD"); } + case (common.ty_f32) { w.write_str("Mf"); } + case (common.ty_f64) { w.write_str("MF"); } } } - case (ty.ty_char) {ret "c";} - case (ty.ty_str) {ret "s";} + case (ty.ty_char) {w.write_char('c');} + case (ty.ty_str) {w.write_char('s');} case (ty.ty_tag(?def,?tys)) { // TODO restore def_id - auto acc = "t[" + cx.ds(def) + "|"; - for (ty.t t in tys) {acc += ty_str(cx, t);} - ret acc + "]"; + w.write_str("t["); + w.write_str(cx.ds(def)); + w.write_char('|'); + for (ty.t t in tys) { + enc_ty(w, cx, t); + } + w.write_char(']'); } - case (ty.ty_box(?mt)) {ret "@" + mt_str(cx, mt);} - case (ty.ty_vec(?mt)) {ret "V" + mt_str(cx, mt);} - case (ty.ty_port(?t)) {ret "P" + ty_str(cx, t);} - case (ty.ty_chan(?t)) {ret "C" + ty_str(cx, t);} + case (ty.ty_box(?mt)) {w.write_char('@'); enc_mt(w, cx, mt); } + case (ty.ty_vec(?mt)) {w.write_char('V'); enc_mt(w, cx, mt); } + case (ty.ty_port(?t)) {w.write_char('P'); enc_ty(w, cx, t); } + case (ty.ty_chan(?t)) {w.write_char('C'); enc_ty(w, cx, t); } case (ty.ty_tup(?mts)) { - auto acc = "T["; - for (ty.mt mt in mts) {acc += mt_str(cx, mt);} - ret acc + "]"; + w.write_str("T["); + for (ty.mt mt in mts) { + enc_mt(w, cx, mt); + } + w.write_char(']'); } case (ty.ty_rec(?fields)) { - auto acc = "R["; + w.write_str("R["); for (ty.field field in fields) { - acc += field.ident + "="; - acc += mt_str(cx, field.mt); + w.write_str(field.ident); + w.write_char('='); + enc_mt(w, cx, field.mt); } - ret acc + "]"; + w.write_char(']'); } case (ty.ty_fn(?proto,?args,?out)) { - ret proto_str(proto) + ty_fn_str(cx, args, out); + enc_proto(w, proto); + enc_ty_fn(w, cx, args, out); } case (ty.ty_native_fn(?abi,?args,?out)) { - auto abistr; + w.write_char('N'); alt (abi) { - case (ast.native_abi_rust) {abistr = "r";} - case (ast.native_abi_cdecl) {abistr = "c";} - case (ast.native_abi_llvm) {abistr = "l";} + case (ast.native_abi_rust) { w.write_char('r'); } + case (ast.native_abi_cdecl) { w.write_char('c'); } + case (ast.native_abi_llvm) { w.write_char('l'); } } - ret "N" + abistr + ty_fn_str(cx, args, out); + enc_ty_fn(w, cx, args, out); } case (ty.ty_obj(?methods)) { - auto acc = "O["; + w.write_str("O["); for (ty.method m in methods) { - acc += proto_str(m.proto); - acc += m.ident; - acc += ty_fn_str(cx, m.inputs, m.output); + enc_proto(w, m.proto); + w.write_str(m.ident); + enc_ty_fn(w, cx, m.inputs, m.output); } - ret acc + "]"; + w.write_char(']'); + } + case (ty.ty_var(?id)) { + w.write_char('X'); + w.write_str(common.istr(id)); } - case (ty.ty_var(?id)) {ret "X" + common.istr(id);} - case (ty.ty_native) {ret "E";} - case (ty.ty_param(?id)) {ret "p" + common.uistr(id);} - case (ty.ty_type) {ret "Y";} + case (ty.ty_native) {w.write_char('E');} + case (ty.ty_param(?id)) { + w.write_char('p'); + w.write_str(common.uistr(id)); + } + case (ty.ty_type) {w.write_char('Y');} // These two don't appear in crate metadata, but are here because // `hash_ty()` uses this function. - case (ty.ty_bound_param(?id)) {ret "o" + common.uistr(id);} - case (ty.ty_local(?def)) {ret "L" + cx.ds(def);} + case (ty.ty_bound_param(?id)) { + w.write_char('o'); + w.write_str(common.uistr(id)); + } + case (ty.ty_local(?def)) { + w.write_char('L'); + w.write_str(cx.ds(def)); + } } } - fn proto_str(ast.proto proto) -> str { + fn enc_proto(io.writer w, ast.proto proto) { alt (proto) { - case (ast.proto_iter) {ret "W";} - case (ast.proto_fn) {ret "F";} + case (ast.proto_iter) { w.write_char('W'); } + case (ast.proto_fn) { w.write_char('F'); } } } - fn ty_fn_str(@ctxt cx, vec[ty.arg] args, ty.t out) -> str { - auto acc = "["; + fn enc_ty_fn(io.writer w, @ctxt cx, vec[ty.arg] args, ty.t out) { + w.write_char('['); for (ty.arg arg in args) { - if (arg.mode == ast.alias) {acc += "&";} - acc += ty_str(cx, arg.ty); + if (arg.mode == ast.alias) { w.write_char('&'); } + enc_ty(w, cx, arg.ty); } - ret acc + "]" + ty_str(cx, out); + w.write_char(']'); + enc_ty(w, cx, out); } } @@ -336,9 +405,9 @@ fn encode_type(@trans.crate_ctxt cx, &ebml.writer ebml_w, ty.t typ) { ebml.start_tag(ebml_w, tag_items_data_item_type); auto f = def_to_str; - auto ty_str_ctxt = @rec(ds=f, tcx=cx.tcx); - ebml_w.writer.write(_str.bytes(Encode.ty_str(ty_str_ctxt, typ))); - + auto ty_str_ctxt = @rec(ds=f, tcx=cx.tcx, + use_abbrevs=true, abbrevs=cx.type_abbrevs); + Encode.enc_ty(io.new_writer_(ebml_w.writer), ty_str_ctxt, typ); ebml.end_tag(ebml_w); } @@ -565,7 +634,6 @@ fn encode_index[T](&ebml.writer ebml_w, vec[vec[tup(T, uint)]] buckets, ebml.end_tag(ebml_w); } - fn write_str(io.writer writer, &str s) { writer.write_str(s); } diff --git a/src/comp/middle/trans.rs b/src/comp/middle/trans.rs index ec0e53d9..788c2fee 100644 --- a/src/comp/middle/trans.rs +++ b/src/comp/middle/trans.rs @@ -114,6 +114,7 @@ state type crate_ctxt = rec(session.session sess, namegen names, std.sha1.sha1 sha, hashmap[ty.t, str] type_sha1s, + hashmap[ty.t, metadata.ty_abbrev] type_abbrevs, ty.ctxt tcx); type local_ctxt = rec(vec[str] path, @@ -189,7 +190,10 @@ fn mangle_name_by_type(@crate_ctxt ccx, vec[str] path, ty.t t) -> str { case (none[str]) { ccx.sha.reset(); auto f = metadata.def_to_str; - auto cx = @rec(ds=f, tcx=ccx.tcx); + // NB: do *not* use abbrevs here as we want the symbol names + // to be independent of one another in the crate. + auto cx = @rec(ds=f, tcx=ccx.tcx, + use_abbrevs=false, abbrevs=ccx.type_abbrevs); ccx.sha.input_str(metadata.Encode.ty_str(cx, t)); hash = _str.substr(ccx.sha.result_str(), 0u, 16u); ccx.type_sha1s.insert(t, hash); @@ -791,7 +795,9 @@ fn type_of_inner(@crate_ctxt cx, ty.t t) -> TypeRef { } check (llty as int != 0); - llvm.LLVMAddTypeName(cx.llmod, _str.buf(ty.ty_to_abbrev_str(cx.tcx, t)), + llvm.LLVMAddTypeName(cx.llmod, + _str.buf(ty.ty_to_short_str(cx.tcx, + cx.type_abbrevs, t)), llty); cx.lltypes.insert(t, llty); ret llty; @@ -1673,8 +1679,7 @@ fn declare_tydesc(@local_ctxt cx, ty.t t) -> @tydesc_info { auto glue_fn_ty = T_ptr(T_glue_fn(ccx.tn)); - auto name = sanitize(ccx.names.next("tydesc_" + - ty.ty_to_abbrev_str(cx.ccx.tcx, t))); + auto name = mangle_name_by_seq(ccx, cx.path, "tydesc"); auto gvar = llvm.LLVMAddGlobal(ccx.llmod, T_tydesc(ccx.tn), _str.buf(name)); auto tydesc = C_struct(vec(C_null(T_ptr(T_ptr(T_tydesc(ccx.tn)))), @@ -7672,6 +7677,7 @@ fn trans_crate(session.session sess, @ast.crate crate, ty.ctxt tcx, auto tydescs = map.mk_hashmap[ty.t,@tydesc_info](hasher, eqer); auto lltypes = map.mk_hashmap[ty.t,TypeRef](hasher, eqer); auto sha1s = map.mk_hashmap[ty.t,str](hasher, eqer); + auto abbrevs = map.mk_hashmap[ty.t,metadata.ty_abbrev](hasher, eqer); auto ccx = @rec(sess = sess, llmod = llmod, @@ -7698,6 +7704,7 @@ fn trans_crate(session.session sess, @ast.crate crate, ty.ctxt tcx, names = namegen(0), sha = std.sha1.mk_sha1(), type_sha1s = sha1s, + type_abbrevs = abbrevs, tcx = tcx); auto cx = new_local_ctxt(ccx); diff --git a/src/comp/middle/ty.rs b/src/comp/middle/ty.rs index b99ae33a..83befefc 100644 --- a/src/comp/middle/ty.rs +++ b/src/comp/middle/ty.rs @@ -46,7 +46,10 @@ type mt = rec(t ty, ast.mutability mut); // Contains information needed to resolve types and (in the future) look up // the types of AST nodes. -type ctxt = rec(@type_store ts, session.session sess); +type creader_cache = hashmap[tup(int,uint,uint),ty.t]; +type ctxt = rec(@type_store ts, + session.session sess, + creader_cache rcache); type ty_ctxt = ctxt; // Needed for disambiguation from Unify.ctxt. // Convert from method type to function type. Pretty easy; we just drop @@ -200,8 +203,26 @@ fn mk_type_store() -> @type_store { others=map.mk_hashmap[t,t](hasher, eqer)); } -fn mk_ctxt(session.session s) -> ctxt { ret rec(ts=mk_type_store(), sess=s); } +fn mk_rcache() -> creader_cache { + fn hash_cache_entry(&tup(int,uint,uint) k) -> uint { + ret (k._0 as uint) + k._1 + k._2; + } + fn eq_cache_entries(&tup(int,uint,uint) a, + &tup(int,uint,uint) b) -> bool { + ret a._0 == b._0 && + a._1 == b._1 && + a._2 == b._2; + } + auto h = hash_cache_entry; + auto e = eq_cache_entries; + ret map.mk_hashmap[tup(int,uint,uint),t](h, e); +} +fn mk_ctxt(session.session s) -> ctxt { + ret rec(ts = mk_type_store(), + sess = s, + rcache = mk_rcache()); +} // Type constructors fn mk_ty_full(&sty st, option.t[str] cname) -> t { @@ -627,9 +648,10 @@ fn ty_to_str(ctxt cx, &t typ) -> str { ret s; } -fn ty_to_abbrev_str(ctxt cx, t typ) -> str { +fn ty_to_short_str(ctxt cx, hashmap[ty.t, metadata.ty_abbrev] abbrevs, + t typ) -> str { auto f = def_to_str; - auto ecx = @rec(ds=f, tcx=cx); + auto ecx = @rec(ds=f, tcx=cx, use_abbrevs=false, abbrevs=abbrevs); auto s = metadata.Encode.ty_str(ecx, typ); if (_str.byte_len(s) >= 64u) { s = _str.substr(s, 0u, 64u); } ret s; |