aboutsummaryrefslogtreecommitdiff
path: root/src/lib/EBML.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/EBML.rs')
-rw-r--r--src/lib/EBML.rs186
1 files changed, 186 insertions, 0 deletions
diff --git a/src/lib/EBML.rs b/src/lib/EBML.rs
new file mode 100644
index 00000000..9b17bf06
--- /dev/null
+++ b/src/lib/EBML.rs
@@ -0,0 +1,186 @@
+// Simple Extensible Binary Markup Language (EBML) reader and writer on a
+// cursor model. See the specification here:
+// http://www.matroska.org/technical/specs/rfc/index.html
+
+import Option.none;
+import Option.some;
+
+type ebml_tag = rec(uint id, uint size);
+type ebml_state = rec(ebml_tag ebml_tag, uint tag_pos, uint data_pos);
+
+// TODO: When we have module renaming, make "reader" and "writer" separate
+// modules within this file.
+
+// EBML reading
+
+type doc = rec(vec[u8] data,
+ uint start,
+ uint end);
+
+fn vint_at(vec[u8] data, uint start) -> tup(uint, uint) {
+ auto a = data.(start);
+ if (a & 0x80u8 != 0u8) { ret tup((a & 0x7fu8) as uint, start + 1u); }
+ if (a & 0x40u8 != 0u8) {
+ ret tup((((a & 0x3fu8) as uint) << 8u) | (data.(start + 1u) as uint),
+ start + 2u);
+ } else if (a & 0x20u8 != 0u8) {
+ ret tup((((a & 0x1fu8) as uint) << 16u) |
+ ((data.(start + 1u) as uint) << 8u) |
+ (data.(start + 2u) as uint), start + 3u);
+ } else if (a & 0x10u8 != 0u8) {
+ ret tup((((a & 0x0fu8) as uint) << 24u) |
+ ((data.(start + 1u) as uint) << 16u) |
+ ((data.(start + 2u) as uint) << 8u) |
+ (data.(start + 3u) as uint), start + 4u);
+ } else {
+ log_err "vint too big"; fail;
+ }
+}
+
+fn new_doc(vec[u8] data) -> doc {
+ ret rec(data=data, start=0u, end=Vec.len[u8](data));
+}
+
+fn doc_at(vec[u8] data, uint start) -> doc {
+ auto elt_tag = vint_at(data, start);
+ auto elt_size = vint_at(data, elt_tag._1);
+ auto end = elt_size._1 + elt_size._0;
+ ret rec(data=data, start=elt_size._1, end=end);
+}
+
+fn maybe_get_doc(doc d, uint tg) -> Option.t[doc] {
+ auto pos = d.start;
+ while (pos < d.end) {
+ auto elt_tag = vint_at(d.data, pos);
+ auto elt_size = vint_at(d.data, elt_tag._1);
+ pos = elt_size._1 + elt_size._0;
+ if (elt_tag._0 == tg) {
+ ret some[doc](rec(data=d.data, start=elt_size._1, end=pos));
+ }
+ }
+ ret none[doc];
+}
+
+fn get_doc(doc d, uint tg) -> doc {
+ alt (maybe_get_doc(d, tg)) {
+ case (some[doc](?d)) {ret d;}
+ case (none[doc]) {
+ log_err "failed to find block with tag " + UInt.to_str(tg, 10u);
+ fail;
+ }
+ }
+}
+
+iter docs(doc d) -> tup(uint, doc) {
+ auto pos = d.start;
+ while (pos < d.end) {
+ auto elt_tag = vint_at(d.data, pos);
+ auto elt_size = vint_at(d.data, elt_tag._1);
+ pos = elt_size._1 + elt_size._0;
+ put tup(elt_tag._0, rec(data=d.data, start=elt_size._1, end=pos));
+ }
+}
+
+iter tagged_docs(doc d, uint tg) -> doc {
+ auto pos = d.start;
+ while (pos < d.end) {
+ auto elt_tag = vint_at(d.data, pos);
+ auto elt_size = vint_at(d.data, elt_tag._1);
+ pos = elt_size._1 + elt_size._0;
+ if (elt_tag._0 == tg) {
+ put rec(data=d.data, start=elt_size._1, end=pos);
+ }
+ }
+}
+
+fn doc_data(doc d) -> vec[u8] {
+ ret Vec.slice[u8](d.data, d.start, d.end);
+}
+
+fn be_uint_from_bytes(vec[u8] data, uint start, uint size) -> uint {
+ auto sz = size;
+ assert (sz <= 4u);
+ auto val = 0u;
+ auto pos = start;
+ while (sz > 0u) {
+ sz -= 1u;
+ val += (data.(pos) as uint) << (sz * 8u);
+ pos += 1u;
+ }
+ ret val;
+}
+
+fn doc_as_uint(doc d) -> uint {
+ ret be_uint_from_bytes(d.data, d.start, d.end - d.start);
+}
+
+// EBML writing
+
+type writer = rec(IO.buf_writer writer, mutable vec[uint] size_positions);
+
+fn write_sized_vint(&IO.buf_writer w, uint n, uint size) {
+ let vec[u8] buf;
+ alt (size) {
+ case (1u) {
+ buf = vec(0x80u8 | (n as u8));
+ }
+ case (2u) {
+ buf = vec(0x40u8 | ((n >> 8u) as u8),
+ (n & 0xffu) as u8);
+ }
+ case (3u) {
+ buf = vec(0x20u8 | ((n >> 16u) as u8),
+ ((n >> 8u) & 0xffu) as u8,
+ (n & 0xffu) as u8);
+ }
+ case (4u) {
+ buf = vec(0x10u8 | ((n >> 24u) as u8),
+ ((n >> 16u) & 0xffu) as u8,
+ ((n >> 8u) & 0xffu) as u8,
+ (n & 0xffu) as u8);
+ }
+ case (_) {
+ log_err "vint to write too big";
+ fail;
+ }
+ }
+
+ w.write(buf);
+}
+
+fn write_vint(&IO.buf_writer w, uint n) {
+ if (n < 0x7fu) { write_sized_vint(w, n, 1u); ret; }
+ if (n < 0x4000u) { write_sized_vint(w, n, 2u); ret; }
+ if (n < 0x200000u) { write_sized_vint(w, n, 3u); ret; }
+ if (n < 0x10000000u) { write_sized_vint(w, n, 4u); ret; }
+ log_err "vint to write too big";
+ fail;
+}
+
+fn create_writer(&IO.buf_writer w) -> writer {
+ let vec[uint] size_positions = vec();
+ ret rec(writer=w, mutable size_positions=size_positions);
+}
+
+// TODO: Provide a function to write the standard EBML header.
+
+fn start_tag(&writer w, uint tag_id) {
+ // Write the tag ID.
+ write_vint(w.writer, tag_id);
+
+ // Write a placeholder four-byte size.
+ w.size_positions += vec(w.writer.tell());
+ let vec[u8] zeroes = vec(0u8, 0u8, 0u8, 0u8);
+ w.writer.write(zeroes);
+}
+
+fn end_tag(&writer w) {
+ auto last_size_pos = Vec.pop[uint](w.size_positions);
+ auto cur_pos = w.writer.tell();
+ w.writer.seek(last_size_pos as int, IO.seek_set);
+ write_sized_vint(w.writer, cur_pos - last_size_pos - 4u, 4u);
+ w.writer.seek(cur_pos as int, IO.seek_set);
+}
+
+// TODO: optionally perform "relaxations" on end_tag to more efficiently
+// encode sizes; this is a fixed point iteration