commit b0a30d7fc6f729e1a6a37a2647de07cfccbbc7f0
Author: James Halliday
Date: Tue Jun 1 11:18:15 2010 +0000
<img
src="/images/binary-stream.png" alt="binary stream"
class="right-art"
width="248" height="361"
/>

Recently, I've been collaborating on a web-based virtual machine interface to qemu over its VNC interface. The latest prototype uses node.js for the socket.io library, which provides an abstract, portable, and efficient interface for passing messages between a browser and web server.

In order to bring down latency, I needed a way to decode the RFB protocol, but node.js is asynchronous, so I couldn't just do:

// Hypothetical blocking reads: this is what the parser would like to write,
// but node.js delivers socket data asynchronously, so it is not possible.
var width = sock.read(2); var height = sock.read(2);

I had to collect up lots of tiny buffers emitted asynchronously and treat them all as one for the purposes of parsing, which went something like this:

// Hand-rolled parser: queue every chunk the socket emits and pull the two
// 2-byte fields (width, then height) out of the queue as bytes arrive.
var buffers = [];           // chunks received but not yet fully consumed
var bytes = 0, offset = 0;  // total bytes received / total bytes consumed
var headPos = 0;            // read position inside buffers[0]; kept between
                            // read() calls so a partly used chunk is resumed

sock.addListener('data', function (buf) {
    buffers.push(buf);
    bytes += buf.length;

    // Two independent checks (not else-if) so that both fields are parsed
    // when a single chunk already carries all four bytes.
    if (offset == 0 && bytes >= 2) {
        var width = read(2);
    }
    if (offset == 2 && bytes >= 4) {
        var height = read(2);
    }

    // Copy the next n queued bytes into a fresh Buffer and consume them.
    // The caller must first make sure at least n unread bytes are queued
    // (the `bytes >= ...` checks above), otherwise buffers[0] runs out.
    function read (n) {
        var buffer = new Buffer(n);
        for (var i = 0; i < n; i++) {
            // Always read from the head chunk; drop it once it is used up
            // (the loop also skips over zero-length chunks).
            while (headPos >= buffers[0].length) {
                buffers.shift();
                headPos = 0;
            }
            buffer[i] = buffers[0][headPos++];
        }
        offset += n;
        return buffer;
    }
});

Yikes, what a mess!

To preventatively cull down the impending complexity, I hacked together node-bufferlist to build linked lists of buffers from the network stream, which made the code simpler:

var BufferList = require('bufferlist').BufferList;
var bufferList = new BufferList;
// Parser state: 0 = waiting for width, 1 = waiting for height, 2 = done.
var state = 0;

sock.addListener('data', function (buf) {
    bufferList.push(buf);

    // Two independent checks (not else-if) so that height is parsed in the
    // same event when one chunk already carries both fields.
    if (state == 0 && bufferList.length >= 2) {
        var width = read(2);
        state ++;
    }
    if (state == 1 && bufferList.length >= 2) {
        var height = read(2);
        state ++;
    }

    // Take the next n bytes from the list and then move past them.
    // NOTE(review): relies on take() not consuming and on length shrinking
    // after advance() -- confirm against the node-bufferlist docs.
    function read (n) {
        var s = bufferList.take(n);
        // Advance by the number of bytes actually taken, not a fixed 2,
        // so read() stays correct for any field size.
        bufferList.advance(n);
        return s;
    }
});

But even with this tool, keeping track of the parser state by hand was very error-prone and ugly. Plus, the real RFB parser would need branches, loops, and nested parsing.

method chain

Borrowing some ideas from haskell's binary Get monad and ruby's methodchain gem, I built a nifty fluent interface for monadic, asynchronous binary bufferlist parsing. Said more simply in code:

var BufferList = require('bufferlist').BufferList;
var Binary = require('bufferlist/binary').Binary;

var bufferList = new BufferList;
// Describe the parse as a chain over the same list the socket fills below.
// (The variable is `bufferList`; the lower-case `bufferlist` is undeclared.)
Binary(bufferList)
    .getWord16be('width')   // result is exposed to tap() as vars.width
    .getWord16be('height')  // result is exposed to tap() as vars.height
    .tap(function (vars) {
        var width = vars.width;
        var height = vars.height;
        // ...
        // You can even start a new chain inside this block!
    })
    .end()
;
// Feed incoming chunks into the list that the chain above is parsing.
sock.addListener('data', function (buf) {
    bufferList.push(buf);
});

Sooooooo much better. Nicer still, with this approach I was able to add goodies like when, unless, repeat, forever, and into, along with some nifty EventEmitter hooks. These abstractions made the resulting node-rfb far prettier and more maintainable than it would have been otherwise.

For the code and more examples, you can check out node-bufferlist on github.

more
git clone http://substack.net/blog.git