Copyright ©1997-1998 by Axel T. Schreiner.  All Rights Reserved.



9
yacc and Limbo

Styx language

Inferno 1.0 contains an implementation of the parser generator yacc as a preprocessor for Limbo. It is virtually undocumented, but from a calculator on Lucent's server one can infer how to use it. As another example, here is a language to generate Styx messages:

Inferno Linux

# tcpd -01 'odin -v /'
203 <Tcp at 0x805a968> fd 3 read write bound listen
203 <Inet at 0x805adb0> host localhost port 1038 (127,0,0,1,4,14)
$ tcp perky!1038 /chan/perky
$ cat /chan/perky >/dev/null &
$ sl >/chan/perky
attach 1 1 axel inferno
cs: tcp!perky!1038 -> /net/tcp/clone 131.173.161.211!1038
[204] odin -v /
odin
kill 205
Tattach tag 1 fid 1 uname axel aname inferno
Rattach tag 1 fid 1 qid dir 1-2.01/May/97-13:23:59
clone 1 1 2
Tclone tag 1 fid 1 newfid 2
Rclone tag 1 fid 1
walk 1 2 tmp
Twalk tag 1 fid 2 name tmp
Rwalk tag 1 fid 2 qid dir 1-6049.25/Jun/97-18:30:06
stat 1 2
Tstat tag 1 fid 2
Rstat tag 1 fid 2 name tmp uid root gid root
qid dir 1-6049.25/Jun/97-18:30:06 mode dir 0777
atime 25/Jun/97-18:19:50 mtime 25/Jun/97-18:30:06
len 1024 type 0 dev 0
clunk 1 2
Tclunk tag 1 fid 2
Rclunk tag 1 fid 2
clunk 1 1
Tclunk tag 1 fid 1
Rclunk tag 1 fid 1
control-D
$ kill Cat
39
odin: eof



Prolog

Input to yacc starts with the customary prolog. There must be a %module phrase:
{09/sl.y}
# language to create styx messages interactively
# Number  decimal or 0octal
# String  "x" with no escapes(!), linefeed and empty permitted
# other between white space either reserved or taken for String
# silently truncated to NAMELEN as required

# Limbo text between %{ and %}: the modules used by the grammar actions
# and by the epilog (sys and bufio are loaded in init).
%{
include "sys.m"; sys: Sys;
include "bufio.m"; bufio: Bufio; Iobuf: import bufio;
include "draw.m"; Context: import Draw;
include "styx.m"; # constants for styx messages
%}

# Declaration of the generated module; init is the entry point.
%module Sl {
init: fn (ctxt: ref Context, args: list of string);

# Semantic value shared by scanner and parser:
# b holds a Number, i the value of a reserved word, s the text of a String.
YYSTYPE: adt { b: big; i: int; s: string; };
yylval: YYSTYPE;
}

# Reserved words; yylex stores the value from its token table in yylval.i.
# Message names:
%token <i> ATTACH CLONE CLUNK CREATE FLUSH NOP OPEN
%token <i> READ REMOVE STAT WALK WRITE WSTAT
# Open modes:
%token <i> OREAD OWRITE ORDWR OEXEC OTRUNC ORCLOSE
# Directory bit for a permission value:
%token <i> CHDIR
# Field selectors inside wstat; the value is an index into the message buffer:
%token <i> NAME GID PERM MTIME
# Literals:
%token <b> Number
%token <s> String
# Returned by yylex for an unterminated string; used in no rule,
# so it always causes a syntax error.
%token  Error

# Nonterminals whose int value is used by an enclosing action.
%type <i> msg m1 m2 b2
%%
{}
Comments extend from # to end of line.

%module name is compiled into name: module. The generated module exports the tokens as int constants, but it does not a priori export yyparse: fn (): int. YYSTYPE and yylval must be defined but need not be exported.

The epilog is inserted into the compiled code immediately after the prolog.



Grammar

Apart from comments there are no other syntactic peculiarities.
{09/sl.y}
# Input is a sequence of lines: an empty line, one message, or a line with
# a syntax error (skipped up to its linefeed).
# For a message line the first action runs before msg is parsed: it installs
# a fresh zero-filled buffer of the same length and sets m to 1, leaving
# byte 0 free for the message type. The second action runs at the linefeed;
# the mid-rule action counts as the second component, so the third component
# is msg, whose value becomes the type byte.
prog : # null
| prog '\n'
| prog  { msg = array[len msg] of { * => byte 0 }; m = 1; }
msg '\n' { msg[0] = byte $3;
# send the first m bytes; give up on a short or failed write
if (sys->write(stdout, msg, m) != m) {
sys->fprint(stderr, "sl: write error: %r\n");
exit;
}
}
| prog error '\n'

# One alternative per message. The operand rules (b2, b8, name, ...) append
# their bytes to the buffer as they are reduced, in the order written here.
# No alternative assigns a value, so msg presumably receives the value of its
# first component (the usual yacc default), i.e. the value yylex attached to
# the keyword; the prog rule stores that value as the type byte.
# Field names below follow the session transcript where it shows them;
# the others are assumptions to be confirmed against the Styx definition.
msg : ATTACH b2 b2 name name # tag fid uname aname
| CLONE b2 b2 b2 # tag fid newfid
| CLUNK b2 b2 # tag fid
| CREATE b2 b2 name perm mode # presumably tag fid name perm mode
| FLUSH b2 b2 # presumably tag oldtag
| NOP   { msg[m++] = byte 16rff;
# no operands: both bytes that other messages fill with their first b2 are set to 16rff
msg[m++] = byte 16rff;
}
| OPEN b2 b2 mode # presumably tag fid mode
| READ b2 b2 b8 b2 # presumably tag fid offset count
| REMOVE b2 b2 # tag fid
| STAT b2 b2 # tag fid
| WALK b2 b2 name # tag fid name
| WRITE b2 b2 b8 data # presumably tag fid offset, then count and bytes from data
| WSTAT b2 b2 wstat # tag fid, then selected fields of a status buffer

# Integer fields of 2, 4 and 8 bytes. Each rule appends the low-order bytes
# of a Number to msg, least significant byte first, and advances m.
b2	: Number	{ for (shift := 0; shift < 16; shift += 8)
				msg[m++] = byte (($1 >> shift) & big 16rff);
			}
b4	: Number	{ for (shift := 0; shift < 32; shift += 8)
				msg[m++] = byte (($1 >> shift) & big 16rff);
			}
b8	: Number	{ for (shift := 0; shift < 64; shift += 8)
				msg[m++] = byte (($1 >> shift) & big 16rff);
			}
{}
Here, the actions deposit bytes into a global buffer msg[] and advance an index m. At the end of an input line the buffer is sent to standard output.



Strings, modes, data, and status changes are a bit harder. A syntax for a file date is (as yet) missing:
{09/sl.y}
# A name always occupies NAMELEN bytes of the message: a longer string is
# cut off without notice, a shorter one is padded with zero bytes.
name	: String	{ raw := array of byte $1;
			  if (len raw > Styx->NAMELEN)
				msg[m:] = raw[0:Styx->NAMELEN];
			  else {
				# clear the field first, it may have been set before
				msg[m:] = array[Styx->NAMELEN] of { * => byte 0 };
				msg[m:] = raw;
			  }
			  m += Styx->NAMELEN;
			}
# Permissions are a 4 byte little-endian value: either a plain number, or
# the CHDIR keyword followed by a number, in which case the keyword value
# is or-ed into the number.
perm	: CHDIR Number	{ bits := $1 | int $2;
			  for (shift := 0; shift < 32; shift += 8)
				msg[m++] = byte ((bits >> shift) & 16rff);
			}
	| b4

# An open mode is one byte: exactly one access keyword (m1) or-ed with any
# number of OTRUNC and ORCLOSE flags (m2, possibly empty).
mode	: m1 m2		{ msg[m++] = byte (($1 | $2) & 16rff); }

# access keyword; its token value is passed up unchanged
m1	: OREAD | OWRITE | ORDWR | OEXEC

# flags accumulate from left to right, starting at zero
m2	:		{ $$ = 0; }	# null
	| m2 OTRUNC	{ $$ = $1 | $2; }
	| m2 ORCLOSE	{ $$ = $1 | $2; }

# Payload of a write: a 2 byte little-endian count, one byte that is left
# untouched (zero in the fresh buffer), then the bytes of the string.
# A string longer than DATALEN is reported and replaced by an empty payload
# (count zero plus the skipped byte) so that the buffer cannot overflow.
data	: String	{ x := array of byte $1; l := len x;
			  if (l > Styx->DATALEN) {
				# yyerror appends the linefeed itself
				yyerror("data too long");
				msg[m:] = array[3] of { * => byte 0 };
				m += 3;
			  } else {
				msg[m++] = byte (l       & 16rff);
				msg[m++] = byte (l >>  8 & 16rff);
				# skip one byte between count and data
				msg[m+1:] = x;
				m += l+1;
			  }
			}

# Status change: one or more field settings. After every setting m is moved
# to 5 + STATLEN, so that the prog rule writes the complete fixed-size
# message no matter which fields were set or in which order.
wstat : w   { m = 5 + Styx->STATLEN; } # index to end
| wstat w  { m = 5 + Styx->STATLEN; }

# One field setting. The mid-rule action runs before the operand is parsed
# and moves m to the index that yylex attached to the keyword; the operand
# rule (name or b4) then stores its bytes there.
w : NAME   { m = $1; } # token indicates index
name
| GID   { m = $1; }
name
| PERM   { m = $1; }
b4
| MTIME   { m = $1; }
b4
{}



Epilog

The epilog should define yyerror: fn (err: string) and yylex: fn (): int, or suitable functions must be imported. Here, the buffer msg[] and its index m are defined:
{09/sl.y}
%%

# Module-level state shared by init, yylex, yyerror and the grammar actions.
stdin: ref Iobuf; # buffered standard input, read by yylex
stdout, stderr: ref Sys->FD; # messages go to stdout, diagnostics to stderr
# 16 bytes precede the data of a write message (type 1, two b2, one b8,
# count 2, one skipped byte); DATALEN is the longest data the grammar accepts.
msg := array[16 + Styx->DATALEN] of byte; # current message
m: int;      # next byte in msg

# Entry point: load the modules, connect the standard files and run the
# parser until end of input. Both arguments are ignored.
# Terminates with a diagnostic if Bufio cannot be loaded or standard input
# cannot be buffered, rather than failing on a nil reference later.
init (nil: ref Draw->Context, nil: list of string) {
	sys = load Sys Sys->PATH;
	# set up the output files first so that failures below can be reported
	stdout = sys->fildes(1);
	stderr = sys->fildes(2);
	bufio = load Bufio Bufio->PATH;
	if (bufio == nil) {
		sys->fprint(stderr, "sl: cannot load %s: %r\n", Bufio->PATH);
		exit;
	}
	stdin = bufio->fopen(sys->fildes(0), Bufio->OREAD);
	if (stdin == nil) {
		sys->fprint(stderr, "sl: cannot buffer standard input: %r\n");
		exit;
	}
	yyparse();
}

# Error reporting hook: writes err followed by a linefeed to stderr.
# Callers must not terminate err with a linefeed of their own.
yyerror (err: string) {
sys->fprint(stderr, "%s\n", err);
}

# Reserved words of the language.
#	name	spelling in the input
#	symbol	token number returned by yylex
#	value	stored in yylval.i: the message type for message names, the mode
#		or permission bit for the upper-case words, and for the wstat
#		selectors the index in msg where the field starts
Token: adt { name: string; symbol, value: int; };
token := array[] of {
	# messages
	Token("attach",	ATTACH,	Styx->Tattach),
	Token("clone",	CLONE,	Styx->Tclone),
	Token("clunk",	CLUNK,	Styx->Tclunk),
	Token("create",	CREATE,	Styx->Tcreate),
	Token("flush",	FLUSH,	Styx->Tflush),
	Token("nop",	NOP,	Styx->Tnop),
	Token("open",	OPEN,	Styx->Topen),
	Token("read",	READ,	Styx->Tread),
	Token("remove",	REMOVE,	Styx->Tremove),
	Token("stat",	STAT,	Styx->Tstat),
	Token("walk",	WALK,	Styx->Twalk),
	Token("write",	WRITE,	Styx->Twrite),
	Token("wstat",	WSTAT,	Styx->Twstat),
	# open modes and the directory bit
	Token("OREAD",	OREAD,	Sys->OREAD),
	Token("OWRITE",	OWRITE,	Sys->OWRITE),
	Token("ORDWR",	ORDWR,	Sys->ORDWR),
	Token("OEXEC",	OEXEC,	Styx->OEXEC),
	Token("OTRUNC",	OTRUNC,	Sys->OTRUNC),
	Token("ORCLOSE",	ORCLOSE,	Sys->ORCLOSE),
	Token("CHDIR",	CHDIR,	Sys->CHDIR),
	# offsets in Twstat: 5 bytes (type, tag, fid) precede the status buffer.
	# Name fields are NAMELEN bytes wide, the same constant the name rule
	# uses when it stores them (NAMELEN is presumably 28, the literal this
	# table used before - confirm against styx.m). The 8 is presumably the
	# qid; perm and mtime are presumably 4 byte fields with atime in between.
	Token("name",	NAME,	5),
	Token("gid",	GID,	5 + 2*Styx->NAMELEN),
	Token("perm",	PERM,	5 + 3*Styx->NAMELEN + 8),
	Token("mtime",	MTIME,	5 + 3*Styx->NAMELEN + 16) };
{}
yylex() uses Iobuf to split input and linear search in a table of tokens. The result is a token value. Assignments to yylval appear as $x in the grammar actions. Apparently, -1 serves as end of file.



{09/sl.y}
# Scanner called by the generated parser.
# Reads standard input in pieces that end at white space, # or a double
# quote, and returns
#	'\n'	for a linefeed and for a comment (# to end of line),
#	String	for "text" (no escapes; may be empty or contain linefeeds),
#		yylval.s holds the text without the quotes,
#	Error	for a quoted string that is not closed before end of input,
#	Number	for a decimal or 0-prefixed octal number, value in yylval.b,
#	a reserved word's token number, with its table value in yylval.i,
#	String	for any other word (including a malformed number such as 12ab
#		or 089), text in yylval.s,
#	-1	at end of input.
yylex (): int {
eof:	for (;;) {
		word := stdin.gett(" \t\n#\"");
		if (word == nil) break eof;

		# gett includes the separator that ended the piece. At end of input
		# there may be none, and then the last character belongs to the word
		# and must neither be pushed back nor removed.
		last := word[len word-1];
		sep := len word > 1 && (last == ' ' || last == '\t' || last == '\n' || last == '#' || last == '"');

word:		case word[0] {
		' ' or '\t' =>
			continue;
		'\n' =>
			return '\n';
		'#' =>
			# skip the rest of the comment; it counts as the end of the line
			if (word[len word-1] != '\n') stdin.gets('\n');
			return '\n';
		'"' =>
			word = stdin.gets('"');
			if (word == nil) break eof;
			# test for the closing quote first: a single character at end
			# of input is an unterminated string, not an empty one
			if (word[len word-1] != '"')
				return Error;
			if (len word == 1)
				yylval.s = "";
			else
				yylval.s = word[0:len word-1];
			return String;
		'0' to '9' =>
			if (sep) {
				stdin.ungetc();
				word = word[0:len word-1];
			}
			yylval.b = big(word[0] - '0');	## interpret 0777
			base := 10; if (word[0] == '0') base = 8;
			for (i := 1; i < len word; ++ i) {
				d := word[i] - '0';
				# not a digit of this base (this covers 8 and 9 in an
				# octal number): the word is an ordinary String
				if (d < 0 || d >= base)
					break word;
				yylval.b *= big base;
				yylval.b += big d;
			}
			return Number;
		'a' to 'z' or 'A' to 'Z' =>
			if (sep) {
				stdin.ungetc();
				word = word[0:len word-1];
			}
			# linear search for a reserved word; anything else is a String
			for (i := 0; i < len token; ++ i)
				if (word == token[i].name) {
					yylval.i = token[i].value;
					return token[i].symbol;
				}
		* =>
			if (sep) {
				stdin.ungetc();
				word = word[0:len word-1];
			}
		}
		yylval.s = word;
		return String;
	}
	return -1;	# EOF
}
{}


Compilation

$ yacc -o sl.b sl.y
$ limbo sl.b

In particular, there are the following options:

-d to output the module definition separately,
-Dn to set yydebug (constant, at most 4 is useful),
-s stem to set the stem of the generated files, or
-o name to set the name of the generated Limbo file,
-v to show the parser table in y.output.

Apparently, limbo accepts lines like #line number "file" to relate error messages.

26/Apr/1998