#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define D if(0) /* PARSER -> numbers (int/float) -> symbols -> lists & trees -> strings -> packets basic parser rules: end of symbol/number ( start of list/tree atom " start of string atom # comment terminated by newline packet (type + raw data) */ #define COMMENT_LEFT PF_PARSE_COMMENT_LEFT #define COMMENT_RIGHT PF_PARSE_COMMENT_RIGHT #define LIST_LEFT PF_PARSE_LIST_LEFT #define LIST_RIGHT PF_PARSE_LIST_RIGHT #define QUOTE PF_PARSE_QUOTE #define ESCAPE PF_PARSE_ESCAPE #define RAW PF_PARSE_RAW /* since Fri Nov 3 2006, the parser has a pre-emptive core the routine takes two arguments: - stream (stream packet) - state (list) this is no more than a C program where CALL/RETURN are replaced and functions are accessed as local labels. the 'state' variable is the parser task's return stack. i'm not ready to do a coroutine implementation for C, and i'd like to keep the parser implemented in C to be fast enough: it is used as network protocol in addition to just code files. note that the parser state can be anything after CALL(nextchar), since the PF code calling this can mess up things, so checks are necessary. 
*/ // default size string static pf_packet_t new_string(void){ pf_packet_t p = pf_packet_string_buffer(20); return p; } static pf_error_t save_char(pf_vm_t *vm, pf_list_t *state, int c){ pf_string_t *string; pf_atom_t *a; PF_ASSERT(state); if ((a = state->first) && (a->t == a_packet) && (string = (pf_string_t *)a->w.w_packet) && (string->super.type == PF_STRING)) { pf_string_allot(string, 1)[0] = c; EXIT; } THROW(e_internal, "save_char() invalid state"); } static pf_error_t save_hexdigit(pf_vm_t *vm, pf_list_t *state, int c){ if ((state->elements < 1) || (state->first->t != a_int)) { THROW(e_internal, "save_hexdigit() invalid state"); } if ('0' <= c <= '9') { c -= '0'; } else if ('a' <= c <= 'f') { c-= 'a'; } else if ('A' <= c <= 'F') { c-= 'A'; } else { THROW(e_parse, "'%c' is not a valid hex digit", c); } state->first->w.w_int <<= 4; state->first->w.w_int += c; return c; } #define DD if (1) #define DUMMY 0 #define CONTINUE(label) pf_list_push_pointer(state, &&label) #define COMMAND(command) { CONTINUE(done); goto command; } #define KILL pf_list_clear(state) #define CALL(label) ({ __label__ nxt; CONTINUE(nxt); goto label; nxt: DUMMY; }) #define RETURN goto exit #define NEW_STRING pf_list_push_packet(state, new_string()) #define END_OF_FILE -1 // this opti doesnt seem to matter much #define SAVE_CHAR {e = save_char(vm, state, c); if (e) return e;} PF_PRIMITIVE(pf_word_read_task){ CHECKN(2); pf_list_t *state = LIST(ARG0); pf_stream_t *stream = STREAM(ARG1); pf_error_t e = e_ok; // need to set e for error. default == ok. 
int c, bytes; goto enter; /*** ATOM ***/ atom: CALL(whitespace_or_eof); atom0: switch(c){ case COMMENT_LEFT: CALL(comment); goto atom; case LIST_RIGHT: CALL(nextchar); e = e_endoflist; RETURN; case LIST_LEFT: goto list; case QUOTE: goto string; case RAW: goto packet; default: goto pure; } /*** PACKET ***/ // FIXME: this should be extensible // means a packet method should have a read equivalent method packet: CALL(nextchar); if (RAW != c){ THROW(e_internal, "expected raw packet"); } NEW_STRING; packet_type_next: CALL(nextchar); if (c){ SAVE_CHAR; goto packet_type_next; } else { void *data, *endx; char *ctype = STRING(state->first); if (!ctype[0]) { THROW(e_parse, "zero size symbol"); } pf_symbol_t *type = pf_symbol(ctype); pf_packet_t p = pf_factory_newpacket(type); if (!p) { THROW(e_inval, "can't parse packet type %s", type->s_name); } pf_stack_drop(state); pf_list_push_packet(state, p); data = pf_packet_data(p); endx = data + pf_packet_data_size(p); pf_list_push_pointer(state, endx); pf_list_push_pointer(state, data); goto packet_nextchunk; } packet_nextchunk: if ((state->elements < 3) || (state->first->t != a_pointer) || (state->first->next->t != a_pointer) || (state->first->next->next->t != a_packet)){ THROW(e_internal, "inconsistent state during packet chunk reading"); } else { void *data = state->first->w.w_pointer; void *endx = state->first->next->w.w_pointer; int bytes_read; for (;;){ bytes_read = stream->m->m_read(stream, data, endx-data); if (bytes_read == 0){ goto eof_error; // always an error } else if (bytes_read < 0){ switch(errno){ case EINTR: break; // just continue case EAGAIN: PF_ASSERT(e == e_ok); state->first->w.w_pointer = data; CONTINUE(packet_nextchunk); goto undefined; default: goto read_error; } } else { data += bytes_read; if (data == endx){ pf_stack_drop(state); pf_stack_drop(state); pf_list_push_atom(s, pf_list_pop_atom(state)); RETURN; } } } } /**** STRING ****/ string: NEW_STRING; CALL(nextchar); if (c != QUOTE){ THROW(e_parse, 
"expected '%c', got '%c'", QUOTE, c); } string_next: CALL(nextchar); if (QUOTE == c){ PF_ASSERT(state->first); pf_list_push_atom(s, pf_list_pop_atom(state)); RETURN; } if (ESCAPE == c){ CALL(nextchar); switch(c){ case '"': case '#': case '\\': break; case 'n': c = '\n'; break; case 't': c = '\t'; break; case 'x': pf_list_push_int(state, 0); CALL(nextchar); if (e = save_hexdigit(vm, state, c)) return e; CALL(nextchar); if (e = save_hexdigit(vm, state, c)) return e; c = pf_list_pop(state).w_int; break; default: D pf_post("WARNING: '\\%c' is not an escape code." "ignoring backslash."); break; } } SAVE_CHAR; goto string_next; /**** LIST ****/ list: CALL(whitespace); CALL(nextchar); if (LIST_LEFT != c){ THROW(e_parse, "atom is not a list, starts with '%c'", c); } pf_list_push_list(state, pf_list_new()); list_next: CALL(whitespace); // don't tolerate EOF here CALL(atom0); // consistency check if ((!state->first) ||(state->first->t != a_list)) { THROW(e_internal, "state does not contain a list"); } switch(e){ case e_ok: // got one atom, add to queue pf_list_queue_atom(state->first->w.w_list, pf_list_pop_atom(s)); goto list_next; case e_endoflist: // got list terminator: done parsing pf_list_push_atom(s, pf_list_pop_atom(state)); e = e_ok; RETURN; case e_eof: // end of input is a syntax error here: need list terminator THROW(e_again, "EOF during parsing"); //THROW(e_parse, "non-terminated list of %d elements", // state->first->w.w_list->elements); default: PF_ASSERT(0); } /**** COMMENT ****/ comment: CALL(nextchar); PF_ASSERT(COMMENT_LEFT == c); comment_next: CALL(nextchar_or_eof); if (COMMENT_RIGHT == c) RETURN; if (END_OF_FILE == c) RETURN; // tolerate EOF to terminate comment goto comment_next; /**** PURE : NUMBER/SYMBOL ****/ pure: NEW_STRING; pure_next: CALL(nextchar_or_eof); // only place where EOF is not an error // done? 
if ((c == END_OF_FILE) |(c == QUOTE) |(c == LIST_LEFT) |(c == LIST_RIGHT) |(isspace(c))) { if (!isspace(c)) { CALL(putback); } // convert string to atom char *next, *str = STRING(state->first); int has_decimal = 0; // pf_post("PARSING: %08x %s", state->first, str); // check if the string has a decimal point for(next = str; *next; next++){ if (*next == PF_PARSE_DECIMAL) { has_decimal = 1; break; } } // try parsing as a number (int or float) first if (has_decimal){ // try float float f = strtod(str, &next); if (next[0] == 0){ // got everything? PUSH_FLOAT(f); goto pure_done; } } else { // try int int i = strtol(str, &next, 0); if (next[0] == 0){ // got everything? PUSH_INT(i); goto pure_done; } } // number parsing failed: it's a symbol PUSH_SYMBOL(pf_symbol(str)); goto pure_done; } // collect until separator SAVE_CHAR; goto pure_next; pure_done: pf_stack_drop(state); RETURN; /**** CHARACTER INPUT ****/ nextchar_or_eof: errno = 0; // make sure that errno is zero when no error occurs bytes = stream->m->m_get_char(stream, &c); if (!bytes) c = END_OF_FILE; else if (bytes < 0){ switch(errno){ case EINTR: goto nextchar_or_eof; // just restart case EAGAIN: CONTINUE(nextchar_or_eof); // retry here goto undefined; default: goto read_error; } } RETURN; nextchar: CALL(nextchar_or_eof); if (END_OF_FILE == c) goto eof_error; RETURN; putback: stream->m->m_unget_char(stream, c); RETURN; /**** WHITESPACE ****/ whitespace: CALL(nextchar); if (isspace(c)) goto whitespace; goto putback; whitespace_or_eof: CALL(nextchar_or_eof); if (END_OF_FILE == c) goto eof_ok; if (isspace(c)) goto whitespace_or_eof; goto putback; /**** LINE ****/ line: NEW_STRING; line_next: CALL(nextchar); if ('\n' != c){ SAVE_CHAR; goto line_next; } pf_list_push_atom(s, pf_list_pop_atom(state)); RETURN; /**** LINE ****/ slurp: NEW_STRING; slurp_next: CALL(nextchar_or_eof); if (c != END_OF_FILE){ SAVE_CHAR; goto slurp_next; } pf_list_push_atom(s, pf_list_pop_atom(state)); RETURN; /**** BYTES ****/ bytes: { int size 
= pf_list_pop(state).w_int; if (size < 0) THROW(e_inval, "negative read size"); NEW_STRING; pf_string_t *string = (pf_string_t *)state->first->w.w_packet; void *data = pf_string_allot(string, size); pf_list_push_pointer(state, data+size); pf_list_push_pointer(state, data); goto packet_nextchunk; } /**** RAWPACKET ****/ rawpacket: { PF_ASSERT(state->first); PF_ASSERT(state->first->t == a_symbol); pf_symbol_t *type = pf_list_pop(state).w_symbol; pf_packet_t packet = pf_factory_newpacket(type); if (!packet) { THROW(e_inval, "can't create packet buffer for %s", type->s_name); } pf_list_push_packet(state, packet); void *data = pf_packet_data(packet); PF_ASSERT(data); int size = pf_packet_data_size(packet); if (!size) { THROW(e_inval, "zero size packet buffer for %s", type->s_name); } pf_list_push_pointer(state, data+size); pf_list_push_pointer(state, data); goto packet_nextchunk; } /**** CONTROL ****/ enter: if (state->elements) goto exit; // continue COMMAND(atom); // start a new program exit: PF_ASSERT(state->elements); switch(state->first->t){ case a_pointer: goto *((void **)pf_list_pop(state).w_pointer); case a_int: COMMAND(bytes); case a_symbol: { pf_symbol_t *command = state->first->w.w_symbol; if (pf_symbol("atom") == command) COMMAND(atom); if (pf_symbol("line") == command) COMMAND(line); if (pf_symbol("slurp") == command) COMMAND(slurp); // interpret symbol as packet type CONTINUE(done); pf_stack_swap(state); goto rawpacket; } default: THROW(e_inval, "invalid state list"); } read_error: switch(errno){ case EINTR: PF_ASSERT(0); // need to handle above: restart return e_internal; case 0: PF_ASSERT(0); default: THROW(e_file, "read error : %s (%d)", strerror(errno), errno); } undefined: pf_list_push_atom(s, pf_atom_new()); // result is undefined return e; eof_error: THROW(e_again, "EOF during parsing"); eof_ok: // eof is a good kind of error, it means no more actual data is // present, but this condition didn't happen in the middle of // expecting something, which 
would be e_parse. e = e_eof; KILL; // actually, state should be clean already.. no? goto undefined; done: if (state->elements > 100){ THROW(e_internal, "parser state memory leak"); } // map errors switch(e){ case e_endoflist: THROW(e_parse, "unexpected ')'"); default: return e; } } /* BLOCKING INPUT */ // bootstrap reader PF_PRIMITIVE(pf_word_read_atom_boot){ PF_ASSERT(ARG0); PF_ASSERT(ARG0->t == a_packet); CHECKN(1); pf_stream_t *stream = STREAM(ARG0); pf_atom_t *stream_atom = ARG0; PUSH_LIST(pf_list_new()); pf_error_t e; TRY(pf_word_read_task); PF_ASSERT(ARG0); if (ARG0->t != a_undef){ NIP2; EXIT; } // blocking is error here (setup.pf is a file, so that's ok) e = e_file; error: switch(e){ case e_idle: default: // clean up stack after error to keep bootstrap interpreter // happy, since it doesn't do stack unrolling. // pf_post("DROPPING"); while (ARG0 != stream_atom) DROP; DROP; return e; } } static pf_packet_t string_buffer(int size){ pf_packet_t p = pf_packet_string_using_buffer( pf_alloc(size), size, 0); return p; } // nonblocking read for message based file descriptors (i.e. 
UDP) static PF_PRIMITIVE(read_pending_messages){ CHECKN(2); pf_list_t *l = pf_list_new(); pf_stream_t *stream = STREAM(ARG0); int bytes_wanted = INT(ARG1); if (bytes_wanted < 0) return e_inval; // use this for 'large enough' later pf_packet_t p; char *data; int bytes_read; PUSH_LIST(l); for (;;) { p = string_buffer(bytes_wanted+1); if (!p) { THROW(e_inval, "can't create buffer of size %d'", bytes_wanted); } pf_list_push_packet(l, p); data = pf_packet_string_allot(p, bytes_wanted); bytes_read = stream->m->m_read(stream, data, bytes_wanted); if (bytes_read == 0){ goto droplast; } else if (bytes_read < 0){ if (EAGAIN == errno) { goto droplast; } else { THROW(e_file, "can't read from stream : %s", strerror(errno)); } } else if (bytes_read < bytes_wanted){ pf_packet_string_allot(p, bytes_read - bytes_wanted); } } droplast: pf_stack_drop(l); done: pf_list_reverse(l); NIP2; EXIT; } PF_PRIMITIVE(pf_word_read_setup){ PF_REGISTER_FUNCTION (read_pending_messages, "read-pending-messages", "( maxbytes stream -- list-of-strings )\t" "Use non-blocking read to read all raw atomic message."); PF_REGISTER_FUNCTION (pf_word_read_atom_boot, "read-atom-boot", "( stream -- atom )\tBootstrap reader: blocking is error."); PF_REGISTER_FUNCTION (pf_word_read_task, "read-task", "( stream task -- stream task atom)\tYield control to parser."); EXIT; }