/*
* Copyright 2023 Oleg Borodin <borodin@unix7.org>
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
/* Result codes for functions that report only success or failure. */
#define RES_OK 0
#define RES_ERR (-1)

/*
 * Growable in-memory byte stream with separate read and write cursors.
 */
typedef struct {
    /* Read cursor: number of bytes of data already consumed. */
    size_t rsize;
    /* Write cursor: number of bytes stored in data. */
    size_t wsize;
    /* Number of bytes currently allocated for data. */
    size_t capa;
    /* Heap buffer holding the stream contents; released by bstream_destroy(). */
    uint8_t* data;
} bstream_t;

/* Size in bytes of the buffer allocated by bstream_init(). */
#define STREAM_INITCAPA 64

/*
 * Prepare an empty stream backed by a freshly allocated buffer of
 * STREAM_INITCAPA bytes.
 *
 * Returns RES_OK on success. Returns RES_ERR when stream is NULL or the
 * allocation fails; in the latter case the stream is left empty with a NULL
 * buffer and zero capacity so that it can still be passed to
 * bstream_destroy().
 */
int bstream_init(bstream_t *stream) {
    if (stream == NULL) {
        return RES_ERR;
    }
    stream->rsize = 0;
    stream->wsize = 0;
    stream->data = malloc(STREAM_INITCAPA);
    if (stream->data == NULL) {
        stream->capa = 0;
        return RES_ERR;
    }
    stream->capa = STREAM_INITCAPA;
    return RES_OK;
}
/*
 * Print every byte written to the stream so far to standard output, one
 * character at a time, without moving either cursor.
 *
 * Returns the number of bytes that have been written to the stream.
 */
ssize_t bstream_dump(bstream_t *stream) {
    const uint8_t *cursor = stream->data;
    size_t remaining = stream->wsize;

    while (remaining > 0) {
        printf("%c", *cursor);
        cursor++;
        remaining--;
    }
    return stream->wsize;
}
/*
 * Append size bytes to the end of the stream, growing the buffer as needed.
 *
 * stream - destination stream
 * buf    - bytes to append; when NULL the write cursor is still advanced by
 *          size but nothing is copied, so the new bytes are uninitialised
 * size   - number of bytes to append; must not be negative
 *
 * Returns size on success. Returns -1 when stream is NULL, size is negative,
 * the resulting length would overflow size_t, or the buffer cannot be grown;
 * in all of these cases the stream is left unchanged.
 */
ssize_t bstream_write(bstream_t *stream, void *buf, ssize_t size) {
    if (stream == NULL || size < 0) {
        return -1;
    }
    size_t count = (size_t)size;
    if (count > SIZE_MAX - stream->wsize) {
        return -1;
    }
    size_t needed = stream->wsize + count;

    if (needed > stream->capa) {
        /* Keep doubling until the request fits; one doubling is not always enough. */
        size_t newcapa = (stream->capa > 0) ? stream->capa : 1;
        while (newcapa < needed) {
            if (newcapa > SIZE_MAX / 2) {
                newcapa = needed;
                break;
            }
            newcapa *= 2;
        }
        /* Use a temporary so the old buffer is not lost if realloc fails. */
        uint8_t *grown = realloc(stream->data, newcapa);
        if (grown == NULL) {
            return -1;
        }
        stream->data = grown;
        stream->capa = newcapa;
    }
    if (buf != NULL && count > 0) {
        memcpy(&stream->data[stream->wsize], buf, count);
    }
    stream->wsize += count;
    return size;
}
/*
 * Consume up to size unread bytes from the stream.
 *
 * stream - source stream
 * buf    - destination for the bytes; when NULL the bytes are skipped
 *          (the read cursor advances but nothing is copied)
 * size   - maximum number of bytes to consume; must not be negative
 *
 * Returns the number of bytes consumed, which is smaller than size when
 * fewer unread bytes remain, or -1 when stream is NULL or size is negative.
 */
ssize_t bstream_read(bstream_t *stream, void *buf, ssize_t size) {
    /* Reject negative sizes: compared as unsigned they would mean "read all". */
    if (stream == NULL || size < 0) {
        return -1;
    }
    size_t unread = stream->wsize - stream->rsize;
    size_t count = (size_t)size;
    if (count > unread) {
        count = unread;
    }
    if (buf != NULL && count > 0) {
        memcpy(buf, &stream->data[stream->rsize], count);
    }
    stream->rsize += count;
    return (ssize_t)count;
}
/*
 * Consume and return the next unread byte of the stream.
 *
 * Returns the byte converted to char, or EOF converted to char when every
 * written byte has already been read.
 *
 * NOTE(review): because the return type is char, a stored 0xFF byte cannot
 * be told apart from the end marker, and where plain char is unsigned the
 * result never compares equal to EOF - confirm callers account for this.
 */
char bstream_getc(bstream_t *stream) {
    if (stream->rsize == stream->wsize) {
        return EOF;
    }
    uint8_t byte = stream->data[stream->rsize];
    stream->rsize += 1;
    return byte;
}
/*
 * Compute the read position that lies size bytes before the current read
 * cursor. The stream itself is not modified.
 *
 * The subtraction is unsigned, so the result wraps around when size is
 * larger than the current read position.
 *
 * NOTE(review): despite the name nothing is rewound here; confirm whether
 * callers are expected to store the returned position themselves.
 */
size_t bstream_rrewind(bstream_t *stream, ssize_t size) {
    size_t position = stream->rsize;

    position -= size;
    return position;
}
/*
 * Compute the write position that lies size bytes before the current write
 * cursor. The stream itself is not modified.
 *
 * The subtraction is unsigned, so the result wraps around when size is
 * larger than the current write position.
 *
 * NOTE(review): despite the name nothing is rewound here; confirm whether
 * callers are expected to store the returned position themselves.
 */
size_t bstream_wrewind(bstream_t *stream, ssize_t size) {
    size_t position = stream->wsize;

    position -= size;
    return position;
}
/*
 * Release the buffer owned by the stream and reset the stream to an empty
 * state. The bstream_t object itself is not freed.
 *
 * Passing NULL is a no-op. Because the buffer pointer is cleared, calling
 * this function twice on the same stream is harmless.
 */
void bstream_destroy(bstream_t *stream) {
    if (stream == NULL) {
        return;
    }
    free(stream->data);
    stream->data = NULL;
    stream->capa = 0;
    stream->wsize = 0;
    stream->rsize = 0;
}
/*
 * Lexer state that persists between calls to lexer_get_token().
 */
typedef struct {
/* Byte stream the characters are read from; stored by lexer_init(). */
bstream_t* stream;
/* Current LEXCONT_* context; selects the branch taken in lexer_get_token(). */
int context;
/* Most recently read character, kept as look-ahead across calls. */
char letter;
/* Write index into the caller's token buffer; reset to 0 on every call. */
int pos;
} lexer_t;
/* Size of the token buffer that yacc_parse() passes to lexer_get_token(). */
#define MAX_TOK_SIZE 1024
/*
 * Token type codes returned by lexer_get_token().
 * TOKEN_NULL is returned when an OPER or BGOPT context ends without
 * producing text; yacc_parse() skips such tokens.
 */
#define TOKEN_NULL 0
#define TOKEN_WORD 1
#define TOKEN_SPACE 2
#define TOKEN_OPER 4
#define TOKEN_ENDFL 5
#define TOKEN_BGOPT 7
/*
 * Lexer context values stored in lexer_t.context.
 * LEXCONT_UNDEF is the state set by lexer_init(), before any character
 * has been read.
 */
#define LEXCONT_UNDEF 0
#define LEXCONT_WORD 1
#define LEXCONT_SPACE 2
#define LEXCONT_OPER 4
#define LEXCONT_ENDFL 5
#define LEXCONT_BGOPT 7
/* Character classes produced by get_ltype(). */
#define LTYPE_SPACE 1
#define LTYPE_LETTER 2
#define LTYPE_OPER 3
#define LTYPE_BGOPT 5
#define LTYPE_ENDFL 7

/*
 * Classify a single input character.
 *
 * Returns LTYPE_BGOPT for '-', LTYPE_SPACE for space, tab and newline,
 * LTYPE_OPER for '=', LTYPE_ENDFL for the end marker (EOF narrowed to char,
 * which is what bstream_getc() returns), and LTYPE_LETTER for anything else.
 */
int get_ltype(char letter) {
    /*
     * Compare against EOF narrowed to char: where plain char is unsigned the
     * promoted value of letter can never equal the negative EOF, so a plain
     * `case EOF:` would never match and end of input would go unnoticed.
     */
    if (letter == (char)EOF) {
        return LTYPE_ENDFL;
    }
    switch (letter) {
    case '-':
        return LTYPE_BGOPT;
    case ' ':
    case '\t':
    case '\n':
        return LTYPE_SPACE;
    case '=':
        return LTYPE_OPER;
    default:
        return LTYPE_LETTER;
    }
}
/*
 * Attach a lexer to a byte stream and put it in its starting state:
 * context LEXCONT_UNDEF and token write index 0. The look-ahead letter is
 * not touched here; lexer_get_token() reads it on its first call.
 */
void lexer_init(lexer_t *lexer, bstream_t *stream) {
    lexer->pos = 0;
    lexer->context = LEXCONT_UNDEF;
    lexer->stream = stream;
}
/*
 * Append one character to the caller's token buffer while keeping one byte
 * free for the terminating NUL. Returns false, storing nothing, when the
 * buffer is already full.
 */
static bool lexer_store(lexer_t *lexer, char *token, int maxsize, char ch) {
    if (lexer->pos >= maxsize - 1) {
        return false;
    }
    token[lexer->pos++] = ch;
    return true;
}

/* Overwrite the token buffer with a fixed label, truncated to fit. */
static void lexer_label(char *token, int maxsize, const char *label) {
    snprintf(token, (size_t)maxsize, "%s", label);
}

/* Terminate a token that ran out of buffer space and report the error. */
static int lexer_overflow(lexer_t *lexer, char *token) {
    token[lexer->pos] = '\0';
    return -2;
}

/*
 * Read the next token from the lexer's stream.
 *
 * lexer   - lexer state; the context and the look-ahead letter persist
 *           between calls
 * token   - caller buffer that receives NUL-terminated text: the word itself
 *           for TOKEN_WORD, or one of the fixed labels "SPACE", "OPER",
 *           "BGOPT" and "NULL" for the corresponding token types; its
 *           content is not meaningful for TOKEN_ENDFL
 * maxsize - size of token in bytes
 *
 * Returns one of the TOKEN_* codes, or -2 on error: token is NULL, maxsize is
 * smaller than 1, the buffered characters do not fit in maxsize bytes, or the
 * lexer context is not a known LEXCONT_* value. After an overflow the
 * character that did not fit stays in lexer->letter, so the next call carries
 * on from it.
 */
int lexer_get_token(lexer_t *lexer, char *token, int maxsize) {
    lexer->pos = 0;
    if (token == NULL || maxsize < 1) {
        return -2;
    }
    /* Nothing has been read yet: fetch the first look-ahead character. */
    if (lexer->context == LEXCONT_UNDEF) {
        lexer->letter = bstream_getc(lexer->stream);
    }
    for (;;) {
        int ltype = get_ltype(lexer->letter);
        int newcontext = lexer->context;

        switch (lexer->context) {
        case LEXCONT_ENDFL:
            return TOKEN_ENDFL;

        case LEXCONT_WORD:
            /* '-' has no case here, so it is kept as part of the word. */
            switch (ltype) {
            case LTYPE_SPACE:
                newcontext = LEXCONT_SPACE;
                break;
            case LTYPE_OPER:
                newcontext = LEXCONT_OPER;
                break;
            case LTYPE_ENDFL:
                newcontext = LEXCONT_ENDFL;
                break;
            default:
                break;
            }
            if (newcontext != lexer->context) {
                /* The terminating character stays in lexer->letter. */
                lexer->context = newcontext;
                token[lexer->pos++] = '\0';
                return TOKEN_WORD;
            }
            if (!lexer_store(lexer, token, maxsize, lexer->letter)) {
                return lexer_overflow(lexer, token);
            }
            break;

        case LEXCONT_SPACE:
            switch (ltype) {
            case LTYPE_OPER:
                newcontext = LEXCONT_OPER;
                break;
            case LTYPE_LETTER:
                newcontext = LEXCONT_WORD;
                break;
            case LTYPE_BGOPT:
                newcontext = LEXCONT_BGOPT;
                break;
            case LTYPE_ENDFL:
                newcontext = LEXCONT_ENDFL;
                break;
            default:
                break;
            }
            if (newcontext != lexer->context) {
                lexer->context = newcontext;
                lexer_label(token, maxsize, "SPACE");
                return TOKEN_SPACE;
            }
            if (!lexer_store(lexer, token, maxsize, lexer->letter)) {
                return lexer_overflow(lexer, token);
            }
            break;

        case LEXCONT_OPER:
            /*
             * LTYPE_BGOPT has no case here, so a '-' that follows '=' keeps
             * the OPER context and is only buffered.
             */
            switch (ltype) {
            case LTYPE_OPER:
                lexer_label(token, maxsize, "OPER");
                lexer->letter = bstream_getc(lexer->stream);
                return TOKEN_OPER;
            case LTYPE_SPACE:
                newcontext = LEXCONT_SPACE;
                break;
            case LTYPE_LETTER:
                newcontext = LEXCONT_WORD;
                break;
            case LTYPE_ENDFL:
                newcontext = LEXCONT_ENDFL;
                break;
            default:
                break;
            }
            if (newcontext != lexer->context) {
                lexer->context = newcontext;
                lexer->pos = 0;
                lexer_label(token, maxsize, "NULL");
                return TOKEN_NULL;
            }
            if (!lexer_store(lexer, token, maxsize, lexer->letter)) {
                return lexer_overflow(lexer, token);
            }
            break;

        case LEXCONT_BGOPT:
            /* Each '-' is reported as its own TOKEN_BGOPT. */
            switch (ltype) {
            case LTYPE_BGOPT:
                lexer_label(token, maxsize, "BGOPT");
                lexer->letter = bstream_getc(lexer->stream);
                return TOKEN_BGOPT;
            case LTYPE_SPACE:
                newcontext = LEXCONT_SPACE;
                break;
            case LTYPE_LETTER:
                newcontext = LEXCONT_WORD;
                break;
            case LTYPE_ENDFL:
                newcontext = LEXCONT_ENDFL;
                break;
            default:
                break;
            }
            if (newcontext != lexer->context) {
                lexer->context = newcontext;
                lexer_label(token, maxsize, "NULL");
                return TOKEN_NULL;
            }
            if (!lexer_store(lexer, token, maxsize, lexer->letter)) {
                return lexer_overflow(lexer, token);
            }
            break;

        case LEXCONT_UNDEF:
            switch (ltype) {
            case LTYPE_SPACE:
                newcontext = LEXCONT_SPACE;
                break;
            case LTYPE_BGOPT:
                lexer->context = LEXCONT_BGOPT;
                lexer_label(token, maxsize, "BGOPT");
                lexer->letter = bstream_getc(lexer->stream);
                return TOKEN_BGOPT;
            case LTYPE_LETTER:
                newcontext = LEXCONT_WORD;
                break;
            case LTYPE_OPER:
                newcontext = LEXCONT_OPER;
                break;
            case LTYPE_ENDFL:
                newcontext = LEXCONT_ENDFL;
                break;
            default:
                break;
            }
            /* The first character is buffered and consumed in every other case. */
            lexer->context = newcontext;
            if (!lexer_store(lexer, token, maxsize, lexer->letter)) {
                return lexer_overflow(lexer, token);
            }
            break;

        default:
            /* Unknown context: fail instead of reading the stream forever. */
            return -2;
        }
        lexer->letter = bstream_getc(lexer->stream);
    }
}
/*
 * Return a newly allocated copy of the NUL-terminated string src.
 *
 * The caller owns the result and must release it with free().
 * Returns NULL when src is NULL or the allocation fails.
 */
static char *strcopy(const char *src) {
    if (src == NULL) {
        return NULL;
    }
    size_t srcsize = strlen(src) + 1;
    char *dst = malloc(srcsize);
    if (dst == NULL) {
        return NULL;
    }
    /* srcsize includes the terminator, so no separate zero-fill is needed. */
    memcpy(dst, src, srcsize);
    return dst;
}
/*
 * Parser state used by yacc_parse().
 */
typedef struct {
/* Lexer the tokens are pulled from; stored by yacc_init(). */
lexer_t* lexer;
/* Current step of yacc_parse()'s state machine (0 between entries). */
int pos;
/* Counter incremented by yacc_parse() for every TOKEN_BGOPT seen. */
int lnum;
} yacc_t;
/*
 * Bind a parser to its lexer and reset both of its counters to zero.
 */
void yacc_init(yacc_t *yacc, lexer_t *lexer) {
    *yacc = (yacc_t){ .lexer = lexer, .pos = 0, .lnum = 0 };
}
/*
 * Presumably the token type expected at each position of an entry.
 * NOTE(review): none of these macros is referenced in the visible code, and
 * TOKEN_COMM is not defined anywhere in it, so POS4TYPE would fail to
 * compile if it were ever expanded - confirm whether these are leftovers.
 */
#define POS1TYPE TOKEN_WORD
#define POS2TYPE TOKEN_OPER
#define POS3TYPE TOKEN_WORD
#define POS4TYPE TOKEN_COMM
/*
 * Parse the whole token stream of yacc->lexer and print what was found.
 *
 * A bare word between entries is printed as "(add WORD)". Two option markers
 * followed by a word, a space or operator, and another word are printed as
 * "(let KEY VALUE)".
 *
 * yacc->pos tracks the step inside the current entry and yacc->lnum counts
 * the TOKEN_BGOPT tokens seen; both are left as they were when the function
 * returns, so the caller can report where parsing stopped.
 *
 * Returns 0 when the end of input is reached, or -1 when a token does not
 * fit the expected sequence, the lexer reports an error, or memory for the
 * key cannot be allocated.
 */
int yacc_parse(yacc_t *yacc) {
    char token[MAX_TOK_SIZE];
    lexer_t *lexer = yacc->lexer;
    /* Key of the entry being parsed; owned here until printed or abandoned. */
    char *key = NULL;
    int toktype;

    while ((toktype = lexer_get_token(lexer, token, MAX_TOK_SIZE)) != TOKEN_ENDFL) {
        if (toktype < 0) {
            /* Lexer error codes are negative and are not real tokens. */
            free(key);
            return -1;
        }
        if (toktype == TOKEN_NULL) {
            continue;
        }
        if (toktype == TOKEN_BGOPT) {
            yacc->lnum++;
        }
        switch (yacc->pos) {
        case 0:
            /* Between entries: skip spaces, report bare words. */
            if (toktype == TOKEN_SPACE) {
                break;
            }
            if (toktype == TOKEN_WORD) {
                /* Printed straight from the buffer, so nothing to free. */
                printf("(add %s)\n", token);
                break;
            }
            yacc->pos++;
            break;
        case 1:
            if (toktype != TOKEN_BGOPT) {
                return -1;
            }
            yacc->pos++;
            break;
        case 2:
            if (toktype != TOKEN_WORD) {
                return -1;
            }
            /* The buffer is reused for the next token, so keep a copy. */
            key = strcopy(token);
            if (key == NULL) {
                return -1;
            }
            yacc->pos++;
            break;
        case 3:
            if ((toktype != TOKEN_SPACE) && (toktype != TOKEN_OPER)) {
                free(key);
                return -1;
            }
            yacc->pos++;
            break;
        case 4:
            if (toktype != TOKEN_WORD) {
                free(key);
                return -1;
            }
            printf("(let %s %s)\n", key, token);
            free(key);
            key = NULL;
            yacc->pos = 0;
            break;
        default:
            break;
        }
    }
    /* Input may end in the middle of an entry; do not leak its key. */
    free(key);
    return 0;
}
/*
 * Demo driver: load a fixed command line into a byte stream, echo it, then
 * run the lexer and parser over it.
 *
 * Returns EXIT_SUCCESS when parsing succeeds and EXIT_FAILURE when the
 * stream cannot be set up or the parser reports an error.
 */
int main(int argc, char **argv) {
    /* Command-line arguments are not used by this demo. */
    (void)argc;
    (void)argv;

    char *src = " --port 12345 --ident=qwert arg1 arg2";
    ssize_t srclen = (ssize_t)strlen(src);

    bstream_t stream;
    if (bstream_init(&stream) != 0) {
        fprintf(stderr, "cannot allocate stream\n");
        return EXIT_FAILURE;
    }
    if (bstream_write(&stream, src, srclen) != srclen) {
        fprintf(stderr, "cannot write to stream\n");
        bstream_destroy(&stream);
        return EXIT_FAILURE;
    }
    bstream_dump(&stream);
    printf("\n");

    lexer_t lexer;
    lexer_init(&lexer, &stream);

    yacc_t yacc;
    yacc_init(&yacc, &lexer);

    int res = yacc_parse(&yacc);
    if (res < 0) {
        printf("parsing error pos %d line %d\n", yacc.pos, yacc.lnum);
    }
    bstream_destroy(&stream);
    return (res < 0) ? EXIT_FAILURE : EXIT_SUCCESS;
}