User Tools

Site Tools


,

Argument string analyzer

lex02.c
/*
 * Copyright 2023 Oleg Borodin  <borodin@unix7.org>
 */
 
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
 
/* Status codes returned by functions that report success or failure. */
#define RES_OK   0
#define RES_ERR -1
 
/*
 * Growable in-memory byte stream with independent read and write cursors.
 */
typedef struct {
    size_t rsize;   /* read cursor: index of the next byte to read from data */
    size_t wsize;   /* write cursor: number of bytes stored in data */
    size_t capa;    /* number of bytes currently allocated for data */
    uint8_t* data;  /* heap buffer owned by the stream */
} bstream_t;
 
/* Number of bytes allocated for a freshly initialized stream. */
#define STREAM_INITCAPA 64
 
/*
 * Prepare an empty stream with STREAM_INITCAPA bytes of storage.
 *
 * Returns RES_OK on success. Returns RES_ERR when stream is NULL or the
 * buffer cannot be allocated; in the latter case the stream is left empty
 * with data == NULL and capa == 0, so it is still safe to destroy.
 */
int bstream_init(bstream_t * stream) {
    if (stream == NULL) {
        return RES_ERR;
    }

    stream->wsize = 0;
    stream->rsize = 0;
    stream->data = malloc(STREAM_INITCAPA);
    if (stream->data == NULL) {
        stream->capa = 0;
        return RES_ERR;
    }
    stream->capa = STREAM_INITCAPA;
    return RES_OK;
}
 
 
/*
 * Print every byte written to the stream so far to standard output,
 * one character at a time, without touching the read cursor.
 *
 * Returns the number of bytes stored in the stream.
 */
ssize_t bstream_dump(bstream_t * stream) {
    size_t idx = 0;

    while (idx < stream->wsize) {
        putchar(stream->data[idx]);
        idx++;
    }
    return stream->wsize;
}
 
 
/*
 * Append size bytes to the stream, growing the buffer as needed.
 *
 * When buf is NULL the bytes are only reserved: the write cursor advances
 * but the reserved region is not filled in.
 *
 * Returns size on success. Returns -1 when size is negative, when the
 * resulting length would overflow size_t, or when the buffer cannot be
 * grown; on failure the stream is left unchanged.
 */
ssize_t bstream_write(bstream_t * stream, void* buf, ssize_t size) {
    if (size < 0) {
        return -1;
    }

    size_t count = (size_t)size;
    size_t needed = stream->wsize + count;

    if (needed < stream->wsize) {
        return -1;
    }

    if (needed > stream->capa) {
        /* Keep doubling until the request fits; a single doubling is not
         * enough when size exceeds the current capacity. */
        size_t newcapa = (stream->capa != 0) ? stream->capa : needed;

        while (newcapa < needed) {
            if (newcapa > SIZE_MAX / 2) {
                newcapa = needed;
                break;
            }
            newcapa *= 2;
        }

        /* Use a temporary so the old buffer is not lost if realloc fails. */
        uint8_t* newdata = realloc(stream->data, newcapa);

        if (newdata == NULL) {
            return -1;
        }
        stream->data = newdata;
        stream->capa = newcapa;
    }
    if ((buf != NULL) && (count > 0)) {
        memcpy(&(stream->data[stream->wsize]), buf, count);
    }
    stream->wsize += count;
    return size;
}
 
/*
 * Consume up to size unread bytes from the stream.
 *
 * The bytes are copied into buf when buf is not NULL; with a NULL buf they
 * are skipped. The read cursor advances by the number of bytes consumed.
 *
 * Returns the number of bytes consumed, which is smaller than size when
 * fewer bytes are unread, or -1 when size is negative.
 */
ssize_t bstream_read(bstream_t * stream, void* buf, ssize_t size) {
    if (size < 0) {
        /* A negative size would otherwise be converted to a huge unsigned
         * value and silently turn into "read everything". */
        return -1;
    }

    size_t unread = stream->wsize - stream->rsize;
    size_t count = (size_t)size;

    if (count > unread) {
        count = unread;
    }
    if ((buf != NULL) && (count > 0)) {
        memcpy(buf, &(stream->data[stream->rsize]), count);
    }
    stream->rsize += count;
    return (ssize_t)count;
}
 
/*
 * Fetch the next unread byte and advance the read cursor by one.
 *
 * Returns EOF narrowed to char once every written byte has been read.
 * Because the result type is char, a stored byte whose value equals
 * (char)EOF cannot be told apart from end of data.
 */
char bstream_getc(bstream_t * stream) {
    if ((stream->wsize - stream->rsize) == 0) {
        return EOF;
    }

    char next = stream->data[stream->rsize];

    stream->rsize += 1;
    return next;
}
 
 
/*
 * Move the read cursor back by size bytes so they can be read again.
 *
 * The step is clamped to the number of bytes already read, and a negative
 * size is treated as zero, so the cursor never underflows.
 *
 * Returns the new read cursor position.
 */
size_t bstream_rrewind(bstream_t * stream, ssize_t size) {
    size_t back = (size > 0) ? (size_t)size : 0;

    if (back > stream->rsize) {
        back = stream->rsize;
    }
    stream->rsize -= back;
    return stream->rsize;
}
 
/*
 * Move the write cursor back by size bytes, discarding the most recently
 * written data.
 *
 * The step is clamped to the number of bytes written, and a negative size
 * is treated as zero. If the read cursor would end up past the new end of
 * data it is pulled back to it, so the unread count stays non-negative.
 *
 * Returns the new write cursor position.
 */
size_t bstream_wrewind(bstream_t * stream, ssize_t size) {
    size_t back = (size > 0) ? (size_t)size : 0;

    if (back > stream->wsize) {
        back = stream->wsize;
    }
    stream->wsize -= back;
    if (stream->rsize > stream->wsize) {
        stream->rsize = stream->wsize;
    }
    return stream->wsize;
}
 
/*
 * Release the stream's buffer. The bstream_t object itself is not freed.
 *
 * The stream is reset to an empty state with data == NULL, so calling
 * destroy a second time is harmless. A NULL stream is ignored.
 */
void bstream_destroy(bstream_t * stream) {
    if (stream == NULL) {
        return;
    }
    free(stream->data);
    stream->data = NULL;
    stream->capa = 0;
    stream->wsize = 0;
    stream->rsize = 0;
}
 
 
/*
 * Lexer state kept between calls to lexer_get_token().
 */
typedef struct {
    bstream_t* stream;  /* input; characters are pulled with bstream_getc() */
    int context;        /* current state, one of the LEXCONT_* values */
    char letter;        /* most recently read character, carried over to the next call */
    int pos;            /* write index into the caller's token buffer; reset on every call */
} lexer_t;
 
/* Size of the token buffer that yacc_parse() hands to lexer_get_token(). */
#define MAX_TOK_SIZE 1024
 
/*
 * Token types returned by lexer_get_token().
 * TOKEN_NULL is returned when the lexer leaves the OPER or BGOPT context
 * without producing a real token; yacc_parse() skips it.
 */
#define TOKEN_NULL      0
#define TOKEN_WORD      1
#define TOKEN_SPACE     2
#define TOKEN_OPER      4
#define TOKEN_ENDFL     5
#define TOKEN_BGOPT     7
 
/* Lexer states stored in lexer_t.context. */
#define LEXCONT_UNDEF   0
#define LEXCONT_WORD    1
#define LEXCONT_SPACE   2
#define LEXCONT_OPER    4
#define LEXCONT_ENDFL   5
#define LEXCONT_BGOPT   7
 
/* Character classes returned by get_ltype(). */
#define LTYPE_SPACE     1
#define LTYPE_LETTER    2
#define LTYPE_OPER      3
#define LTYPE_BGOPT     5
#define LTYPE_ENDFL     7
 
/*
 * Classify one input character for the lexer.
 *
 * '-' begins an option, blank/tab/newline are whitespace, '=' is the
 * operator, EOF marks end of input, and everything else is a word letter.
 */
int get_ltype(char letter) {
    if (letter == '-') {
        return LTYPE_BGOPT;
    }
    if ((letter == ' ') || (letter == '\t') || (letter == '\n')) {
        return LTYPE_SPACE;
    }
    if (letter == '=') {
        return LTYPE_OPER;
    }
    if (letter == EOF) {
        return LTYPE_ENDFL;
    }
    return LTYPE_LETTER;
}
 
/*
 * Attach the lexer to an input stream and reset it to its start state.
 *
 * The stream is borrowed, not copied; it must outlive the lexer. Every
 * field is set, including letter, so no member is left indeterminate
 * before the first lexer_get_token() call reads a character.
 */
void lexer_init(lexer_t * lexer, bstream_t * stream) {
    lexer->stream = stream;
    lexer->context = LEXCONT_UNDEF;
    lexer->letter = '\0';
    lexer->pos = 0;
}
 
 
/* Append ch to the token buffer. Returns false when only the slot reserved
 * for the terminating NUL is left, in which case nothing is stored. */
static bool lexer_store(lexer_t * lexer, char* token, int maxsize, char ch) {
    if (lexer->pos >= (maxsize - 1)) {
        return false;
    }
    token[lexer->pos++] = ch;
    return true;
}

/* Copy a fixed label such as "SPACE" into the token buffer, truncating it
 * if the buffer is too small. The result is always NUL-terminated. */
static void lexer_label(char* token, int maxsize, const char* label) {
    size_t room = (size_t)(maxsize - 1);
    size_t len = strlen(label);

    if (len > room) {
        len = room;
    }
    memcpy(token, label, len);
    token[len] = '\0';
}

/*
 * Read the next token from the lexer's stream.
 *
 * lexer    lexer state; context and letter carry over between calls
 * token    output buffer; receives the word text for TOKEN_WORD, or one of
 *          the labels "SPACE", "OPER", "BGOPT", "NULL" for the other types
 * maxsize  size of token in bytes, including the terminating NUL
 *
 * Returns one of the TOKEN_* values. Returns -2 when maxsize is too small
 * to hold any text, or when a word does not fit into token; in the latter
 * case token holds the part read so far and the next call continues with
 * the rest of the word.
 *
 * The character that ends a word or a whitespace run is not consumed; it
 * stays in lexer->letter and is classified again on the next call.
 */
int lexer_get_token(lexer_t * lexer, char* token, int maxsize) {
    lexer->pos = 0;

    /* Need room for at least one character plus the NUL. */
    if (maxsize < 2) {
        return -2;
    }

    if (lexer->context == LEXCONT_UNDEF) {
        lexer->letter = bstream_getc(lexer->stream);
    }

    for (;;) {
        int ltype = get_ltype(lexer->letter);

        switch (lexer->context) {
            case LEXCONT_ENDFL:{
                return TOKEN_ENDFL;
            }
            case LEXCONT_WORD:{
                int newcontext = LEXCONT_WORD;

                switch (ltype) {
                    case LTYPE_SPACE:{
                        newcontext = LEXCONT_SPACE;
                        break;
                    }
                    case LTYPE_OPER:{
                        newcontext = LEXCONT_OPER;
                        break;
                    }
                    case LTYPE_ENDFL:{
                        newcontext = LEXCONT_ENDFL;
                        break;
                    }
                    default:{
                        /* Letters and '-' both continue the word. */
                        break;
                    }
                }
                if (newcontext != lexer->context) {
                    lexer->context = newcontext;
                    token[lexer->pos++] = '\0';
                    return TOKEN_WORD;
                }
                if (!lexer_store(lexer, token, maxsize, lexer->letter)) {
                    /* Word longer than the buffer: report instead of
                     * writing past the end of token. */
                    token[lexer->pos] = '\0';
                    return -2;
                }
                break;
            }
            case LEXCONT_SPACE:{
                int newcontext = LEXCONT_SPACE;

                switch (ltype) {
                    case LTYPE_OPER:{
                        newcontext = LEXCONT_OPER;
                        break;
                    }
                    case LTYPE_LETTER:{
                        newcontext = LEXCONT_WORD;
                        break;
                    }
                    case LTYPE_BGOPT:{
                        newcontext = LEXCONT_BGOPT;
                        break;
                    }
                    case LTYPE_ENDFL:{
                        newcontext = LEXCONT_ENDFL;
                        break;
                    }
                    default:{
                        break;
                    }
                }
                if (newcontext != lexer->context) {
                    lexer->context = newcontext;
                    lexer_label(token, maxsize, "SPACE");
                    return TOKEN_SPACE;
                }
                /* Scratch only: the label overwrites it, so running out of
                 * room here is not an error. */
                (void)lexer_store(lexer, token, maxsize, lexer->letter);
                break;
            }
            case LEXCONT_OPER:{
                int newcontext = LEXCONT_OPER;

                switch (ltype) {
                    case LTYPE_OPER:{
                        lexer_label(token, maxsize, "OPER");
                        lexer->letter = bstream_getc(lexer->stream);
                        return TOKEN_OPER;
                    }
                    case LTYPE_SPACE:{
                        newcontext = LEXCONT_SPACE;
                        break;
                    }
                    case LTYPE_LETTER:{
                        newcontext = LEXCONT_WORD;
                        break;
                    }
                    case LTYPE_ENDFL:{
                        newcontext = LEXCONT_ENDFL;
                        break;
                    }
                    default:{
                        /* '-' right after the operator is swallowed. */
                        break;
                    }
                }
                if (newcontext != lexer->context) {
                    lexer->context = newcontext;
                    lexer->pos = 0;
                    lexer_label(token, maxsize, "NULL");
                    return TOKEN_NULL;
                }
                (void)lexer_store(lexer, token, maxsize, lexer->letter);
                break;
            }
            case LEXCONT_BGOPT:{
                int newcontext = LEXCONT_BGOPT;

                switch (ltype) {
                    case LTYPE_BGOPT:{
                        lexer_label(token, maxsize, "BGOPT");
                        lexer->letter = bstream_getc(lexer->stream);
                        return TOKEN_BGOPT;
                    }
                    case LTYPE_SPACE:{
                        newcontext = LEXCONT_SPACE;
                        break;
                    }
                    case LTYPE_LETTER:{
                        newcontext = LEXCONT_WORD;
                        break;
                    }
                    case LTYPE_ENDFL:{
                        newcontext = LEXCONT_ENDFL;
                        break;
                    }
                    default:{
                        /* '=' right after '-' is swallowed. */
                        break;
                    }
                }
                if (newcontext != lexer->context) {
                    lexer->context = newcontext;
                    lexer_label(token, maxsize, "NULL");
                    return TOKEN_NULL;
                }
                (void)lexer_store(lexer, token, maxsize, lexer->letter);
                break;
            }
            case LEXCONT_UNDEF: {
                int newcontext = LEXCONT_UNDEF;

                switch (ltype) {
                    case LTYPE_SPACE:{
                        newcontext = LEXCONT_SPACE;
                        break;
                    }
                    case LTYPE_BGOPT:{
                        lexer->context = LEXCONT_BGOPT;
                        lexer_label(token, maxsize, "BGOPT");
                        lexer->letter = bstream_getc(lexer->stream);
                        return TOKEN_BGOPT;
                    }
                    case LTYPE_LETTER:{
                        newcontext = LEXCONT_WORD;
                        break;
                    }
                    case LTYPE_OPER:{
                        newcontext = LEXCONT_OPER;
                        break;
                    }
                    case LTYPE_ENDFL:{
                        newcontext = LEXCONT_ENDFL;
                        break;
                    }
                    default:{
                        break;
                    }
                }
                lexer->context = newcontext;
                /* pos is 0 and maxsize >= 2 here, so this always fits. */
                (void)lexer_store(lexer, token, maxsize, lexer->letter);
                break;
            }
        }
        lexer->letter = bstream_getc(lexer->stream);
    }
}
 
 
/*
 * Duplicate a NUL-terminated string into freshly allocated memory.
 *
 * Returns the copy, which the caller must free(), or NULL when the
 * allocation fails.
 */
static char* strcopy(const char* src) {
    size_t srcsize = strlen(src) + 1;
    char* dst = malloc(srcsize);

    if (dst == NULL) {
        return NULL;
    }
    /* srcsize includes the terminator, so one copy is enough. */
    memcpy(dst, src, srcsize);
    return dst;
}
 
/*
 * Parser state used by yacc_parse().
 */
typedef struct {
    lexer_t* lexer;  /* token source */
    int pos;         /* current grammar position; yacc_parse() switches on values 0..4 */
    int lnum;        /* incremented once for every TOKEN_BGOPT token seen */
} yacc_t;
 
 
/*
 * Reset the parser counters to zero and bind the parser to its lexer.
 * The lexer is referenced, not copied.
 */
void yacc_init(yacc_t * yacc, lexer_t * lexer) {
    yacc->lnum = 0;
    yacc->pos = 0;
    yacc->lexer = lexer;
}
 
 
/*
 * Token type associated with each numbered position.
 * None of these macros is referenced by the code visible in this file.
 * NOTE(review): TOKEN_COMM is not defined in the visible source, so
 * POS4TYPE would fail to compile if it were ever expanded; confirm whether
 * it is stale.
 */
#define POS1TYPE TOKEN_WORD
#define POS2TYPE TOKEN_OPER
#define POS3TYPE TOKEN_WORD
#define POS4TYPE TOKEN_COMM
 
 
/*
 * Parse the whole token stream and print one line per recognized item.
 *
 * A bare word prints "(add WORD)". Two option markers followed by a word,
 * then whitespace or the operator, then another word print
 * "(let KEY VALUE)".
 *
 * yacc->pos tracks the position inside an option:
 *   0  between items          3  separator after the key
 *   1  second option marker   4  value
 *   2  key
 *
 * Returns 0 when the input is consumed, or -1 on an unexpected token, a
 * lexer error, or an allocation failure. yacc->pos and yacc->lnum are left
 * as they were at the point of failure so the caller can report them.
 */
int yacc_parse(yacc_t * yacc) {
    char token[MAX_TOK_SIZE];
    int toktype = -1;
    int res = 0;
    lexer_t* lexer = yacc->lexer;
    char* key = NULL;

    while ((toktype = lexer_get_token(lexer, token, MAX_TOK_SIZE)) != TOKEN_ENDFL) {
        if (toktype == TOKEN_NULL) {
            continue;
        }
        if (toktype < 0) {
            /* The lexer reported an error; do not treat it as a token. */
            res = -1;
            goto done;
        }

        if (toktype == TOKEN_BGOPT) {
            yacc->lnum++;
        }

        switch (yacc->pos) {
            case 0: {
                if (toktype == TOKEN_SPACE) {
                    break;
                }
                if (toktype == TOKEN_WORD) {
                    /* Print straight from the token buffer; no copy is
                     * needed, so nothing can leak. */
                    printf("(add %s)\n", token);
                    break;
                }
                yacc->pos++;
                break;
            }
            case 1: {
                if (toktype != TOKEN_BGOPT) {
                    res = -1;
                    goto done;
                }
                yacc->pos++;
                break;
            }
            case 2: {
                if (toktype != TOKEN_WORD) {
                    res = -1;
                    goto done;
                }
                yacc->pos++;
                /* The key must outlive the token buffer, which the next
                 * lexer call overwrites. */
                key = strcopy(token);
                if (key == NULL) {
                    res = -1;
                    goto done;
                }
                break;
            }
            case 3: {
                if ((toktype != TOKEN_SPACE) && (toktype != TOKEN_OPER)) {
                    res = -1;
                    goto done;
                }
                yacc->pos++;
                break;
            }
            case 4: {
                if (toktype != TOKEN_WORD) {
                    res = -1;
                    goto done;
                }
                yacc->pos = 0;
                printf("(let %s %s)\n", key, token);
                free(key);
                key = NULL;
                break;
            }
        }
    }

done:
    /* Releases a key left pending by an error or by input that ends in the
     * middle of an option. */
    free(key);
    return res;
}
 
/*
 * Demo driver: load a fixed argument string into a stream, echo it, and
 * run the parser over it.
 *
 * Exits with EXIT_SUCCESS when parsing succeeds and EXIT_FAILURE when the
 * stream cannot be set up or the parser reports an error.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    char* src = " --port 12345 --ident=qwert arg1 arg2";

    bstream_t stream;

    if (bstream_init(&stream) < 0) {
        fprintf(stderr, "cannot initialize stream\n");
        return EXIT_FAILURE;
    }
    if (bstream_write(&stream, src, strlen(src)) < 0) {
        fprintf(stderr, "cannot write to stream\n");
        bstream_destroy(&stream);
        return EXIT_FAILURE;
    }
    bstream_dump(&stream);
    printf("\n");

    lexer_t lexer;
    lexer_init(&lexer, &stream);

    yacc_t yacc;
    yacc_init(&yacc, &lexer);
    int res = yacc_parse(&yacc);

    if (res < 0) {
        printf("parsing error pos %d line %d\n", yacc.pos, yacc.lnum);
    }

    bstream_destroy(&stream);

    return (res < 0) ? EXIT_FAILURE : EXIT_SUCCESS;
}

Output

$ gcc -Wall -o lex02 lex02.c
$ ./lex02

 --port 12345 --ident=qwert arg1 arg2

(let port 12345)
(let ident qwert)
(add arg1)
(add arg2)