trilobite2
January 10th, 2018, 05:55 AM
Hi all -
I've written most of a simple lexer in C but the part I have problems with is where to put the loops that iterate through a string (or through a file) and also the code that gets tokens.
I've noticed that most (if not all) lexers that I've seen don't seem to have parameters for the main lexing and the sub-lexing functions so I'm keen to use that approach if possible.
Here's what I have so far -
/* toysql.c */
/* A lexer for a very small subset of SQL. */
/* This code is released to the public domain. */
/* "Share and enjoy......" :) */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* Array of our keywords in string form.  Only these exact lower-case
 * spellings are listed. */
char *kw_strings[] = {
    "select", "from", "where", "and", "or", "not", "in", "is", "null"
};
/* Number of entries in kw_strings.  Computed from the array itself so the
 * count cannot drift when a keyword is added or removed. */
#define NUMBER_OF_KEYWORDS ((int)(sizeof kw_strings / sizeof kw_strings[0]))
/*
 * Linear search of a string array for an exact, case-sensitive match.
 *
 *   arr - array holding at least dim NUL-terminated strings
 *   dim - number of entries of arr to examine
 *   str - NUL-terminated string to look for
 *
 * Returns 1 if some arr[i] compares equal to str under strcmp(), 0 otherwise.
 * A dim of zero or less examines nothing and returns 0 (the result is always
 * a defined value, even for an empty table).
 */
int search(char *arr[], int dim, char *str) {
    int i;

    for (i = 0; i < dim; i++) {
        if (strcmp(arr[i], str) == 0)
            return 1;
    }
    return 0;
} /* search */
/* Token types: the category stored in every token's `type` field.
 * In this file lex_kwident() assigns KEYWORD or IDENTIFIER; the remaining
 * enumerators are declared for the rest of the lexer.
 * NOTE(review): `_EOF_` begins with an underscore followed by an upper-case
 * letter, a spelling the C standard reserves for the implementation.
 * Consider renaming it (together with every user) to something like
 * TOKEN_EOF. */
typedef enum { KEYWORD, IDENTIFIER, OPERATOR, STRING, NUMBER, _EOF_ }
token_type ;
/* Tokens: one lexical unit, made of a category plus its payload.
 * Only one member of `value` is meaningful at a time because it is a union.
 * lex_kwident() passes value.string_value to search() as the token's text.
 * NOTE(review): presumably int_value is meant for NUMBER tokens and
 * string_value for all textual ones -- nothing in this file sets int_value
 * yet, so confirm once number lexing exists. */
typedef struct {
token_type type;
union {
char *string_value;
int int_value;
} value;
} token;
token *lex_kwident() {
token *t;
/* Get some characters here from string or file and */
/* iterate through until a non-aphanum character is found */
if (search(kw_strings, NUMBER_OF_KEYWORDS, t.value.string_value) == 1 )
t.type = KEYWORD;
else
t.type = IDENTIFIER;
return t;
}
token *lex_space() {
token *t;
return t;
}
/* "Main" lexer. Usually does not take parameters from what I've seen. */
token *lex() {
/* ??? Do I put function to get characters from string/file
* here or should it go elsewhere? */
token *t;
/* Give token to "parser" */
parse(token) ;
}
/* Not a parser (yet) - just prints the token type. */
/* I think this might have to take a parameter - the token returned by lexer. */
void parse() {
printf("Tokentype: %d\n", t.type);
}
int main() {
/* Try the lexer on a string first. */
char *mystr1 = "select mycol from mytable" ;
/* Loop here? */
parse();
return 0;
}
So.... any help in "beefing out" this code is very welcome!
Many thanks in advance -
- trilobite2
I've written most of a simple lexer in C but the part I have problems with is where to put the loops that iterate through a string (or through a file) and also the code that gets tokens.
I've noticed that most (if not all) lexers that I've seen don't seem to have parameters for the main lexing and the sub-lexing functions so I'm keen to use that approach if possible.
Here's what I have so far -
/* toysql.c */
/* A lexer for a very small subset of SQL. */
/* This code is released to the public domain. */
/* "Share and enjoy......" :) */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* Array of our keywords in string form.  Only these exact lower-case
 * spellings are listed. */
char *kw_strings[] = {
    "select", "from", "where", "and", "or", "not", "in", "is", "null"
};
/* Number of entries in kw_strings.  Computed from the array itself so the
 * count cannot drift when a keyword is added or removed. */
#define NUMBER_OF_KEYWORDS ((int)(sizeof kw_strings / sizeof kw_strings[0]))
/*
 * Linear search of a string array for an exact, case-sensitive match.
 *
 *   arr - array holding at least dim NUL-terminated strings
 *   dim - number of entries of arr to examine
 *   str - NUL-terminated string to look for
 *
 * Returns 1 if some arr[i] compares equal to str under strcmp(), 0 otherwise.
 * A dim of zero or less examines nothing and returns 0 (the result is always
 * a defined value, even for an empty table).
 */
int search(char *arr[], int dim, char *str) {
    int i;

    for (i = 0; i < dim; i++) {
        if (strcmp(arr[i], str) == 0)
            return 1;
    }
    return 0;
} /* search */
/* Token types: the category stored in every token's `type` field.
 * In this file lex_kwident() assigns KEYWORD or IDENTIFIER; the remaining
 * enumerators are declared for the rest of the lexer.
 * NOTE(review): `_EOF_` begins with an underscore followed by an upper-case
 * letter, a spelling the C standard reserves for the implementation.
 * Consider renaming it (together with every user) to something like
 * TOKEN_EOF. */
typedef enum { KEYWORD, IDENTIFIER, OPERATOR, STRING, NUMBER, _EOF_ }
token_type ;
/* Tokens: one lexical unit, made of a category plus its payload.
 * Only one member of `value` is meaningful at a time because it is a union.
 * lex_kwident() passes value.string_value to search() as the token's text.
 * NOTE(review): presumably int_value is meant for NUMBER tokens and
 * string_value for all textual ones -- nothing in this file sets int_value
 * yet, so confirm once number lexing exists. */
typedef struct {
token_type type;
union {
char *string_value;
int int_value;
} value;
} token;
token *lex_kwident() {
token *t;
/* Get some characters here from string or file and */
/* iterate through until a non-aphanum character is found */
if (search(kw_strings, NUMBER_OF_KEYWORDS, t.value.string_value) == 1 )
t.type = KEYWORD;
else
t.type = IDENTIFIER;
return t;
}
token *lex_space() {
token *t;
return t;
}
/* "Main" lexer. Usually does not take parameters from what I've seen. */
token *lex() {
/* ??? Do I put function to get characters from string/file
* here or should it go elsewhere? */
token *t;
/* Give token to "parser" */
parse(token) ;
}
/* Not a parser (yet) - just prints the token type. */
/* I think this might have to take a parameter - the token returned by lexer. */
void parse() {
printf("Tokentype: %d\n", t.type);
}
int main() {
/* Try the lexer on a string first. */
char *mystr1 = "select mycol from mytable" ;
/* Loop here? */
parse();
return 0;
}
So.... any help in "beefing out" this code is very welcome!
Many thanks in advance -
- trilobite2