/*************************************************************************** parser.c - description ------------------- begin : Sat Mar 9 2002 copyright : (C) 2001 by Michael Speck email : kulkanie@gmx.net ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include #include #include "parser.h" #include "tools.h" /* ==================================================================== Error string. ==================================================================== */ static char parser_sub_error[1024]; static char parser_error[1024]; /* ==================================================================== This buffer is used to fully load resource files when the compact format is used. ==================================================================== */ enum { CBUFFER_SIZE = 131072 }; /* 128 KB */ static char cbuffer[CBUFFER_SIZE]; static char* cbuffer_pos = 0; /* position in cbuffer */ /* ==================================================================== As we need constant strings sometimes we have to define a maximum length for tokens. ==================================================================== */ enum { PARSER_MAX_TOKEN_LENGTH = 1024 }; /* ==================================================================== Locals ==================================================================== */ /* ==================================================================== Macro to shorten the fread call for a single character. ==================================================================== */ #define FILE_READCHAR( file, c ) fread( &c, sizeof( char ), 1, file ) /* ==================================================================== Find next newline in cbuffer and replace it with \0 and return the pointer to the current line. ==================================================================== */ static char* parser_get_next_line() { char *line = cbuffer_pos; char *newpos; if ( cbuffer_pos[0] == 0 ) return 0; /* completely read. no more lines. */ if ( ( newpos = strchr( cbuffer_pos, 10 ) ) == 0 ) cbuffer_pos += strlen( cbuffer_pos ); /* last line */ else { cbuffer_pos = newpos + 1; /* set pointer to next line */ newpos[0] = 0; /* terminate current line */ } return line; } /* ==================================================================== Set parse error string: "file:line: error" ==================================================================== */ static void parser_set_parse_error( char *fname, FILE *file, char *error ) { int end, pos; int line_count = 1; char c; end = ftell( file ); pos = 0; fseek( file, 0, SEEK_SET ); while ( pos < end ) { FILE_READCHAR( file, c ); pos++; if ( c == 10 ) line_count++; } sprintf( parser_error, "%s: %i: %s", fname, line_count, error ); } /* ==================================================================== Check if the given character occurs in the symbol list. If the first symbol is ' ' it is used as wildcard for all white-spaces. ==================================================================== */ static int is_symbol( int c, char *symbols ) { int i = 0; if ( symbols[0] == ' ' && c <= 32 ) return 1; while ( symbols[i] != 0 ) if ( c == symbols[i++] ) return 1; return 0; } /* ==================================================================== Move file position forward until reading in the given character. If stop is ' ' whitespaces will be ignored. ==================================================================== */ static void file_skip( FILE *file, char stop ) { char c = 0; FILE_READCHAR( file, c ); while ( ( ( stop == ' ' && c <= 32 ) || ( stop != ' ' && c != stop ) ) && !feof( file ) ) FILE_READCHAR( file, c ); if ( !feof( file ) ) fseek( file, -1, SEEK_CUR ); } /* ==================================================================== Read next token from current file position where symbols is a list of characters used to break up the tokens. The symbols themself are returned as tokens. If ' ' occurs in the symbol list it will be ignored and whitespaces are removed automatically. The token does not exceed PARSER_MAX_TOKEN_LENGTH. Enclosing ".." are kept at the token. Use file_compare_token() to test it's contents. Returns False on EoF. ==================================================================== */ static int file_read_token_intern( FILE *file, char *symbols, char *token ) { int pos = 0; char c; token[0] = 0; file_skip( file, ' ' ); FILE_READCHAR( file, c ); if ( feof( file ) ) { sprintf( parser_sub_error, "unexpected end of file" ); return 0; } /* string? */ if ( c == '"' ) { token[pos++] = '"'; FILE_READCHAR( file, c ); while ( ( !feof( file ) && c != '"' ) ) { token[pos++] = c; if ( pos == PARSER_MAX_TOKEN_LENGTH - 2 ) { token[pos++] = '"'; token[pos] = 0; sprintf( parser_sub_error, "token exceeds limit" ); return 0; } FILE_READCHAR( file, c ); } token[pos++] = '"'; token[pos] = 0; if ( feof( file ) ) { sprintf( parser_sub_error, "unexpected end of file" ); token[0] = 0; return 0; } return 1; } /* symbol? */ if ( is_symbol( c, symbols ) ) { token[0] = c; token[1] = 0; return 1; } /* other token */ while ( !is_symbol( c, symbols ) && !feof( file ) ) { token[pos++] = c; if ( pos == PARSER_MAX_TOKEN_LENGTH - 1 ) { token[pos] = 0; sprintf( parser_sub_error, "token exceeds limit" ); return 0; } FILE_READCHAR( file, c ); } token[pos] = 0; if ( feof( file ) ) return 1; fseek( file, -1, SEEK_CUR ); return 1; } /* ==================================================================== Skip all tokens until one begins with character 'stop'. This token is also ignored. ==================================================================== */ static void file_skip_section( FILE *file, char stop ) { char token[PARSER_MAX_TOKEN_LENGTH]; do { file_read_token_intern( file, PARSER_SYMBOLS, token ); } while ( !feof( file ) && token[0] != stop ); } /* ==================================================================== Read next token and skip comments enclosed in tokens skip[0], skip[1] (if skip is not NULL). Return 0 if EoF. ==================================================================== */ static int file_read_token( FILE *file, char *symbols, char *skip, char *token ) { while ( 1 ) { if ( !file_read_token_intern( file, symbols, token ) ) return 0; if ( skip ) { if ( token[0] == skip[0] ) file_skip_section( file, skip[1] ); else break; } else break; } return 1; } /* ==================================================================== Remove quotes if any and return result as newly allocated string. ==================================================================== */ static char* parser_remove_quotes( char *string ) { char *new; if ( string[0] != '"' ) return strdup( string ); new = calloc( strlen( string ) - 1, sizeof( char ) ); strncpy( new, string + 1, strlen( string ) - 2 ); new[strlen( string ) - 2] = 0; return new; } /* ==================================================================== Proceed in the given string until it ends or non-whitespace occurs and return the new position. ==================================================================== */ static char* string_ignore_whitespace( char *string ) { int i = 0; while ( string[i] != 0 && string[i] <= 32 ) i++; return string + i; } /* ==================================================================== This function searches file from the current position for the next pdata entry. ==================================================================== */ static PData* parser_parse_file( FILE *file ) { char token[PARSER_MAX_TOKEN_LENGTH]; PData *pd = 0, *sub = 0; /* get name */ if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) return 0; if ( is_symbol( token[0], PARSER_SYMBOLS ) ) { sprintf( parser_sub_error, "parse error before '%s'", token ); return 0; } pd = calloc( 1, sizeof( PData ) ); pd->name = parser_remove_quotes( token ); /* check type */ if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) goto failure; switch ( token[0] ) { case PARSER_SET: /* assign single value or list */ pd->values = list_create( LIST_AUTO_DELETE, LIST_NO_CALLBACK ); if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) goto failure; if ( token[0] != PARSER_LIST_BEGIN ) { if ( is_symbol( token[0], PARSER_SYMBOLS ) ) { sprintf( parser_sub_error, "parse error before '%s'", token ); goto failure; } else list_add( pd->values, parser_remove_quotes( token ) ); } else { if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) goto failure; while ( token[0] != PARSER_LIST_END ) { if ( is_symbol( token[0], PARSER_SYMBOLS ) ) { sprintf( parser_sub_error, "parse error before '%s'", token ); goto failure; } else list_add( pd->values, parser_remove_quotes( token ) ); if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) goto failure; } } break; case PARSER_GROUP_BEGIN: /* check all entries until PARSER_GROUP_END */ pd->entries = list_create( LIST_NO_AUTO_DELETE, LIST_NO_CALLBACK ); while ( 1 ) { if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) goto failure; if ( token[0] == PARSER_GROUP_END ) break; fseek( file, -strlen( token ), SEEK_CUR ); sub = parser_parse_file( file ); if ( sub ) list_add( pd->entries, sub ); else goto failure; } break; default: sprintf( parser_sub_error, "parse error before '%s'", token ); goto failure; } return pd; failure: parser_free( &pd ); return 0; } /* ==================================================================== Publics ==================================================================== */ /* ==================================================================== This function splits a string into tokens using the characters found in symbols as breakpoints. If the first symbol is ' ' all whitespaces are used as breakpoints though NOT added as a token (thus removed from string). ==================================================================== */ List* parser_split_string( char *string, char *symbols ) { int pos; char *token = 0; List *list = list_create( LIST_AUTO_DELETE, LIST_NO_CALLBACK ); while ( string[0] != 0 ) { if ( symbols[0] == ' ' ) string = string_ignore_whitespace( string ); if ( string[0] == 0 ) break; pos = 1; /* 'read in' first character */ while ( string[pos - 1] != 0 && !is_symbol( string[pos - 1], symbols ) && string[pos - 1] != '"' ) pos++; if ( pos > 1 ) pos--; else if ( string[pos - 1] == '"' ) { /* read a string */ string = string + 1; pos = 0; while ( string[pos] != 0 && string[pos] != '"' ) pos++; token = calloc( pos + 1, sizeof( char ) ); strncpy( token, string, pos ); token[pos] = 0; list_add( list, token ); string = string + pos + (string[pos] != 0); continue; } token = calloc( pos + 1, sizeof( char ) ); strncpy( token, string, pos); token[pos] = 0; list_add( list, token ); string = string + pos; } return list; } /* ==================================================================== This is the light version of parser_split_string which checks for just one character and does not add this glue characters to the list. It's about 2% faster. Wow. ==================================================================== */ List *parser_explode_string( char *string, char c ) { List *list = list_create( LIST_AUTO_DELETE, LIST_NO_CALLBACK ); char *next_slash = 0; char buffer[64]; while ( string[0] != 0 && ( next_slash = strchr( string, c ) ) != 0 ) { if ( next_slash != string ) { strcpy_lt( buffer, string, (next_slash-string>63)?63:(next_slash-string) ); list_add( list, strdup( buffer ) ); } string += next_slash - string + 1; } if ( string[0] != 0 ) list_add( list, strdup( string ) ); return list; } /* ==================================================================== This function reads in a whole file and converts it into a PData tree struct. If an error occurs NULL is returned and parser_error is set. ==================================================================== */ static int parser_read_file_full( FILE *file, PData *top ) { PData *sub = 0; char token[1024]; /* parse file */ while ( !feof( file ) ) { if ( ( sub = parser_parse_file( file ) ) != 0 ) list_add( top->entries, sub ); else return 0; /* skip comments and whitespaces */ if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) { if ( token[0] != 0 ) return 0; break; } else fseek( file, -strlen( token ), SEEK_CUR ); } return 1; } static int parser_read_file_compact( PData *section ) { /* section is the parent pdata that needs some entries */ PData *pd = 0; char *line, *cur; while ( ( line = parser_get_next_line() ) ) { switch ( line[0] ) { case '>': /* this section is finished */ return 1; case '<': /* add a whole subsection */ pd = calloc( 1, sizeof( PData ) ); pd->name = strdup( line + 1 ); pd->entries = list_create( LIST_NO_AUTO_DELETE, LIST_NO_CALLBACK ); parser_read_file_compact( pd ); /* add to section */ list_add( section->entries, pd ); break; default: /* read values as subsection */ pd = calloc( 1, sizeof( PData ) ); /* check name */ if ( ( cur = strchr( line, '»' ) ) == 0 ) { sprintf( parser_sub_error, "parse error: use '»' for assignment or '<' for section" ); return 0; } cur[0] = 0; cur++; pd->name = strdup( line ); /* get values */ pd->values = parser_explode_string( cur, '°' ); /* add to section */ list_add( section->entries, pd ); break; } } return 1; } PData* parser_read_file( char *tree_name, char *fname ) { int size; char magic = 0; FILE *file = 0; PData *top = 0; /* open file */ if ( ( file = fopen( fname, "r" ) ) == 0 ) { sprintf( parser_error, "%s: file not found", fname ); return 0; } /* create top level pdata */ top = calloc( 1, sizeof( PData ) ); top->name = strdup( tree_name ); top->entries = list_create( LIST_NO_AUTO_DELETE, LIST_NO_CALLBACK ); /* parse */ FILE_READCHAR( file, magic ); if ( magic == '@' ) { /* get the whole contents -- 1 and CBUFFER_SIZE are switched */ fseek( file, 0, SEEK_END ); size = ftell( file ) - 2; if ( size >= CBUFFER_SIZE ) { fprintf( stderr, "%s: file's too big to fit the compact buffer (128KB)\n", fname ); size = CBUFFER_SIZE - 1; } fseek( file, 2, SEEK_SET ); fread( cbuffer, 1, size, file ); cbuffer[size] = 0; /* set indicator to beginning of text */ cbuffer_pos = cbuffer; /* parse cbuffer */ if ( !parser_read_file_compact( top ) ) { parser_set_parse_error( fname, file, parser_sub_error ); goto failure; } } else { fseek( file, 0, SEEK_SET ); if ( !parser_read_file_full( file, top ) ) { parser_set_parse_error( fname, file, parser_sub_error ); goto failure; } } /* finalize */ fclose( file ); return top; failure: fclose( file ); parser_free( &top ); return 0; } /* ==================================================================== This function frees a PData tree struct. ==================================================================== */ void parser_free( PData **pdata ) { PData *entry = 0; if ( (*pdata) == 0 ) return; if ( (*pdata)->name ) free( (*pdata)->name ); if ( (*pdata)->values ) list_delete( (*pdata)->values ); if ( (*pdata)->entries ) { list_reset( (*pdata)->entries ); while ( ( entry = list_next( (*pdata)->entries ) ) ) parser_free( &entry ); list_delete( (*pdata)->entries ); } free( *pdata ); *pdata = 0; } /* ==================================================================== Functions to access a PData tree. 'name' is the pass within tree 'pd' where subtrees are separated by '/' (e.g.: name = 'config/graphics/animations') parser_get_pdata : get pdata entry associated with 'name' parser_get_entries : get list of subtrees (PData structs) in 'name' parser_get_values : get value list of 'name' parser_get_value : get a single value from value list of 'name' parser_get_int : get first value of 'name' converted to integer parser_get_double : get first value of 'name' converted to double parser_get_string : get first value of 'name' _duplicated_ If an error occurs result is set NULL, False is returned and parse_error is set. ==================================================================== */ int parser_get_pdata ( PData *pd, char *name, PData **result ) { int i, found; PData *pd_next = pd; PData *entry = 0; char *sub = 0; List *path = parser_explode_string( name, '/' ); for ( i = 0, list_reset( path ); i < path->count; i++ ) { sub = list_next( path ); if ( !pd_next->entries ) { sprintf( parser_sub_error, "%s: no subtrees", pd_next->name ); goto failure; } list_reset( pd_next->entries ); found = 0; while ( ( entry = list_next( pd_next->entries ) ) ) if ( strlen( entry->name ) == strlen( sub ) && !strncmp( entry->name, sub, strlen( sub ) ) ) { pd_next = entry; found = 1; break; } if ( !found ) { sprintf( parser_sub_error, "%s: subtree '%s' not found", pd_next->name, sub ); goto failure; } } list_delete( path ); *result = pd_next; return 1; failure: sprintf( parser_error, "parser_get_pdata: %s/%s: %s", pd->name, name, parser_sub_error ); list_delete( path ); *result = 0; return 0; } int parser_get_entries( PData *pd, char *name, List **result ) { PData *entry; *result = 0; if ( !parser_get_pdata( pd, name, &entry ) ) { sprintf( parser_sub_error, "parser_get_entries:\n %s", parser_error ); strcpy( parser_error, parser_sub_error ); return 0; } if ( !entry->entries || entry->entries->count == 0 ) { sprintf( parser_error, "parser_get_entries: %s/%s: no subtrees", pd->name, name ); return 0; } *result = entry->entries; return 1; } int parser_get_values ( PData *pd, char *name, List **result ) { PData *entry; *result = 0; if ( !parser_get_pdata( pd, name, &entry ) ) { sprintf( parser_sub_error, "parser_get_values:\n %s", parser_error ); strcpy( parser_error, parser_sub_error ); return 0; } if ( !entry->values || entry->values->count == 0 ) { sprintf( parser_error, "parser_get_values: %s/%s: no values", pd->name, name ); return 0; } *result = entry->values; return 1; } int parser_get_value ( PData *pd, char *name, char **result, int index ) { List *values; if ( !parser_get_values( pd, name, &values ) ) { sprintf( parser_sub_error, "parser_get_value:\n %s", parser_error ); strcpy( parser_error, parser_sub_error ); return 0; } if ( index >= values->count ) { sprintf( parser_error, "parser_get_value: %s/%s: index %i out of range (%i elements)", pd->name, name, index, values->count ); return 0; } *result = list_get( values, index ); return 1; } int parser_get_int ( PData *pd, char *name, int *result ) { char *value; if ( !parser_get_value( pd, name, &value, 0 ) ) { sprintf( parser_sub_error, "parser_get_int:\n %s", parser_error ); strcpy( parser_error, parser_sub_error ); return 0; } *result = atoi( value ); return 1; } int parser_get_double ( PData *pd, char *name, double *result ) { char *value; if ( !parser_get_value( pd, name, &value, 0 ) ) { sprintf( parser_sub_error, "parser_get_double:\n %s", parser_error ); strcpy( parser_error, parser_sub_error ); return 0; } *result = strtod( value, 0 ); return 1; } int parser_get_string ( PData *pd, char *name, char **result ) { char *value; if ( !parser_get_value( pd, name, &value, 0 ) ) { sprintf( parser_sub_error, "parser_get_string:\n %s", parser_error ); strcpy( parser_error, parser_sub_error ); return 0; } *result = strdup( value ); return 1; } /* ==================================================================== If an error occurred you can query the reason with this function. ==================================================================== */ char* parser_get_error( void ) { return parser_error; }