cScm Configuration Daemon

cScm is a tool that converts SCM configuration files into a binary format and stores them in shared memory, where they are read by the cSvn-ui and cGit-ui CGI scripts.

12 Commits   0 Branches   1 Tag
     5         kx 
     5         kx #ifdef HAVE_CONFIG_H
     5         kx #include <config.h>
     5         kx #endif
     5         kx 
     5         kx #include <stdlib.h>
     5         kx #include <stdio.h>
     5         kx #include <unistd.h>
     5         kx #include <string.h>
     5         kx #include <stdarg.h>
     5         kx #include <limits.h>
     5         kx #include <locale.h>
     5         kx #include <wchar.h>
     5         kx #include <wctype.h>
     5         kx 
     5         kx #define PCRE2_CODE_UNIT_WIDTH 32
     5         kx #include <pcre2.h>
     5         kx 
     5         kx #include <defs.h>
     5         kx 
     5         kx #include <main.h>
     5         kx #include <error.h>
     5         kx #include <msglog.h>
     5         kx #include <xalloc.h>
     5         kx #include <utf8ing.h>
     5         kx #include <symtab.h>
     5         kx #include <parse.h>
     5         kx 
     5         kx #include <lex.h>
     5         kx 
     5         kx 
     5         kx 
     5         kx int lineno = 0;
     5         kx int colno  = 0;
     5         kx 
     5         kx static int       maxtoken;
     5         kx static wchar_t  *token_buffer;
     5         kx 
     5         kx static int       max8token;
     5         kx static utf8_t   *token_utf8_buffer;
     5         kx 
     5         kx int       indent_level = 0; /* Number of '{' minus number of '}'. */
     5         kx 
     5         kx static int       end_of_file = 0;
     5         kx static int       nextchar = -1;
     5         kx 
     5         kx static char     *locale;
     5         kx 
     5         kx #define GETC(c)    ({ wint_t ret; ++colno; ret = fgetwc( config ); ret; })
     5         kx #define UNGETC(c)  ({ wint_t ret; --colno; ret = ungetwc( c, config ); ret; })
     5         kx 
     5         kx 
     5         kx static wchar_t *extend_token_buffer( wchar_t *p )
     5         kx {
     5         kx    int offset = p - token_buffer;
     5         kx    maxtoken = maxtoken * 2 + 10;
     5         kx    token_buffer = (wchar_t *)xrealloc( token_buffer, (maxtoken + 2)*sizeof(wchar_t) );
     5         kx 
     5         kx    return( token_buffer + offset );
     5         kx }
     5         kx 
     5         kx static utf8_t *extend_token_utf8_buffer( utf8_t *p )
     5         kx {
     5         kx    int offset = p - token_utf8_buffer;
     5         kx    max8token = max8token * 2 + 10;
     5         kx    token_utf8_buffer = (utf8_t *)xrealloc( token_utf8_buffer, (max8token + 2)*6 );
     5         kx 
     5         kx    return( token_utf8_buffer + offset );
     5         kx }
     5         kx 
     5         kx 
     5         kx void yyerror( char const *s )
     5         kx {
     5         kx   error( "%s", s );
     5         kx }
     5         kx 
     5         kx 
     5         kx void init_lex( void )
     5         kx {
     5         kx   locale = setlocale( LC_ALL, "en_US.utf8" );
     5         kx 
     5         kx   lineno = 0;
     5         kx   colno  = 0;
     5         kx 
     5         kx   nextchar  = -1;
     5         kx   maxtoken  = 40;
     5         kx   max8token = 40;
     5         kx 
     5         kx   indent_level = 0;
     5         kx   end_of_file  = 0;
     5         kx 
     5         kx   token_buffer = (wchar_t *)xmalloc( maxtoken * sizeof(wchar_t) + 2 );
     5         kx   token_utf8_buffer = (utf8_t *)xmalloc( max8token * 6 + 2 );
     5         kx }
     5         kx 
     5         kx void fini_lex( void )
     5         kx {
     5         kx   locale = setlocale( LC_ALL, locale );
     5         kx 
     5         kx   if( token_buffer ) { free( token_buffer ); token_buffer = NULL; }
     5         kx   if( token_utf8_buffer ) { free( token_utf8_buffer ); token_utf8_buffer = NULL; }
     5         kx 
     5         kx   indent_level = 0;
     5         kx   end_of_file  = 0;
     5         kx 
     5         kx   max8token =  0;
     5         kx   maxtoken  =  0;
     5         kx   nextchar  = -1;
     5         kx 
     5         kx   lineno = 0;
     5         kx   colno  = 0;
     5         kx }
     5         kx 
     5         kx static wint_t check_newline( void )
     5         kx {
     5         kx   wint_t  c;
     5         kx 
     5         kx   ++lineno;
     5         kx   colno  = 0; /* считает GETC()/UNGETC(); здесь надо только обнулить */
     5         kx 
     5         kx   /*****************************************
     5         kx     Read first nonwhite char on the line.
     5         kx    *****************************************/
     5         kx   c = GETC();
     5         kx   while( c == ' ' || c == '\t' ) c = GETC();
     5         kx 
     5         kx   if( c == '#' ) goto skipline;
     5         kx   else           return( c );
     5         kx 
     5         kx   /* skip the rest of this line */
     5         kx skipline:
     5         kx 
     5         kx   while( c != '\n' && c != WEOF )
     5         kx     c = GETC();
     5         kx 
     5         kx   return( c );
     5         kx }
     5         kx 
     5         kx static wint_t skip_comment( int c )
     5         kx {
     5         kx   if( c == '*' )
     5         kx   {
     5         kx do1:
     5         kx     do
     5         kx     {
     5         kx        c = GETC();
     5         kx        if( c == '\n' ) { ++lineno; colno = 0; }
     5         kx 
     5         kx     } while( c != '*' && c != WEOF );
     5         kx 
     5         kx     if( c == WEOF )
     5         kx     {
     5         kx        unterminated_comment();
     5         kx        return( WEOF );
     5         kx     }
     5         kx 
     5         kx     c = GETC();
     5         kx 
     5         kx     if( c == '/' )
     5         kx     {
     5         kx        c = GETC();
     5         kx        if( c == '\n' ) c = check_newline();
     5         kx        return( c );
     5         kx     }
     5         kx     else
     5         kx     {
     5         kx        UNGETC( c );
     5         kx        goto do1;
     5         kx     }
     5         kx   }
     5         kx   else if( c == '/' || c == '#' )
     5         kx   {
     5         kx     do
     5         kx     {
     5         kx        c = GETC();
     5         kx 
     5         kx     } while( c != '\n' && c != WEOF );
     5         kx 
     5         kx     if( c == WEOF )
     5         kx     {
     5         kx        unterminated_comment();
     5         kx        return( WEOF );
     5         kx     }
     5         kx     else c = check_newline();
     5         kx 
     5         kx     return( c );
     5         kx   }
     5         kx 
     5         kx   return( c );
     5         kx 
     5         kx } /* End skip_commemnt() */
     5         kx 
     5         kx static wint_t skip_white_space( wint_t c )
     5         kx {
     5         kx   for( ;; )
     5         kx   {
     5         kx     switch( c )
     5         kx     {
     5         kx       case '\n':
     5         kx         c = check_newline();
     5         kx         break;
     5         kx 
     5         kx       case '#':
     5         kx         c = skip_comment( c );
     5         kx         return( skip_white_space( c ) );
     5         kx         break;
     5         kx 
     5         kx       case '/':
     5         kx         c = GETC();
     5         kx         if( c == '/' || c == '*' )
     5         kx         {
     5         kx           c = skip_comment( c );
     5         kx           return( skip_white_space( c ) );
     5         kx         }
     5         kx         else
     5         kx         {
     5         kx           UNGETC( c );
     5         kx           return( '/' );
     5         kx         }
     5         kx         break;
     5         kx 
     5         kx       case ' ':
     5         kx       case '\t':
     5         kx       case '\f':
     5         kx       case '\v':
     5         kx       case '\b':
     5         kx       case '\r':
     5         kx         c = GETC();
     5         kx         break;
     5         kx       case '\\':
     5         kx         c = GETC();
     5         kx         if( c == '\n' ) { ++lineno; colno = 0; }
     5         kx         else
     5         kx         {
     5         kx           warning( "%s", "Stray '\\' in program" );
     5         kx         }
     5         kx         c = GETC();
     5         kx         break;
     5         kx       default:
     5         kx         return( c );
     5         kx 
     5         kx     } /* End switch( c ) */
     5         kx 
     5         kx   } /* End for( ;; ) */
     5         kx 
     5         kx } /* End skip_white_space() */
     5         kx 
     5         kx static wint_t readescape( int *ignore_ptr )
     5         kx /*
     5         kx    read escape sequence, returning a char, or store 1 in *ignore_ptr
     5         kx    if it is backslash-newline
     5         kx  */
     5         kx {
     5         kx   wint_t    c = GETC();
     5         kx   wint_t    code;
     5         kx   unsigned  count;
     5         kx   unsigned  firstdig = 0;
     5         kx   int       nonull;
     5         kx 
     5         kx   switch( c )
     5         kx   {
     5         kx      case 'x':
     5         kx         code   = 0;
     5         kx         count  = 0;
     5         kx         nonull = 0;
     5         kx         while( 1 )
     5         kx         {
     5         kx            c = GETC();
     5         kx            if( !(c >= 'a' && c <= 'f') &&
     5         kx                !(c >= 'A' && c <= 'F') &&
     5         kx                !(c >= '0' && c <= '9')   )
     5         kx            {
     5         kx               UNGETC( c );
     5         kx               break;
     5         kx            }
     5         kx            code *= 16;
     5         kx            if( c >= 'a' && c <= 'f' ) code += c - 'a' + 10;
     5         kx            if( c >= 'A' && c <= 'F' ) code += c - 'A' + 10;
     5         kx            if( c >= '0' && c <= '9' ) code += c - '0';
     5         kx            if( code != 0 || count != 0 )
     5         kx            {
     5         kx               if( count == 0 ) firstdig = code;
     5         kx               count++;
     5         kx            }
     5         kx            nonull = 1;
     5         kx 
     5         kx         } /* End while( 1 ) */
     5         kx 
     5         kx         if( !nonull )
     5         kx         {
     5         kx            error( "%s", "\\x used with no following hex digits" );
     5         kx         }
     5         kx         else if( count == 0 )
     5         kx            /* Digits are all 0's. Ok. */
     5         kx            ;
     5         kx         else if( (count - 1) * 4 >= 32 || /* 32 == bits per INT */
     5         kx                  (count > 1 && ((1 << (32 - (count-1) * 4)) <= firstdig )))
     5         kx         {
     5         kx            warning( "%s", "Hex escape out of range" );
     5         kx         }
     5         kx         return( code );
     5         kx 
     5         kx      case '0': case '1': case '2': case '3': case '4':
     5         kx      case '5': case '6': case '7':
     5         kx         code  = 0;
     5         kx         count = 0;
     5         kx         while( (c <= '7') && (c >= '0') && (count++ < 6) )
     5         kx         {
     5         kx            code = (code * 8) + (c - '0');
     5         kx            c = GETC();
     5         kx         }
     5         kx         UNGETC( c );
     5         kx         return( code );
     5         kx 
     5         kx      case '\\': case '\'': case '"':
     5         kx         return( c );
     5         kx 
     5         kx      case '\n':
     5         kx         lineno++; colno = 0;
     5         kx         *ignore_ptr = 1;
     5         kx         return( 0 );
     5         kx 
     5         kx      case 'n':
     5         kx         return( '\n' );
     5         kx 
     5         kx      case 't':
     5         kx         return( '\t' );
     5         kx 
     5         kx      case 'r':
     5         kx         return( '\r' );
     5         kx 
     5         kx      case 'f':
     5         kx         return( '\f' );
     5         kx 
     5         kx      case 'b':
     5         kx         return( '\b' );
     5         kx 
     5         kx      case 'a':
     5         kx         return( '\a' );
     5         kx 
     5         kx      case 'v':
     5         kx         return( '\v' );
     5         kx   }
     5         kx 
     5         kx   return( c );
     5         kx 
     5         kx } /* End of readescape() */
     5         kx 
     5         kx 
     5         kx int html_symbol_name( wchar_t *str )
     5         kx {
     5         kx   int         rc = 0, error = 0;
     5         kx   PCRE2_SIZE  offset = 0;
     5         kx   wchar_t     pattern[] = L"^(&[#A-Za-z0-9]*;)";
     5         kx 
     5         kx   pcre2_match_data *match;
     5         kx 
     5         kx   pcre2_code *regexp = pcre2_compile( (PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, 0, &error, &offset, NULL );
     5         kx   if( regexp == NULL )
     5         kx   {
     5         kx     return 0; /* PCRE compilation failed */
     5         kx   }
     5         kx 
     5         kx   match = pcre2_match_data_create_from_pattern( regexp, NULL );
     5         kx 
     5         kx   rc = pcre2_match( regexp, (PCRE2_SPTR)str, (int)wcslen(str), 0, 0, match, NULL );
     5         kx   if( rc < 0 )
     5         kx   {
     5         kx     /* not match */
     5         kx     pcre2_match_data_free( match );
     5         kx     pcre2_code_free( regexp );
     5         kx     return 0;
     5         kx   }
     5         kx   else
     5         kx   {
     5         kx     /* match */
     5         kx     pcre2_match_data_free( match );
     5         kx     pcre2_code_free( regexp );
     5         kx     return 1;
     5         kx   }
     5         kx }
     5         kx 
     5         kx 
     5         kx int yylex( void )
     5         kx {
     5         kx   wint_t   c;
     5         kx   wchar_t *p;
     5         kx   int      value;
     5         kx 
     5         kx   if( nextchar >= 0 )
     5         kx     c = nextchar, nextchar = -1;
     5         kx   else
     5         kx     c = GETC();
     5         kx 
     5         kx   while( 1 )
     5         kx   {
     5         kx     switch( c )
     5         kx     {
     5         kx       case ' ':
     5         kx       case '\t':
     5         kx       case '\f':
     5         kx       case '\v':
     5         kx       case '\b':
     5         kx         c = skip_white_space( c );
     5         kx         break;
     5         kx 
     5         kx       case '\r':
     5         kx       case '\n':
     5         kx       case '/':
     5         kx   case '#':
     5         kx       case '\\':
     5         kx         c = skip_white_space( c );
     5         kx 
     5         kx       default:
     5         kx         goto found_nonwhite;
     5         kx 
     5         kx     } /* End switch( c ) */
     5         kx found_nonwhite:
     5         kx 
     5         kx     token_buffer[0] = c;
     5         kx     token_buffer[1] = 0;
     5         kx 
     5         kx     switch( c )
     5         kx     {
     5         kx       case WEOF:
     5         kx         end_of_file = 1;
     5         kx         token_buffer[0] = 0;
     5         kx         value = 0;
     5         kx         goto done;
     5         kx         break;
     5         kx 
     5         kx       case '$': /* dollar in identifier */
     5         kx         if( 1 ) goto letter;
     5         kx         return '$';
     5         kx 
     5         kx       case 'A': case 'B': case 'C': case 'D': case 'E':
     5         kx       case 'F': case 'G': case 'H': case 'I': case 'J':
     5         kx       case 'K': case 'L': case 'M': case 'N': case 'O':
     5         kx       case 'P': case 'Q': case 'R': case 'S': case 'T':
     5         kx       case 'U': case 'V': case 'W': case 'X': case 'Y':
     5         kx       case 'Z':
     5         kx       case 'a': case 'b': case 'c': case 'd': case 'e':
     5         kx       case 'f': case 'g': case 'h': case 'i': case 'j':
     5         kx       case 'k': case 'l': case 'm': case 'n': case 'o':
     5         kx       case 'p': case 'q': case 'r': case 's': case 't':
     5         kx       case 'u': case 'v': case 'w': case 'x': case 'y':
     5         kx       case 'z':
     5         kx       case '_':
     5         kx 
     5         kx       /* RUSSIAN */
     5         kx       case L'А': case L'Б': case L'В': case L'Г': case L'Д':
     5         kx       case L'Е': case L'Ё': case L'Ж': case L'З': case L'И':
     5         kx       case L'Й': case L'К': case L'Л': case L'М': case L'Н':
     5         kx       case L'О': case L'П': case L'Р': case L'С': case L'Т':
     5         kx       case L'У': case L'Ф': case L'Х': case L'Ц': case L'Ч':
     5         kx       case L'Ш': case L'Щ': case L'Ъ': case L'Ы': case L'Ь':
     5         kx       case L'Э': case L'Ю': case L'Я':
     5         kx 
     5         kx       case L'а': case L'б': case L'в': case L'г': case L'д':
     5         kx       case L'е': case L'ё': case L'ж': case L'з': case L'и':
     5         kx       case L'й': case L'к': case L'л': case L'м': case L'н':
     5         kx       case L'о': case L'п': case L'р': case L'с': case L'т':
     5         kx       case L'у': case L'ф': case L'х': case L'ц': case L'ч':
     5         kx       case L'ш': case L'щ': case L'ъ': case L'ы': case L'ь':
     5         kx       case L'э': case L'ю': case L'я':
     5         kx 
     5         kx letter:
     5         kx         p = token_buffer;
     5         kx         while( iswalnum( c ) || c == '_' || c == '$' || c == '@' || c == '-' || c == '.' || c == ':' )
     5         kx         {
     5         kx           if( p >= token_buffer + maxtoken )
     5         kx           {
     5         kx             p = extend_token_buffer( p );
     5         kx             extend_token_utf8_buffer( token_utf8_buffer );
     5         kx           }
     5         kx 
     5         kx           *p++ = c;
     5         kx           c = GETC();
     5         kx         }
     5         kx         *p = 0;
     5         kx         nextchar = c;
     5         kx         value = VARIABLE;
     5         kx 
     5         kx         (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)token_buffer );
     5         kx 
     5         kx         /*********************
     5         kx           install into symtab
     5         kx          *********************/
     5         kx         {
     5         kx           if( !strcmp( "section", (const char *)token_utf8_buffer ) )
     5         kx           {
     5         kx             value = SECTION;
     5         kx             yylval.sym = install( NULL, SECTION, NULL );
     5         kx           }
     5         kx           else if( !strcmp( "repo", (const char *)token_utf8_buffer ) )
     5         kx           {
     5         kx             value = REPO;
     5         kx             yylval.sym = install( NULL, REPO, NULL );
     5         kx           }
     5         kx           else
     5         kx           {
     5         kx             SYMBOL *sp = NULL;
     5         kx 
     5         kx             if( (sp = lookup( (const char *)token_utf8_buffer )) == (SYMBOL *)0 )
     5         kx               sp = install( (const char *)token_utf8_buffer, VARIABLE, 0 );
     5         kx 
     5         kx             /******************************************************************
     5         kx               Если переменная уже в таблице, то мы предполагаем, что она имеет
     5         kx               тип равный одному из допустимых: NUMERICAL, STRING, или PATH.
     5         kx              ******************************************************************/
     5         kx             if( sp->type != VARIABLE )
     5         kx             {
     5         kx               switch( sp->type )
     5         kx               {
     5         kx                 case NUMERICAL:
     5         kx                 case STRING:
     5         kx                 case PATH:
     5         kx                   value = sp->type;
     5         kx                   break;
     5         kx                 default:
     5         kx                   /* error */
     5         kx                   break;
     5         kx               }
     5         kx             }
     5         kx             yylval.sym = sp;
     5         kx           }
     5         kx         }
     5         kx 
     5         kx         token_buffer[0] = 0;
     5         kx         token_utf8_buffer[0] = 0;
     5         kx         goto done;
     5         kx         break;
     5         kx 
     5         kx       case '0': case '1': case '2': case '3': case '4':
     5         kx       case '5': case '6': case '7': case '8': case '9':
     5         kx         {
     5         kx           int constant = 0;
     5         kx /* integer: */
     5         kx           p = token_buffer;
     5         kx           while( iswdigit( c ) )
     5         kx           {
     5         kx             if( p >= token_buffer + maxtoken )
     5         kx             {
     5         kx               p = extend_token_buffer( p );
     5         kx               extend_token_utf8_buffer( token_utf8_buffer );
     5         kx             }
     5         kx 
     5         kx             *p++ = c;
     5         kx             c = GETC();
     5         kx           }
     5         kx           *p = 0;
     5         kx           nextchar = c;
     5         kx           value = NUMERICAL;
     5         kx 
     5         kx           (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)token_buffer );
     5         kx 
     5         kx           /*********************
     5         kx             install into symtab
     5         kx            *********************/
     5         kx           {
     5         kx             (void)swscanf( (const wchar_t *)token_buffer, L"%d", &constant );
     5         kx             yylval.sym = install( NULL, NUMERICAL, constant );
     5         kx           }
     5         kx 
     5         kx           token_buffer[0] = 0;
     5         kx           token_utf8_buffer[0] = 0;
     5         kx           goto done;
     5         kx           break;
     5         kx         }
     5         kx 
     5         kx       case '\'':
     5         kx /* path_constant: */
     5         kx         {
     5         kx           int           num_chars = 0;
     5         kx           unsigned int  width = 8; /* to allow non asscii in path set width = 16 */
     5         kx 
     5         kx           while( 1 )
     5         kx           {
     5         kx tryagain:
     5         kx             c = GETC();
     5         kx 
     5         kx             if( c == '\'' || c == WEOF ) break;
     5         kx             if( c == '\\' )
     5         kx             {
     5         kx               int ignore = 0;
     5         kx               c = readescape( &ignore );
     5         kx               if( ignore ) goto tryagain;
     5         kx               if( (unsigned)c >= (1 << width) )
     5         kx               {
     5         kx                 warning( "%s", "Escape sequence out of range" );
     5         kx               }
     5         kx             }
     5         kx             else if( c == '\n' ) { lineno++; colno = 0; }
     5         kx 
     5         kx             num_chars++;
     5         kx             if( num_chars > maxtoken - 4 )
     5         kx             {
     5         kx               extend_token_buffer( token_buffer );
     5         kx               extend_token_utf8_buffer( token_utf8_buffer );
     5         kx             }
     5         kx 
     5         kx             token_buffer[num_chars] = c;
     5         kx 
     5         kx           } /* End while( 1 ) */
     5         kx 
     5         kx           token_buffer[num_chars + 1] = '\'';
     5         kx           token_buffer[num_chars + 2] = 0;
     5         kx 
     5         kx           if( c != '\'' )
     5         kx           {
     5         kx             error( "%s", "Malformated path constant" );
     5         kx           }
     5         kx           else if( num_chars == 0 )
     5         kx           {
     5         kx             error( "%s", "Empty path constant" );
     5         kx           }
     5         kx 
     5         kx           /* build path: */
     5         kx           {
     5         kx             wchar_t *s, *string = NULL;
     5         kx             wchar_t *p = &token_buffer[0];
     5         kx 
     5         kx             while( *p )
     5         kx             {
     5         kx               if( *p == '\n' || *p == '\t' ) *p = ' ';
     5         kx               ++p;
     5         kx             }
     5         kx 
     5         kx             string = (wchar_t *)malloc( maxtoken * 4 + 10 );
     5         kx 
     5         kx             p = &token_buffer[1];
     5         kx             s = &string[0];
     5         kx 
     5         kx             while( *p == ' ' ) ++p;
     5         kx 
     5         kx             while( *p )
     5         kx             {
     5         kx               if( *p != ' ' )
     5         kx                 *s++ = *p++;
     5         kx               else
     5         kx                 ++p;
     5         kx             }
     5         kx             --s; *s = 0;
     5         kx             while( *(s-1) == ' ' ) --s;
     5         kx             *s = 0;
     5         kx 
     5         kx             (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)string );
     5         kx 
     5         kx             free( string );
     5         kx           }
     5         kx 
     5         kx           /*********************
     5         kx             install into symtab
     5         kx            *********************/
     5         kx           {
     5         kx             yylval.sym = install( NULL, PATH, (char *)token_utf8_buffer );
     5         kx           }
     5         kx 
     5         kx           token_buffer[0] = 0;
     5         kx           token_utf8_buffer[0] = 0;
     5         kx           value = PATH;
     5         kx           goto done;
     5         kx         }
     5         kx 
     5         kx       case '"':
     5         kx /* string_constant: */
     5         kx         {
     5         kx           c = GETC();
     5         kx           p = token_buffer + 1;
     5         kx 
     5         kx           while( c != '"' && c >= 0 )
     5         kx           {
     5         kx             if( c == '\\' )
     5         kx             {
     5         kx               int ignore = 0;
     5         kx               c = readescape( &ignore );
     5         kx               if( ignore ) goto skipnewline;
     5         kx             }
     5         kx             else if( c == '\n' ) lineno++;
     5         kx 
     5         kx             if( p == token_buffer + maxtoken )
     5         kx             {
     5         kx               p = extend_token_buffer( p );
     5         kx               extend_token_utf8_buffer( token_utf8_buffer );
     5         kx             }
     5         kx             *p++ = c;
     5         kx 
     5         kx skipnewline:
     5         kx             c = GETC();
     5         kx 
     5         kx           } /* End while( " ) */
     5         kx 
     5         kx           *p = 0;
     5         kx 
     5         kx           if( c < 0 )
     5         kx           {
     5         kx             error( "%s", "Unterminated string constant" );
     5         kx           }
     5         kx 
     5         kx 
     5         kx           *p++ = '"';
     5         kx           *p = 0;
     5         kx 
     5         kx           /* build string: */
     5         kx           {
     5         kx             wchar_t *s, *string = NULL;
     5         kx             wchar_t *p = &token_buffer[0];
     5         kx 
     5         kx             while( *p )
     5         kx             {
     5         kx               if( *p == '\n' || *p == '\t' ) *p = ' ';
     5         kx               ++p;
     5         kx             }
     5         kx 
     5         kx             string = (wchar_t *)malloc( maxtoken * 4 + 10 );
     5         kx 
     5         kx             p = &token_buffer[1];
     5         kx             s = &string[0];
     5         kx 
     5         kx             while( *p == ' ' ) ++p;
     5         kx 
     5         kx             while( *p )
     5         kx             {
     5         kx               if( *p != ' ' )
     5         kx               {
     5         kx                 switch( *p )
     5         kx                 {
     5         kx                   case '&':
     5         kx                     /************************************************
     5         kx                       Skip HTML symbol names such as &nbsp,... etc.:
     5         kx                      */
     5         kx                     if( ! html_symbol_name( p ) )
     5         kx                     {
     5         kx                       *s++ = '&'; *s++ = 'a'; *s++ = 'm'; *s++ = 'p'; *s++ = ';'; ++p;
     5         kx                     }
     5         kx                     else
     5         kx                     {
     5         kx                       *s++ = *p++;
     5         kx                     }
     5         kx                     break;
     5         kx 
     5         kx                   case '<':
     5         kx                     *s++ = '&'; *s++ = 'l'; *s++ = 't'; *s++ = ';'; ++p;
     5         kx                     break;
     5         kx 
     5         kx                   case '>':
     5         kx                     *s++ = '&'; *s++ = 'g'; *s++ = 't'; *s++ = ';'; ++p;
     5         kx                     break;
     5         kx 
     5         kx                   default:
     5         kx                     *s++ = *p++;
     5         kx                     break;
     5         kx                 }
     5         kx               }
     5         kx               else
     5         kx               {
     5         kx                 /* skip multiple spaces */
     5         kx                 if( *(p+1) != ' ' )
     5         kx                   *s++ = *p++;
     5         kx                 else
     5         kx                   ++p;
     5         kx               }
     5         kx             }
     5         kx             --s; *s = 0;
     5         kx             while( *(s-1) == ' ' ) --s;
     5         kx             *s = 0;
     5         kx 
     5         kx             (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)string );
     5         kx 
     5         kx             free( string );
     5         kx           }
     5         kx 
     5         kx           /*********************
     5         kx             install into symtab
     5         kx            *********************/
     5         kx           {
     5         kx             yylval.sym = install( NULL, STRING, (char *)token_utf8_buffer );
     5         kx           }
     5         kx 
     5         kx           token_buffer[0] = 0;
     5         kx           token_utf8_buffer[0] = 0;
     5         kx           value = STRING;
     5         kx           goto done;
     5         kx         }
     5         kx 
     5         kx       case 0:
     5         kx         value = 1;
     5         kx         goto done;
     5         kx         break;
     5         kx 
     5         kx       case '{':
     5         kx         indent_level++;
     5         kx         value = c;
     5         kx         goto done;
     5         kx         break;
     5         kx 
     5         kx       case '}':
     5         kx         indent_level--;
     5         kx         value = c;
     5         kx         goto done;
     5         kx         break;
     5         kx 
     5         kx       default:
     5         kx         value = c;
     5         kx         goto done;
     5         kx         break;
     5         kx 
     5         kx     } /* End switch( c ) */
     5         kx 
     5         kx   } /* End while( 1 ) */
     5         kx 
     5         kx done:
     5         kx 
     5         kx    return( value );
     5         kx }