From 7868767ddeb65bc1c3fa072b1b121d2ec9c129d3 Mon Sep 17 00:00:00 2001 From: Peace-Maker <peace-maker@wcfan.de> Date: Tue, 27 May 2014 19:30:13 -0700 Subject: [PATCH] Add string literal concatenation using ellipses "..." (bug 4261, PR #5) commit cd0d31805c8c2bb3ba64322dda83834c61b0bda7 Author: Peace-Maker <peace-maker@wcfan.de> Date: Wed May 28 03:07:25 2014 +0200 Ignore \ ctrlchar in lexing commit cf617a4d209fb11e2b2b79224f8dbd438ec21c7c Author: Peace-Maker <peace-maker@wcfan.de> Date: Tue May 27 13:32:59 2014 +0200 Add string literal concatenation using ellipses "..." (bug 4261) Backported the changes CompuPhase did to the compiler to support string literal concatenation including all fixes in later commits from r30 on. http://code.google.com/p/pawnscript/source/detail?r=30 Pawn uses an ellipsis "..." to concatenate, so it looks like this: #define PROJECT_AUTHOR "Greyscale" #define PROJECT_COPYRIGHT "Copyright (C) 2010 " ... PROJECT_AUTHOR This would result in PROJECT_COPYRIGHT being defined as "Copyright (C) 2010 Greyscale" While I've been at it, the feature for stringizing a macro parameter was ported too. From the changelog for version 3.3.4026 (http://www.compuphase.com/pawn/pawnhistory.htm): The macro substitution processor now recognizes the "#" character for "stringizing" a parameter. For example, if you have the definition #define log(%1) #%1 Then the expression log(test) will result in "test". Note that concatenation of literal strings requires an ellipsis in pawn (which is different than C/C++). So to combine the parameter with literal strings, use a syntax like: #define log(%1) "logging: " ... #%1 ... "\n" The stringize operator is only available in the replacement text of a macro. 
Doing PrintToServer(log(hello)); would print logging: hello\n --- sourcepawn/compiler/libpawnc.c | 48 +++++--- sourcepawn/compiler/sc.h | 4 +- sourcepawn/compiler/sc1.c | 2 +- sourcepawn/compiler/sc2.c | 193 ++++++++++++++++++++++++++++----- sourcepawn/compiler/sci18n.c | 3 +- 5 files changed, 201 insertions(+), 49 deletions(-) diff --git a/sourcepawn/compiler/libpawnc.c b/sourcepawn/compiler/libpawnc.c index e0e736a2..32a47c9c 100644 --- a/sourcepawn/compiler/libpawnc.c +++ b/sourcepawn/compiler/libpawnc.c @@ -146,16 +146,6 @@ void pc_closesrc(void *handle) fclose((FILE*)handle); } -/* pc_resetsrc() - * "position" may only hold a pointer that was previously obtained from - * pc_getpossrc() - */ -void pc_resetsrc(void *handle,void *position) -{ - assert(handle!=NULL); - fsetpos((FILE*)handle,(fpos_t *)position); -} - /* pc_readsrc() * Reads a single line from the source file (or up to a maximum number of * characters if the line in the input file is too long). @@ -174,12 +164,40 @@ int pc_writesrc(void *handle,unsigned char *source) return fputs((char*)source,(FILE*)handle) >= 0; } -void *pc_getpossrc(void *handle) -{ - static fpos_t lastpos; /* may need to have a LIFO stack of such positions */ +#define MAXPOSITIONS 4 +static fpos_t srcpositions[MAXPOSITIONS]; +static unsigned char srcposalloc[MAXPOSITIONS]; - fgetpos((FILE*)handle,&lastpos); - return &lastpos; +void *pc_getpossrc(void *handle,void *position) +{ + if (position==NULL) { + /* allocate the first free slot of the position pool */ + int i; + for (i=0; i<MAXPOSITIONS && srcposalloc[i]!=0; i++) + /* nothing */; + assert(i<MAXPOSITIONS); /* if not, there is a queue overrun */ + if (i>=MAXPOSITIONS) + return NULL; + position=&srcpositions[i]; + srcposalloc[i]=1; + } else { + /* use the given slot; it must point into srcpositions[] (bound counted in elements, not bytes) */ + assert((fpos_t*)position>=srcpositions && (fpos_t*)position<srcpositions+MAXPOSITIONS); + } /* if */ + fgetpos((FILE*)handle,(fpos_t*)position); + return position; +} + +/* pc_resetsrc() + * "position" may only hold a pointer that 
was previously obtained from + * pc_getpossrc() + */ +void pc_resetsrc(void *handle,void *position) +{ + assert(handle!=NULL); + assert(position!=NULL); + fsetpos((FILE*)handle,(fpos_t *)position); + /* note: the item is not cleared from the pool */ } int pc_eofsrc(void *handle) diff --git a/sourcepawn/compiler/sc.h b/sourcepawn/compiler/sc.h index 27ea591e..0b87fe0e 100644 --- a/sourcepawn/compiler/sc.h +++ b/sourcepawn/compiler/sc.h @@ -484,10 +484,10 @@ int pc_error(int number,char *message,char *filename,int firstline,int lastline, void *pc_opensrc(char *filename); /* reading only */ void *pc_createsrc(char *filename); void pc_closesrc(void *handle); /* never delete */ -void pc_resetsrc(void *handle,void *position); /* reset to a position marked earlier */ char *pc_readsrc(void *handle,unsigned char *target,int maxchars); int pc_writesrc(void *handle,unsigned char *source); -void *pc_getpossrc(void *handle); /* mark the current position */ +void *pc_getpossrc(void *handle,void *position); /* mark the current position */ +void pc_resetsrc(void *handle,void *position); /* reset to a position marked earlier */ int pc_eofsrc(void *handle); /* output to intermediate (.ASM) file */ diff --git a/sourcepawn/compiler/sc1.c b/sourcepawn/compiler/sc1.c index 717a9e25..28c3215b 100644 --- a/sourcepawn/compiler/sc1.c +++ b/sourcepawn/compiler/sc1.c @@ -333,7 +333,7 @@ int pc_compile(int argc, char *argv[]) } /* if */ /* do the first pass through the file (or possibly two or more "first passes") */ sc_parsenum=0; - inpfmark=pc_getpossrc(inpf_org); + inpfmark=pc_getpossrc(inpf_org,NULL); do { /* reset "defined" flag of all functions and global variables */ reduce_referrers(&glbtab); diff --git a/sourcepawn/compiler/sc2.c b/sourcepawn/compiler/sc2.c index 312eeafd..a2ce4c9d 100644 --- a/sourcepawn/compiler/sc2.c +++ b/sourcepawn/compiler/sc2.c @@ -38,8 +38,9 @@ #endif /* flags for litchar() */ -#define RAWMODE 1 -#define UTF8MODE 2 +#define RAWMODE 0x1 +#define UTF8MODE 0x2 
+#define ISPACKED 0x4 static cell litchar(const unsigned char **lptr,int flags); static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int automaton,int *cmptag); @@ -1492,6 +1493,7 @@ static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char const unsigned char *p,*s,*e; unsigned char *args[10]; int match,arg,len,argsnum=0; + int stringize; memset(args,0,sizeof args); @@ -1588,11 +1590,18 @@ static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char if (match) { /* calculate the length of the substituted string */ for (e=(unsigned char*)substitution,len=0; *e!='\0'; e++) { + if(*e=='#' && *(e+1)=='%' && isdigit(*(e+2)) && argsnum) { + stringize=1; + e++; /* skip '#' */ + } else { + stringize=0; + } /* if */ if (*e=='%' && isdigit(*(e+1)) && argsnum) { arg=*(e+1)-'0'; assert(arg>=0 && arg<=9); + assert(stringize==0 || stringize==1); if (args[arg]!=NULL) { - len+=strlen((char*)args[arg]); + len+=strlen((char*)args[arg])+2*stringize; e++; } else { len++; @@ -1608,12 +1617,22 @@ static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char /* substitute pattern */ strdel((char*)line,(int)(s-line)); for (e=(unsigned char*)substitution,s=line; *e!='\0'; e++) { + if (*e=='#' && *(e+1)=='%' && isdigit(*(e+2))) { + stringize=1; + e++; /* skip '#' */ + } else { + stringize=0; + } /* if */ if (*e=='%' && isdigit(*(e+1))) { arg=*(e+1)-'0'; assert(arg>=0 && arg<=9); if (args[arg]!=NULL) { + if (stringize) + strins((char*)s++,"\"",1); strins((char*)s,(char*)args[arg],strlen((char*)args[arg])); s+=strlen((char*)args[arg]); + if (stringize) + strins((char*)s++,"\"",1); } else { error(236); /* parameter does not exist, incorrect #define pattern */ strins((char*)s,(char*)e,2); @@ -1702,6 +1721,61 @@ static void substallpatterns(unsigned char *line,int buffersize) } #endif +/* scanellipsis + * Look for ... 
in the string and (if not there) in the remainder of the file, + * but restore (or keep intact): + * - the current position in the file + * - the comment parsing state + * - the line buffer used by the lexical analyser + * - the active line number and the active file + * + * The function returns 1 if an ellipsis was found and 0 if not + */ +static int scanellipsis(const unsigned char *lptr) +{ + static void *inpfmark=NULL; + unsigned char *localbuf; + short localcomment,found; + + /* first look for the ellipsis in the remainder of the string */ + while (*lptr<=' ' && *lptr!='\0') + lptr++; + if (lptr[0]=='.' && lptr[1]=='.' && lptr[2]=='.') + return 1; + if (*lptr!='\0') + return 0; /* stumbled on something that is not an ellipsis and not white-space */ + + /* the ellipsis was not on the active line, read more lines from the current + * file (but save its position first) + */ + if (inpf==NULL || pc_eofsrc(inpf)) + return 0; /* quick exit: cannot read after EOF */ + if ((localbuf=(unsigned char*)malloc((sLINEMAX+1)*sizeof(unsigned char)))==NULL) + return 0; + inpfmark=pc_getpossrc(inpf,inpfmark); + localcomment=icomment; + + found=0; + /* read from the file, skip preprocessing, but strip off comments */ + while (!found && pc_readsrc(inpf,localbuf,sLINEMAX)!=NULL) { + stripcom(localbuf); + lptr=localbuf; + /* skip white space */ + while (*lptr<=' ' && *lptr!='\0') + lptr++; + if (lptr[0]=='.' && lptr[1]=='.' 
&& lptr[2]=='.') + found=1; + else if (*lptr!='\0') + break; /* stumbled on something that is not an ellipsis and not white-space */ + } /* while */ + + /* clean up & reset */ + free(localbuf); + pc_resetsrc(inpf,inpfmark); + icomment=localcomment; + return found; +} + /* preprocess * * Reads a line by readline() into "pline" and performs basic preprocessing: @@ -1865,7 +1939,7 @@ char *sc_tokens[] = { SC_FUNC int lex(cell *lexvalue,char **lexsym) { - int i,toolong,newline,stringflags; + int i,toolong,newline; char **tokptr; const unsigned char *starttoken; @@ -1977,35 +2051,94 @@ SC_FUNC int lex(cell *lexvalue,char **lexsym) error(220); } /* if */ } /* if */ - } else if (*lptr=='\"' || (*lptr==sc_ctrlchar && *(lptr+1)=='\"')) - { /* unpacked string literal */ + } else if (*lptr=='\"' /* unpacked string literal */ +#if 0 + || (*lptr==sc_ctrlchar && *(lptr+1)=='\"') /* unpacked raw string */ + || (*lptr=='!' && *(lptr+1)=='\"') /* packed string */ + || (*lptr=='!' && *(lptr+1)==sc_ctrlchar && *(lptr+2)=='\"') /* packed raw string */ + || (*lptr==sc_ctrlchar && *(lptr+1)=='!' && *(lptr+2)=='\"') /* packed raw string */ +#endif + ) + { + int stringflags,segmentflags; + char *cat; _lextok=tSTRING; - stringflags= (*lptr==sc_ctrlchar) ? RAWMODE : 0; *lexvalue=_lexval=litidx; - lptr+=1; /* skip double quote */ - if ((stringflags & RAWMODE)!=0) - lptr+=1; /* skip "escape" character too */ - /* Note that this should always be packedstring() for SourcePawn */ - lptr=sc_packstr ? packedstring(lptr,stringflags) : unpackedstring(lptr,stringflags); - if (*lptr=='\"') - lptr+=1; /* skip final quote */ + _lexstr[0]='\0'; + stringflags=-1; /* to mark the first segment */ + for ( ;; ) { + if(*lptr=='!') + segmentflags= (*(lptr+1)==sc_ctrlchar) ? RAWMODE | ISPACKED : ISPACKED; + else if (*lptr==sc_ctrlchar) + segmentflags= (*(lptr+1)=='!') ? RAWMODE | ISPACKED : RAWMODE; + else + segmentflags=0; + if ((segmentflags & ISPACKED)!=0) + lptr+=1; /* skip '!' 
character */ + if ((segmentflags & RAWMODE)!=0) + lptr+=1; /* skip "escape" character too */ + assert(*lptr=='\"'); + lptr+=1; + if (stringflags==-1) + stringflags=segmentflags; + else if (stringflags!=segmentflags) + error(238); /* mixing packed/unpacked/raw strings in concatenation */ + cat=strchr(_lexstr,'\0'); + assert(cat!=NULL); + while (*lptr!='\"' && *lptr!='\0' && (cat-_lexstr)<sLINEMAX) { + if (*lptr!='\a') { /* ignore '\a' (which was inserted at a line concatenation) */ + *cat++=*lptr; + if (*lptr==sc_ctrlchar && *(lptr+1)!='\0') + *cat++=*++lptr; /* skip escape character plus the escaped character */ + } /* if */ + lptr++; + } /* while */ + *cat='\0'; /* terminate string */ + if (*lptr=='\"') + lptr+=1; /* skip final quote */ + else + error(37); /* invalid (non-terminated) string */ + /* see whether an ellipsis is following the string */ + if (!scanellipsis(lptr)) + break; /* no concatenation of string literals */ + /* there is an ellipses, go on parsing (this time with full preprocessing) */ + while (*lptr<=' ') { + if (*lptr=='\0') { + preprocess(); /* preprocess resets "lptr" */ + assert(freading && lptr!=term_expr); + } else { + lptr++; + } /* if */ + } /* while */ + assert(freading && lptr[0]=='.' && lptr[1]=='.' && lptr[2]=='.'); + lptr+=3; + while (*lptr<=' ') { + if (*lptr=='\0') { + preprocess(); /* preprocess resets "lptr" */ + assert(freading && lptr!=term_expr); + } else { + lptr++; + } /* if */ + } /* while */ + if (!freading || !(*lptr=='\"' +#if 0 + || *lptr==sc_ctrlchar && *(lptr+1)=='\"' + || *lptr=='!' && *(lptr+1)=='\"' + || *lptr=='!' && *(lptr+1)==sc_ctrlchar && *(lptr+2)=='\"' + || *lptr==sc_ctrlchar && *(lptr+1)=='!' 
&& *(lptr+2)=='\"' +#endif + )) + { + error(37); /* invalid string concatenation */ + break; + } /* if */ + } /* for */ + if (sc_packstr) + stringflags ^= ISPACKED; /* invert packed/unpacked parameters */ + if ((stringflags & ISPACKED)!=0) + packedstring(_lexstr,stringflags); else - error(37); /* invalid (non-terminated) string */ - } else if ((*lptr=='!' && *(lptr+1)=='\"') - || (*lptr=='!' && *(lptr+1)==sc_ctrlchar && *(lptr+2)=='\"') - || (*lptr==sc_ctrlchar && *(lptr+1)=='!' && *(lptr+2)=='\"')) - { /* packed string literal */ - _lextok=tSTRING; - stringflags= (*lptr==sc_ctrlchar || *(lptr+1)==sc_ctrlchar) ? RAWMODE : 0; - *lexvalue=_lexval=litidx; - lptr+=2; /* skip exclamation point and double quote */ - if ((stringflags & RAWMODE)!=0) - lptr+=1; /* skip "escape" character too */ - lptr=sc_packstr ? unpackedstring(lptr,stringflags) : packedstring(lptr,stringflags); - if (*lptr=='\"') - lptr+=1; /* skip final quote */ - else - error(37); /* invalid (non-terminated) string */ + unpackedstring(_lexstr,stringflags); } else if (*lptr=='\'') { /* character literal */ lptr+=1; /* skip quote */ _lextok=tNUMBER; diff --git a/sourcepawn/compiler/sci18n.c b/sourcepawn/compiler/sci18n.c index 15c80f0d..8730e4ec 100644 --- a/sourcepawn/compiler/sci18n.c +++ b/sourcepawn/compiler/sci18n.c @@ -396,11 +396,12 @@ SC_FUNC int scan_utf8(FILE *fp,const char *filename) #if defined NO_UTF8 return 0; #else - void *resetpos=pc_getpossrc(fp); + static void *resetpos=NULL; int utf8=TRUE; int firstchar=TRUE,bom_found=FALSE; const unsigned char *ptr; + resetpos=pc_getpossrc(fp,resetpos); while (utf8 && pc_readsrc(fp,pline,sLINEMAX)!=NULL) { ptr=pline; if (firstchar) {