From b8bd7c517e79c89a1204e6fce590ab30297f4517 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 26 Jun 2014 21:05:19 -0700 Subject: [PATCH] Add parsing functions. --- sourcepawn/compiler/sc.h | 37 +++- sourcepawn/compiler/sc1.c | 324 ++++++++++++++++++++++++++--------- sourcepawn/compiler/sc2.c | 24 +++ sourcepawn/compiler/sc5.scp | 4 +- sourcepawn/compiler/scvars.c | 1 + 5 files changed, 301 insertions(+), 89 deletions(-) diff --git a/sourcepawn/compiler/sc.h b/sourcepawn/compiler/sc.h index 3d052794..69a23640 100644 --- a/sourcepawn/compiler/sc.h +++ b/sourcepawn/compiler/sc.h @@ -50,6 +50,7 @@ #define CTRL_CHAR '\\' /* default control character */ #define sCHARBITS 8 /* size of a packed character */ +#define MAXTAGS 16 #define sDIMEN_MAX 4 /* maximum number of array dimensions */ #define sLINEMAX 4095 /* input line length (in characters) */ #define sCOMP_STACK 32 /* maximum nesting of #if .. #endif sections */ @@ -256,14 +257,35 @@ typedef struct svalue_s { int lvalue; } svalue; -#define DECLFLAG_ONLY_NEW_TYPES 0x1 +#define TYPEFLAG_ONLY_NEW 0x01 // Only new-style types are allowed. +#define TYPEFLAG_ARGUMENT 0x02 // The declaration is for an argument. +#define TYPEFLAG_VARIABLE 0x04 // The declaration is for a variable. +#define TYPEFLAG_ENUMROOT 0x08 // Multi-dimensional arrays should have an enumroot. +#define TYPEFLAG_NO_POSTDIMS 0x10 // Do not parse post-fix dimensions. +#define TYPEFLAG_RETURN 0x20 // Return type. +#define TYPEMASK_NAMED_DECL (TYPEFLAG_ARGUMENT | TYPEFLAG_VARIABLE) + +typedef struct { + char type[sNAMEMAX + 1]; + + // Array information. + int numdim; + int dim[sDIMEN_MAX]; + int idxtag[sDIMEN_MAX]; + constvalue *enumroot; + + // Type information. + int tag; // Same as tags[0]. + int tags[MAXTAGS]; // List of tags if multi-tagged. + int numtags; // Number of tags found. + int ident; // Either iREFERENCE, iARRAY, or iVARIABLE. + char usage; // Usage flags. +} typeinfo_t; /* For parsing declarations. 
*/ typedef struct { - char type[sNAMEMAX + 1]; - constvalue *enumroot; - int tag; - char usage; + char name[sNAMEMAX + 1]; + typeinfo_t type; } declinfo_t; /* "while" statement queue (also used for "for" and "do - while" loops) */ @@ -513,7 +535,7 @@ int pc_findtag(const char *name); constvalue *pc_tagptr(const char *name); int pc_enablewarning(int number,int enable); const char *pc_tagname(int tag); -int parse_decl(declinfo_t *decl, const token_t *first, int flags); +int parse_decl(declinfo_t *decl, int flags); /* * Functions called from the compiler (to be implemented by you) @@ -599,11 +621,13 @@ SC_FUNC void preprocess(void); SC_FUNC void lexinit(void); SC_FUNC int lex(cell *lexvalue,char **lexsym); SC_FUNC int lextok(token_t *tok); +SC_FUNC int lexpeek(int id); SC_FUNC void lexpush(void); SC_FUNC void lexclr(int clreol); SC_FUNC int matchtoken(int token); SC_FUNC int tokeninfo(cell *val,char **str); SC_FUNC int needtoken(int token); +SC_FUNC int matchtoken2(int id, token_t *tok); SC_FUNC int expecttoken(int id, token_t *tok); SC_FUNC int matchsymbol(token_ident_t *ident); SC_FUNC int needsymbol(token_ident_t *ident); @@ -890,6 +914,7 @@ SC_VDECL int pc_tag_void; /* global void tag */ SC_VDECL int pc_tag_object; /* root object tag */ SC_VDECL int pc_anytag; /* global any tag */ SC_VDECL int glbstringread; /* last global string read */ +SC_VDECL int sc_require_newdecls; /* only newdecls are allowed */ SC_VDECL constvalue sc_automaton_tab; /* automaton table */ SC_VDECL constvalue sc_state_tab; /* state table */ diff --git a/sourcepawn/compiler/sc1.c b/sourcepawn/compiler/sc1.c index f74aee8d..21613b8c 100644 --- a/sourcepawn/compiler/sc1.c +++ b/sourcepawn/compiler/sc1.c @@ -3242,70 +3242,6 @@ static void declstruct(void) matchtoken(';'); /* eat up optional semicolon */ } -int parse_typeexpr(declinfo_t *decl, const token_t *first, int flags) -{ - token_t tok; - - if (first) { - tok = *first; - } else { - lextok(&tok); - } - - if (tok.id == tCONST) { - 
decl->usage |= uCONST; - lextok(&tok); - } - - if (tok.id == tLABEL && (flags & DECLFLAG_ONLY_NEW_TYPES)) { - error(120); - return FALSE; - } - - if (tok.id == '[') { - error(121); - return FALSE; - } - - switch (tok.id) { - case tINT: - strcpy(decl->type, "int"); - decl->tag = 0; - break; - case tCHAR: - strcpy(decl->type, "char"); - decl->tag = pc_tag_string; - break; - case tVOID: - strcpy(decl->type, "void"); - decl->tag = pc_tag_void; - break; - case tOBJECT: - strcpy(decl->type, "object"); - decl->tag = pc_tag_object; - break; - case tSYMBOL: - strcpy(decl->type, tok.str); - if (strcmp(decl->type, "float") == 0) { - decl->tag = sc_rationaltag; - } else { - decl->tag = pc_findtag(decl->type); - if (decl->tag == sc_rationaltag) - error(98, "Float", "float"); - else if (decl->tag == pc_tag_string) - error(98, "String", "char"); - else if (decl->tag == 0) - error(98, "_", "int"); - } - break; - default: - error(122); - return FALSE; - } - - return TRUE; -} - // Consumes a line, returns FALSE if EOF hit. static int consume_line() { @@ -3326,18 +3262,240 @@ static int consume_line() return TRUE; } -// Parse a new-style declaration. If the name was already fetched (because we -// didn't have enough lookahead), it can be given ahead of time. -int parse_decl(declinfo_t *decl, const token_t *first, int flags) +static int parse_new_typeexpr(typeinfo_t *type, const token_t *first, int flags) { - memset(decl, 0, sizeof(*decl)); + token_t tok; - if (!parse_typeexpr(decl, first, flags)) + if (first) + tok = *first; + else + lextok(&tok); + + type->ident = iVARIABLE; + + if (tok.id == tCONST) { + type->usage |= uCONST; + lextok(&tok); + } + + if (tok.id == '[') { + // Not yet supported for return vals. This is allowed with old decls, but + // it's a huge hack. For now we forbid it in new code until it works right. 
+ if (flags & TYPEFLAG_RETURN) + error(136); + + while (tok.id == '[') { + if (type->numdim == sDIMEN_MAX) { + error(53); + break; + } + type->dim[type->numdim++] = 0; + if (!needtoken(']')) + return FALSE; + lextok(&tok); + } + type->ident = iARRAY; + } + + switch (tok.id) { + case tINT: + strcpy(type->type, "int"); + type->tag = 0; + break; + case tCHAR: + strcpy(type->type, "char"); + type->tag = pc_tag_string; + break; + case tVOID: + strcpy(type->type, "void"); + type->tag = pc_tag_void; + break; + case tOBJECT: + strcpy(type->type, "object"); + type->tag = pc_tag_object; + break; + case tSYMBOL: + strcpy(type->type, tok.str); + if (strcmp(type->type, "float") == 0) { + type->tag = sc_rationaltag; + } else { + type->tag = pc_findtag(type->type); + if (type->tag == sc_rationaltag) + error(98, "Float", "float"); + else if (type->tag == pc_tag_string) + error(98, "String", "char"); + else if (type->tag == 0) + error(98, "_", "int"); + } + break; + default: + error(122); + return FALSE; + } + + if (flags & TYPEFLAG_ARGUMENT) { + if (matchtoken('&')) { + if (type->ident == iARRAY) { + error(137); + return FALSE; + } + type->ident = iREFERENCE; + } + } + + type->tags[0] = type->tag; + type->numtags = 1; + return TRUE; +} + +static void parse_old_array_dims(declinfo_t *decl, int flags) +{ + typeinfo_t *type = &decl->type; + constvalue **enumrootp; + + // Illegal declaration (we'll have a name since ref requires decl). 
+ if (type->ident == iREFERENCE) + error(67, decl->name); + + if (flags & TYPEFLAG_ENUMROOT) + enumrootp = &type->enumroot; + else + enumrootp = NULL; + + do { + cell size; + + if (type->numdim == sDIMEN_MAX) { + error(53); + return; + } + + size = needsub(&type->idxtag[type->numdim], enumrootp); + if (size > INT_MAX) + error(165); + + type->dim[type->numdim++] = (int)size; + } while (matchtoken('[')); + + type->ident = iARRAY; +} + +static int parse_old_decl(declinfo_t *decl, int flags) +{ + token_t tok; + typeinfo_t *type = &decl->type; + + type->ident = iVARIABLE; + + if (matchtoken(tCONST)) + type->usage |= uCONST; + + if (flags & TYPEFLAG_ARGUMENT) { + if (matchtoken('&')) + type->ident = iREFERENCE; + + // grammar for multitags is: + // multi-tag ::= '{' (symbol (',' symbol)*)? '}' ':' + if (matchtoken('{')) { + while (type->numtags < MAXTAGS) { + int tag = 0; + + if (!matchtoken('_')) { + // If we don't get the magic tag '_', then we should have a symbol. + if (expecttoken(tSYMBOL, &tok)) + tag = pc_addtag(tok.str); + } + type->tags[type->numtags++] = tag; + + if (matchtoken('}')) + break; + needtoken(','); + } + needtoken(':'); + } + } else { + if (matchtoken2(tLABEL, &tok)) + type->tags[type->numtags++] = pc_addtag(tok.str); + } + + if (flags & TYPEMASK_NAMED_DECL) { + if (expecttoken(tSYMBOL, &tok)) + strcpy(decl->name, tok.str); + else + strcpy(decl->name, ""); + } + + if ((flags & TYPEMASK_NAMED_DECL) && !(flags & TYPEFLAG_NO_POSTDIMS)) { + if (matchtoken('[')) + parse_old_array_dims(decl, flags); + } + + type->tag = type->tags[0]; + return TRUE; +} + +static int parse_new_decl(declinfo_t *decl, int flags) +{ + token_t tok; + + if (!parse_new_typeexpr(&decl->type, NULL, flags)) return FALSE; + if (!expecttoken(tSYMBOL, &tok)) + return FALSE; + strcpy(decl->name, tok.str); + + if ((flags & TYPEMASK_NAMED_DECL) && !(flags & TYPEFLAG_NO_POSTDIMS)) { + if (matchtoken('[')) { + if (decl->type.numdim == 0) + parse_old_array_dims(decl, flags); + else + 
error(121); + } + } return TRUE; } +// Parse a declaration. +// +// Grammar for named declarations is: +// "const"? symbol ('[' ']')* '&'? symbol +// | "const"? label? '&'? symbol '[' ']' +// +int parse_decl(declinfo_t *decl, int flags) +{ + token_t tok; + + memset(decl, 0, sizeof(*decl)); + + // If parsing an argument, there are two simple checks for whether this is a + // new or old-style declaration. + if ((flags & TYPEFLAG_ARGUMENT) && (lexpeek('&') || lexpeek('{'))) + return parse_old_decl(decl, flags); + + // Another dead giveaway is there being a label. + if (lexpeek(tLABEL)) + return parse_old_decl(decl, flags); + + // Otherwise, we have to eat a symbol to tell. + if (lextok(&tok) == tSYMBOL) { + if (lexpeek('[') || lexpeek(tSYMBOL)) { + // A new-style declaration only allows array dims or a symbol name, so + // this is a new-style declaration. Make sure to push back the first + // symbol. + lexpush(); + return parse_new_decl(decl, flags); + } + + // Push the symbol back, we've got an old-style decl. + lexpush(); + return parse_old_decl(decl, flags); + } + + // All else has failed. Probably got a type keyword. New-style. 
+ return parse_new_decl(decl, flags); +} + void define_constructor(methodmap_t *map, methodmap_method_t *method) { symbol *sym = findglb(map->name, sGLOBAL); @@ -3410,7 +3568,7 @@ void check_name_length(char *original) } } -symbol *parse_inline_function(methodmap_t *map, const declinfo_t *decl, const char *name, int is_native, int is_ctor, int is_dtor) +symbol *parse_inline_function(methodmap_t *map, const typeinfo_t *type, const char *name, int is_native, int is_ctor, int is_dtor) { funcstub_setup_t setup; if (is_dtor) @@ -3418,7 +3576,7 @@ symbol *parse_inline_function(methodmap_t *map, const declinfo_t *decl, const ch else if (is_ctor) setup.return_tag = map->tag; else - setup.return_tag = decl->tag; + setup.return_tag = type->tag; if (is_ctor) setup.this_tag = -1; @@ -3474,7 +3632,7 @@ int check_this_tag(methodmap_t *map, symbol *target) return ok; } -int parse_property_accessor(const declinfo_t *decl, methodmap_t *map, methodmap_method_t *method) +int parse_property_accessor(const typeinfo_t *type, methodmap_t *map, methodmap_method_t *method) { token_ident_t ident; int is_native = FALSE; @@ -3517,7 +3675,7 @@ int parse_property_accessor(const declinfo_t *decl, methodmap_t *map, methodmap_ char tmpname[METHOD_NAMEMAX + 1]; strcpy(tmpname, method->name); strcat(tmpname, ".get"); - target = parse_inline_function(map, decl, tmpname, is_native, FALSE, FALSE); + target = parse_inline_function(map, type, tmpname, is_native, FALSE, FALSE); } if (!target) @@ -3537,9 +3695,9 @@ int parse_property_accessor(const declinfo_t *decl, methodmap_t *map, methodmap_ } // Must return the same tag as the property. - if (decl->tag != target->tag) { + if (type->tag != target->tag) { const char *kind = getter ? 
"getter" : "setter"; - error(128, "getter", map->name, decl->type); + error(128, "getter", map->name, type->type); } if (!check_this_tag(map, target)) { @@ -3553,11 +3711,12 @@ int parse_property_accessor(const declinfo_t *decl, methodmap_t *map, methodmap_ methodmap_method_t *parse_property(methodmap_t *map) { - declinfo_t decl; + typeinfo_t type; token_ident_t ident; methodmap_method_t *method; - if (!parse_decl(&decl, NULL, DECLFLAG_ONLY_NEW_TYPES)) + memset(&type, 0, sizeof(type)); + if (!parse_new_typeexpr(&type, NULL, TYPEFLAG_RETURN)) return NULL; if (!needsymbol(&ident)) return NULL; @@ -3573,7 +3732,7 @@ methodmap_method_t *parse_property(methodmap_t *map) return method; while (!matchtoken('}')) { - if (!parse_property_accessor(&decl, map,method)) { + if (!parse_property_accessor(&type, map,method)) { if (!consume_line()) return NULL; } @@ -3601,8 +3760,10 @@ methodmap_method_t *parse_method(methodmap_t *map) token_ident_t bindsource; strcpy(bindsource.name, ""); + typeinfo_t type; + memset(&type, 0, sizeof(type)); + token_t tok; - declinfo_t decl; if (matchtoken('~')) { // We got something like "public ~Blah = X" is_bind = TRUE; @@ -3657,7 +3818,7 @@ methodmap_method_t *parse_method(methodmap_t *map) // Parse for type expression, priming it with the token we predicted // would be an identifier. - if (!parse_decl(&decl, first, DECLFLAG_ONLY_NEW_TYPES)) + if (!parse_new_typeexpr(&type, first, TYPEFLAG_RETURN)) return NULL; // Now, we should get an identifier. 
@@ -3697,7 +3858,7 @@ methodmap_method_t *parse_method(methodmap_t *map) else if (target->ident != iFUNCTN) error(10); } else { - target = parse_inline_function(map, &decl, ident.name, is_native, is_ctor, is_dtor); + target = parse_inline_function(map, &type, ident.name, is_native, is_ctor, is_dtor); } if (!target) @@ -5156,7 +5317,6 @@ static int argcompare(arginfo *a1,arginfo *a2) */ static int declargs(symbol *sym, int chkshadow, const int *thistag) { - #define MAXTAGS 16 char *ptr; int argcnt,oldargcnt,tok,tags[MAXTAGS],numtags; cell val; diff --git a/sourcepawn/compiler/sc2.c b/sourcepawn/compiler/sc2.c index 5024904b..9a94a880 100644 --- a/sourcepawn/compiler/sc2.c +++ b/sourcepawn/compiler/sc2.c @@ -1151,6 +1151,10 @@ static int command(void) cell val; preproc_expr(&val,NULL); sc_needsemicolon=(int)val; + } else if (strcmp(str, "require_newdecls")==0) { + cell val; + preproc_expr(&val,NULL); + sc_require_newdecls = (int)val; } else if (strcmp(str,"tabsize")==0) { cell val; preproc_expr(&val,NULL); @@ -2264,6 +2268,16 @@ SC_FUNC void lexclr(int clreol) } /* if */ } +// Return true if the token `id` is ahead (it is pushed back, not consumed), false otherwise. +SC_FUNC int lexpeek(int id) +{ + if (matchtoken(id)) { + lexpush(); + return TRUE; + } + return FALSE; +} + /* matchtoken * * This routine is useful if only a simple check is needed. 
If the token @@ -3119,6 +3133,16 @@ SC_FUNC int expecttoken(int id, token_t *tok) return FALSE; } +SC_FUNC int matchtoken2(int id, token_t *tok) +{ + if (matchtoken(id)) { + tok->id = tokeninfo(&tok->val, &tok->str); + return TRUE; + + } + return FALSE; +} + SC_FUNC int matchsymbol(token_ident_t *ident) { if (lextok(&ident->tok) != tSYMBOL) { diff --git a/sourcepawn/compiler/sc5.scp b/sourcepawn/compiler/sc5.scp index 08769f9e..876ce74c 100644 --- a/sourcepawn/compiler/sc5.scp +++ b/sourcepawn/compiler/sc5.scp @@ -164,7 +164,7 @@ static char *errmsg[] = { /*118*/ "destructors must be native functions\n", /*119*/ "destructors cannot have extra arguments\n", /*120*/ "methodmap and class signatures must use new-style type declarations\n", -/*121*/ "this syntax is not yet supported\n", +/*121*/ "cannot specify array dimensions on both type and name\n", /*122*/ "expected type expression\n", /*123*/ "fully-qualified name \"%s\" is too long, would be truncated to \"%s\"\n", /*124*/ "unexpected token, expected method or property\n", @@ -179,6 +179,8 @@ static char *errmsg[] = { /*133*/ "cannot coerce unrelated object types %s and %s\n", /*134*/ "type mismatch (%s and %s)\n", /*135*/ "cannot use an object in a multi-tag selector\n", +/*136*/ "arrays are not supported as return types\n", +/*137*/ "cannot mix reference and array types\n", #else "\321e\307\232\264k\213:\233\316bu\201fo\223\205\220\012", "\202l\224\245s\204g\367\346e\331\201(\240\321\222\270\202) \260 follow ea\272 \042c\343e\042\012", diff --git a/sourcepawn/compiler/scvars.c b/sourcepawn/compiler/scvars.c index 7eed33e1..c9a440be 100644 --- a/sourcepawn/compiler/scvars.c +++ b/sourcepawn/compiler/scvars.c @@ -92,6 +92,7 @@ SC_VDEFINE int sc_curstates=0; /* ID of the current state list */ SC_VDEFINE int pc_optimize=sOPTIMIZE_NOMACRO; /* (peephole) optimization level */ SC_VDEFINE int pc_memflags=0; /* special flags for the stack/heap usage */ SC_VDEFINE int sc_showincludes=0; /* show include files */ 
+SC_VDEFINE int sc_require_newdecls=0; /* Require new-style declarations */ SC_VDEFINE constvalue sc_automaton_tab = { NULL, "", 0, 0}; /* automaton table */ SC_VDEFINE constvalue sc_state_tab = { NULL, "", 0, 0}; /* state table */