case's c compiler
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

grammar.peg 14KB


  1. %{
  2. #include "ast/ast.h"
  3. #include "ast/expr.h"
  4. #include "ast/stmt.h"
  5. #include "ast/type.h"
  6. #include "list.h"
  7. #include "token.h"
  8. #include "util.h"
  9. #define YYSTYPE token /* every rule's semantic value ($$ and labelled results) is a token */
  10. #define YY_CTX_LOCAL /* parser state lives in a caller-owned context rather than globals */
  11. #define YY_CTX_MEMBERS \
  12. FILE *fp; \
  13. prgm *p; /* extra context fields: fp is read by YY_INPUT, p is written by the Program rule */
  14. #define YY_INPUT(ctx, buf, result, max_size) { \
  15. int yyc = fgetc(ctx->fp); \
  16. result = (EOF == yyc) ? 0 : (*(buf) = yyc, 1); \
  17. } /* reads one character per call from ctx->fp; result is 0 at EOF, otherwise 1 */
  18. %}
  19. # Top-level definitions
  20. Program
  21. = prog:TranslationUnit !. { yy->p = prog.data.prgm; }
  22. # TODO: Fix append of decl once decl type declared
  23. TranslationUnit
  24. = decl:ExternalDeclaration prog:TranslationUnit? {
  25. prgm *p = prgm_add_decl(prog.data.prgm, &decl);
  26. $$ = token_prgm(p);
  27. }
  28. ExternalDeclaration
  29. = FunctionDefinition
  30. | Declaration
  31. FunctionDefinition
  32. = spec:DeclarationSpecifiers declr:Declarator decls:DeclarationList?
  33. body:CompoundStatement
  34. DeclarationList
  35. = Declaration DeclarationList?
  36. # Declarations
  37. Declaration
  38. = DeclarationSpecifiers InitDeclaratorList? SEMI
  39. DeclarationSpecifiers
  40. = DeclarationSpecifier DeclarationSpecifiers?
  41. InitDeclaratorList
  42. = InitDeclarator (COMMA t:InitDeclaratorList)?
  43. InitDeclarator # declarator with optional initializer; literal '=' because the EQ token matches "=="
  44. = Declarator "=" !"=" WS* Initializer
  45. | Declarator
  46. DeclarationSpecifier
  47. = StorageClassSpecifier
  48. | TypeSpecifier
  49. | TypeQualifier
  50. | FunctionSpecifier
  51. StorageClassSpecifier
  52. = TYPEDEF { $$ = token_int(STORAGE_TYPEDEF); }
  53. | EXTERN { $$ = token_int(STORAGE_EXTERN); }
  54. | STATIC { $$ = token_int(STORAGE_STATIC); }
  55. | AUTO { $$ = token_int(STORAGE_AUTO); }
  56. | REGISTER { $$ = token_int(STORAGE_REGISTER); }
  57. TypeSpecifier
  58. = VOID
  59. | CHAR
  60. | SHORT
  61. | INT
  62. | LONG
  63. | FLOAT
  64. | DOUBLE
  65. | SIGNED
  66. | UNSIGNED
  67. | BOOL
  68. | COMPLEX
  69. | StructOrUnionSpecifier
  70. | EnumSpecifier
  71. | Identifier
  72. StructOrUnionSpecifier
  73. = StructOrUnion Identifier? LBRACE StructDeclarationList RBRACE
  74. | StructOrUnion Identifier
  75. StructOrUnion
  76. = STRUCT
  77. | UNION
  78. StructDeclarationList
  79. = StructDeclaration StructDeclarationList
  80. | StructDeclaration
  81. StructDeclaration
  82. = SpecifierQualifierList StructDeclaratorList SEMI
  83. SpecifierQualifierList
  84. = TypeSpecifier SpecifierQualifierList?
  85. | TypeQualifier SpecifierQualifierList?
  86. StructDeclaratorList
  87. = StructDeclarator (COMMA StructDeclaratorList)?
  88. StructDeclarator
  89. = Declarator
  90. | Declarator? COLON ConstantExpression
  91. EnumSpecifier # inside braces the enumerator list is mandatory; only the trailing comma is optional
  92. = ENUM Identifier? LBRACE EnumeratorList COMMA? RBRACE
  93. | ENUM Identifier
  94. EnumeratorList
  95. = Enumerator (COMMA EnumeratorList)?
  96. Enumerator # NAME or NAME = constant; literal '=' because the EQ token matches "=="
  97. = EnumerationConstant "=" !"=" WS* ConstantExpression
  98. | EnumerationConstant
  99. TypeQualifier
  100. = CONST
  101. | RESTRICT
  102. | VOLATILE
  103. FunctionSpecifier
  104. = INLINE
  105. Declarator
  106. = Pointer? DirectDeclarator
  107. DirectDeclarator
  108. = Identifier DirectDeclaratorSuffix*
  109. | LPAREN Declarator RPAREN DirectDeclaratorSuffix*
  110. DirectDeclaratorSuffix
  111. = LBRACKET STATIC TypeQualifierList? AssignmentExpression RBRACKET
  112. | LBRACKET TypeQualifierList STATIC AssignmentExpression RBRACKET
  113. | LBRACKET TypeQualifierList? STAR RBRACKET
  114. | LBRACKET TypeQualifierList? AssignmentExpression? RBRACKET
  115. | LPAREN ParameterTypeList RPAREN
  116. | LPAREN IdentifierList? RPAREN
  117. Pointer
  118. = STAR TypeQualifierList? Pointer?
  119. TypeQualifierList
  120. = TypeQualifier TypeQualifierList?
  121. ParameterTypeList
  122. = ParameterList (COMMA ELLIPSIS)?
  123. ParameterList
  124. = ParameterDeclaration (COMMA ParameterList)?
  125. ParameterDeclaration
  126. = DeclarationSpecifiers Declarator
  127. | DeclarationSpecifiers AbstractDeclarator?
  128. IdentifierList
  129. = Identifier (COMMA IdentifierList)?
  130. TypeName
  131. = SpecifierQualifierList AbstractDeclarator?
  132. AbstractDeclarator
  133. = Pointer? DirectAbstractDeclarator
  134. | Pointer
  135. DirectAbstractDeclarator
  136. = DADHead? DADTail*
  137. DADHead
  138. = LPAREN AbstractDeclarator RPAREN
  139. DADTail
  140. = LBRACKET STAR RBRACKET
  141. | LBRACKET AssignmentExpression? RBRACKET
  142. | LPAREN ParameterTypeList? RPAREN
  143. Initializer
  144. = LBRACE InitializerList COMMA? RBRACE
  145. | AssignmentExpression
  146. InitializerList
  147. = Designation? Initializer (COMMA InitializerList)?
  148. Designation # designator list followed by a single '=' (the EQ token matches "==")
  149. = DesignatorList "=" !"=" WS*
  150. DesignatorList
  151. = Designator DesignatorList?
  152. Designator
  153. = LBRACKET ConstantExpression RBRACKET
  154. | DOT Identifier
  155. # Statements
  156. Statement
  157. = LabeledStatement
  158. | CompoundStatement
  159. | ExpressionStatement
  160. | SelectionStatement
  161. | IterationStatement
  162. | JumpStatement
  163. LabeledStatement
  164. = CASE ConstantExpression COLON Statement
  165. | DEFAULT COLON Statement
  166. | Identifier COLON Statement
  167. CompoundStatement
  168. = LBRACE body:BlockItemList? RBRACE {
  169. stmt *stmt = stmt_compound(body.data.list);
  170. $$ = token_stmt(stmt);
  171. }
  172. BlockItemList
  173. = h:BlockItem t:BlockItemList? {
  174. list *l = list_cons(h, t);
  175. $$ = token_list(l);
  176. }
  177. BlockItem
  178. = decl:Declaration { $$ = token_list(list_single(decl)); }
  179. | stmt:Statement { $$ = token_list(list_single(stmt)); }
  180. ExpressionStatement
  181. = e:Expression? SEMI {
  182. stmt *stmt = stmt_expr(e.data.expr);
  183. $$ = token_stmt(stmt);
  184. }
  185. SelectionStatement
  186. = IF LPAREN predicate:Expression RPAREN true_stmt:Statement ELSE false_stmt:Statement {
  187. stmt *stmt = stmt_if_else(predicate.data.expr, true_stmt.data.stmt, false_stmt.data.stmt);
  188. $$ = token_stmt(stmt);
  189. } | IF LPAREN predicate:Expression RPAREN true_stmt:Statement {
  190. stmt *stmt = stmt_if(predicate.data.expr, true_stmt.data.stmt);
  191. $$ = token_stmt(stmt);
  192. } | SWITCH LPAREN discriminator:Expression RPAREN body:Statement {
  193. stmt *stmt = stmt_switch(discriminator.data.expr, body.data.stmt);
  194. $$ = token_stmt(stmt);
  195. }
  196. IterationStatement # while, do-while and both forms of for; each action reads its predicate:/body: labels
  197. = WHILE LPAREN predicate:Expression RPAREN body:Statement {
  198. stmt *stmt = stmt_while(predicate.data.expr, body.data.stmt);
  199. $$ = token_stmt(stmt);
  200. } | DO body:Statement WHILE LPAREN predicate:Expression RPAREN SEMI {
  201. stmt *stmt = stmt_do_while(predicate.data.expr, body.data.stmt);
  202. $$ = token_stmt(stmt);
  203. } | FOR LPAREN init_expr:Expression? SEMI predicate:Expression? SEMI incr_expr:Expression? RPAREN body:Statement {
  204. stmt *stmt = stmt_for(init_expr.data.expr, predicate.data.expr, incr_expr.data.expr, body.data.stmt);
  205. $$ = token_stmt(stmt);
  206. } | FOR LPAREN Declaration Expression? SEMI Expression? RPAREN Statement # Declaration supplies the first ';'; TODO: no action yet
  207. JumpStatement
  208. = GOTO label:Identifier SEMI {
  209. stmt *stmt = stmt_goto(label.data.string);
  210. $$ = token_stmt(stmt);
  211. } | CONTINUE SEMI {
  212. stmt *stmt = stmt_continue();
  213. $$ = token_stmt(stmt);
  214. } | BREAK SEMI {
  215. stmt *stmt = stmt_break();
  216. $$ = token_stmt(stmt);
  217. } | RETURN e:Expression? SEMI {
  218. stmt *stmt = stmt_return(e.data.expr);
  219. $$ = token_stmt(stmt);
  220. }
  221. # Expressions
  222. ConstantExpression
  223. = ConditionalExpression
  224. Expression # comma operator: folds "a, b" into an expr_seq node
  225. = lhs:AssignmentExpression (
  226. COMMA rhs:Expression {
  227. expr *expr = expr_seq(lhs.data.expr, rhs.data.expr);
  228. lhs = token_expr(expr); /* accumulate in lhs: the final action assigns $$ from lhs */
  229. }
  230. )* { $$ = lhs; }
  231. AssignmentExpression # right-associative "target OP value"; the value may itself be an assignment
  232. = lval:ConditionalExpression
  233. (op:AssignmentOperator rval:AssignmentExpression {
  234. expr *expr = expr_assign(op.data.intval, lval.data.expr, rval.data.expr); /* left operand is the target */
  235. lval = token_expr(expr);
  236. })* { $$ = lval; }
  237. AssignmentOperator # maps each assignment token to its ASSIGN_* code
  238. = "=" !"=" WS* { $$ = token_int(ASSIGN_EQ); } # plain '='; the EQ token matches "==" in this grammar
  239. | MUL_EQ { $$ = token_int(ASSIGN_MUL); }
  240. | DIV_EQ { $$ = token_int(ASSIGN_DIV); }
  241. | MOD_EQ { $$ = token_int(ASSIGN_MOD); }
  242. | PLUS_EQ { $$ = token_int(ASSIGN_ADD); }
  243. | MINUS_EQ { $$ = token_int(ASSIGN_SUB); }
  244. | LSHIFT_EQ { $$ = token_int(ASSIGN_LSHIFT); }
  245. | RSHIFT_EQ { $$ = token_int(ASSIGN_RSHIFT); }
  246. | AND_EQ { $$ = token_int(ASSIGN_AND); }
  247. | XOR_EQ { $$ = token_int(ASSIGN_XOR); }
  248. | OR_EQ { $$ = token_int(ASSIGN_OR); }
  249. ConditionalExpression
  250. = lhs:LogicalOrExpression (
  251. QMARK true_expr:Expression COLON false_expr:ConditionalExpression {
  252. expr *expr = expr_ternary(lhs.data.expr, true_expr.data.expr, false_expr.data.expr);
  253. lhs = token_expr(expr);
  254. })* { $$ = lhs; }
  255. LogicalOrExpression
  256. = lhs:LogicalAndExpression
  257. (PIPES rhs:LogicalAndExpression {
  258. lhs = token_binop(BINOP_OR, lhs, rhs);
  259. })* { $$ = lhs; }
  260. LogicalAndExpression
  261. = lhs:InclusiveOrExpression
  262. (AMPS rhs:InclusiveOrExpression {
  263. lhs = token_binop(BINOP_AND, lhs, rhs);
  264. })* { $$ = lhs; }
  265. InclusiveOrExpression
  266. = lhs:ExclusiveOrExpression
  267. (PIPE rhs:ExclusiveOrExpression {
  268. lhs = token_binop(BINOP_BIT_OR, lhs, rhs);
  269. })* { $$ = lhs; }
  270. ExclusiveOrExpression
  271. = lhs:AndExpression
  272. (CARET rhs:AndExpression {
  273. lhs = token_binop(BINOP_BIT_XOR, lhs, rhs);
  274. })* { $$ = lhs; }
  275. AndExpression
  276. = lhs:EqualityExpression
  277. (AMP rhs:EqualityExpression {
  278. lhs = token_binop(BINOP_BIT_AND, lhs, rhs);
  279. })* { $$ = lhs; }
  280. EqualityExpression
  281. = lhs:RelationalExpression
  282. (EQ rhs:RelationalExpression {
  283. lhs = token_binop(BINOP_EQ, lhs, rhs);
  284. } | NEQ rhs:RelationalExpression {
  285. lhs = token_binop(BINOP_NEQ, lhs, rhs);
  286. })* { $$ = lhs; }
  287. RelationalExpression
  288. = lhs:ShiftExpression
  289. (op:RelationalOperator rhs:ShiftExpression {
  290. lhs = token_binop(op.data.intval, lhs, rhs);
  291. })* { $$ = lhs; }
  292. RelationalOperator
  293. = LTE { $$ = token_int(BINOP_LTE); }
  294. | GTE { $$ = token_int(BINOP_GTE); }
  295. | LT { $$ = token_int(BINOP_LT); }
  296. | GT { $$ = token_int(BINOP_GT); }
  297. ShiftExpression
  298. = lhs:AdditiveExpression
  299. (LL rhs:AdditiveExpression {
  300. lhs = token_binop(BINOP_LSHIFT, lhs, rhs);
  301. } | RR rhs:AdditiveExpression {
  302. lhs = token_binop(BINOP_RSHIFT, lhs, rhs);
  303. })* { $$ = lhs; }
  304. AdditiveExpression
  305. = lhs:MultiplyExpression
  306. (PLUS rhs:MultiplyExpression {
  307. lhs = token_binop(BINOP_PLUS, lhs, rhs);
  308. } | MINUS rhs:MultiplyExpression {
  309. lhs = token_binop(BINOP_MINUS, lhs, rhs);
  310. })* { $$ = lhs; }
  311. MultiplyExpression
  312. = lhs:CastExpression
  313. (op:MultiplyOperator rhs:CastExpression {
  314. lhs = token_binop(op.data.intval, lhs, rhs);
  315. })* { $$ = lhs; }
  316. MultiplyOperator
  317. = STAR { $$ = token_int(BINOP_MULT); }
  318. | SLASH { $$ = token_int(BINOP_DIV); }
  319. | PCT { $$ = token_int(BINOP_MOD); }
  320. CastExpression # right-recursive so the operand (rhs) is bound before the cast action runs
  321. = LPAREN ty:TypeName RPAREN rhs:CastExpression {
  322. expr *expr = expr_cast(rhs.data.expr, ty.data.string);
  323. $$ = token_expr(expr);
  324. } | rhs:UnaryExpression { $$ = rhs; }
  325. UnaryExpression # prefix ++ / -- / sizeof; right-recursive so rhs is bound before each action runs
  326. = PLUSES rhs:UnaryExpression {
  327. expr *expr = expr_unop(UNOP_PREINC, rhs.data.expr);
  328. $$ = token_expr(expr);
  329. } | MINUSES rhs:UnaryExpression {
  330. expr *expr = expr_unop(UNOP_PREDEC, rhs.data.expr);
  331. $$ = token_expr(expr);
  332. } | SIZEOF rhs:UnaryExpression {
  333. expr *expr = expr_unop(UNOP_SIZEOF, rhs.data.expr);
  334. $$ = token_expr(expr);
  335. } | rhs:UnaryValue { $$ = rhs; }
  336. UnaryOperator
  337. = AMP { $$ = token_int(UNOP_ADDRESSOF); }
  338. | STAR { $$ = token_int(UNOP_DEREF); }
  339. | PLUS { $$ = token_int(UNOP_POSITIVE); }
  340. | MINUS { $$ = token_int(UNOP_NEGATE_ADD); }
  341. | TILDE { $$ = token_int(UNOP_NEGATE_BIT); }
  342. | BANG { $$ = token_int(UNOP_NEGATE_LOGIC); }
  343. UnaryValue
  344. = SIZEOF LPAREN ty:TypeName RPAREN
  345. | op:UnaryOperator e:CastExpression {
  346. expr *expr = expr_unop(op.data.intval, e.data.expr);
  347. $$ = token_expr(expr);
  348. } | PostfixExpression
  349. PostfixExpression
  350. = lhs:PrimaryExpression
  351. (LBRACKET rhs:Expression RBRACKET {
  352. expr *expr = expr_index(lhs.data.expr, rhs.data.expr);
  353. lhs = token_expr(expr);
  354. } | LPAREN args:ArgumentExpressionList? RPAREN {
  355. expr *expr = expr_funcall(lhs.data.expr, args.data.list);
  356. lhs = token_expr(expr);
  357. } | DOT name:Identifier {
  358. expr *expr = expr_attr(lhs.data.expr, name.data.string);
  359. lhs = token_expr(expr);
  360. } | PLUSES {
  361. expr *expr = expr_unop(UNOP_POSTINC, lhs.data.expr);
  362. lhs = token_expr(expr);
  363. } | MINUSES {
  364. expr *expr = expr_unop(UNOP_POSTDEC, lhs.data.expr);
  365. lhs = token_expr(expr);
  366. } | LPAREN name:TypeName RPAREN LBRACE InitializerList COMMA? RBRACE
  367. )* { $$ = lhs; }
  368. ArgumentExpressionList
  369. = h:AssignmentExpression (COMMA t:ArgumentExpressionList)?
  370. PrimaryExpression
  371. = Identifier
  372. | Constant
  373. | StringLiteral
  374. | LPAREN expr:Expression RPAREN { $$ = expr; }
  375. EnumerationConstant = "?" WS*
  376. Identifier = <[a-zA-Z_][a-zA-Z_0-9]*> WS* {
  377. char *ident = string_dup(yytext);
  378. $$ = token_string(ident);
  379. }
  380. Constant = "?" WS*
  381. StringLiteral = "?" WS*
  382. # Punctuation
  383. AND_EQ = "&=" WS*
  384. AMP = "&" !"&" WS* # bitwise-and / address-of; must not consume the first half of "&&"
  385. AMPS = "&&" WS*
  386. BANG = "!" WS*
  387. CARET = "^" WS*
  388. COLON = ":" WS*
  389. COMMA = "," WS*
  390. DOT = "." WS*
  391. DIV_EQ = "/=" WS*
  392. ELLIPSIS = "..." WS*
  393. EQ = "==" WS*
  394. GT = ">" WS*
  395. GTE = ">=" WS*
  396. LBRACE = "{" WS*
  397. LBRACKET = "[" WS*
  398. LPAREN = "(" WS*
  399. LL = "<<" WS*
  400. LSHIFT_EQ = "<<=" WS*
  401. LT = "<" WS*
  402. LTE = "<=" WS*
  403. MINUS = "-" WS*
  404. MINUS_EQ = "-=" WS*
  405. MINUSES = "--" WS*
  406. MOD_EQ = "%=" WS*
  407. MUL_EQ = "*=" WS*
  408. NEQ = "!=" WS*
  409. OR_EQ = "|=" WS*
  410. PCT = "%" WS*
  411. PIPE = "|" WS*
  412. PIPES = "||" WS*
  413. PLUS = "+" WS*
  414. PLUS_EQ = "+=" WS*
  415. PLUSES = "++" WS*
  416. QMARK = "?" WS*
  417. RBRACE = "}" WS*
  418. RBRACKET = "]" WS*
  419. RPAREN = ")" WS*
  420. RR = ">>" WS*
  421. RSHIFT_EQ = ">>=" WS*
  422. SEMI = ";" WS*
  423. SLASH = "/" WS*
  424. STAR = "*" WS*
  425. TILDE = "~" WS*
  426. XOR_EQ = "^=" WS*
  427. # Keywords (each requires a word boundary so that e.g. "int" does not match the prefix of "integer")
  428. AUTO = "auto" ![a-zA-Z_0-9] WS*
  429. BOOL = "_Bool" ![a-zA-Z_0-9] WS*
  430. BREAK = "break" ![a-zA-Z_0-9] WS*
  431. CASE = "case" ![a-zA-Z_0-9] WS*
  432. CHAR = "char" ![a-zA-Z_0-9] WS*
  433. COMPLEX = "_Complex" ![a-zA-Z_0-9] WS*
  434. CONTINUE = "continue" ![a-zA-Z_0-9] WS*
  435. CONST = "const" ![a-zA-Z_0-9] WS*
  436. DEFAULT = "default" ![a-zA-Z_0-9] WS*
  437. DO = "do" ![a-zA-Z_0-9] WS*
  438. DOUBLE = "double" ![a-zA-Z_0-9] WS*
  439. ELSE = "else" ![a-zA-Z_0-9] WS*
  440. ENUM = "enum" ![a-zA-Z_0-9] WS*
  441. EXTERN = "extern" ![a-zA-Z_0-9] WS*
  442. FLOAT = "float" ![a-zA-Z_0-9] WS*
  443. IF = "if" ![a-zA-Z_0-9] WS*
  444. INLINE = "inline" ![a-zA-Z_0-9] WS*
  445. INT = "int" ![a-zA-Z_0-9] WS*
  446. LONG = "long" ![a-zA-Z_0-9] WS*
  447. FOR = "for" ![a-zA-Z_0-9] WS*
  448. GOTO = "goto" ![a-zA-Z_0-9] WS*
  449. REGISTER = "register" ![a-zA-Z_0-9] WS*
  450. RESTRICT = "restrict" ![a-zA-Z_0-9] WS*
  451. RETURN = "return" ![a-zA-Z_0-9] WS*
  452. WHILE = "while" ![a-zA-Z_0-9] WS*
  453. SHORT = "short" ![a-zA-Z_0-9] WS*
  454. SIGNED = "signed" ![a-zA-Z_0-9] WS*
  455. SIZEOF = "sizeof" ![a-zA-Z_0-9] WS*
  456. STATIC = "static" ![a-zA-Z_0-9] WS*
  457. STRUCT = "struct" ![a-zA-Z_0-9] WS*
  458. SWITCH = "switch" ![a-zA-Z_0-9] WS*
  459. TYPEDEF = "typedef" ![a-zA-Z_0-9] WS*
  460. UNION = "union" ![a-zA-Z_0-9] WS*
  461. UNSIGNED = "unsigned" ![a-zA-Z_0-9] WS*
  462. VOID = "void" ![a-zA-Z_0-9] WS*
  463. VOLATILE = "volatile" ![a-zA-Z_0-9] WS*
  464. WS = [ \t\r\n] # one whitespace character, including tab