# regression.at   [plain text]

```# Bison Regressions.                               -*- Autotest -*-

# Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 Free Software
# Foundation, Inc.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

AT_BANNER([[Regression tests.]])

## ------------------ ##
## Trivial grammars.  ##
## ------------------ ##

AT_SETUP([Trivial grammars])

# A single-rule grammar that defines YYSTYPE as a pointer type and
# enables %error-verbose.  Bison must accept it, and the generated
# parser must compile both without and with -DYYDEBUG.
AT_DATA_GRAMMAR([input.y],
[[%{
void yyerror (char const *);
int yylex (void);
#define YYSTYPE int *
%}

%error-verbose

%%

program: 'x';
]])

AT_CHECK([bison -o input.c input.y])
AT_COMPILE([input.o], [-c input.c])
AT_COMPILE([input.o], [-DYYDEBUG -c input.c])

AT_CLEANUP

## ------------------------- ##
## Early token definitions.  ##
## ------------------------- ##

AT_SETUP([Early token definitions])

# Found in GCJ: they expect the tokens to be defined before the user
# prologue, so that they can use the token definitions in it.
#
# Here the second prologue block raises a preprocessor error unless
# MY_TOKEN is already defined at that point, even though its %token
# declaration only comes later in the grammar file.

AT_DATA_GRAMMAR([input.y],
[[%{
void yyerror (const char *s);
int yylex (void);
%}

%union
{
int val;
};
%{
#ifndef MY_TOKEN
# error "MY_TOKEN not defined."
#endif
%}
%token MY_TOKEN
%%
exp: MY_TOKEN;
%%
]])

AT_CHECK([bison -o input.c input.y])
AT_COMPILE([input.o], [-c input.c])

AT_CLEANUP

## ---------------- ##
## Braces parsing.  ##
## ---------------- ##

AT_SETUP([Braces parsing])

# The only action of the grammar holds a run of deeply nested braces.
AT_DATA([input.y],
[[/* Bison used to swallow the character after `}'. */

%%
exp: { tests = {{{{{{{{{{}}}}}}}}}}; };
%%
]])

AT_CHECK([bison -v -o input.c input.y])

# The action text must show up unchanged in the generated parser; the
# matching line printed by grep is not compared.
AT_CHECK([grep 'tests = {{{{{{{{{{}}}}}}}}}};' input.c], 0, [ignore])

AT_CLEANUP

## ------------------ ##
## Duplicate string.  ##
## ------------------ ##

AT_SETUP([Duplicate string])

# LE and GE are both given the same string alias.
AT_DATA([input.y],
[[/* `Bison -v' used to dump core when two tokens are defined with the same
string, as LE and GE below. */

%token NUM
%token LE "<="
%token GE "<="

%%
exp: '(' exp ')' | NUM ;
%%
]])

# Bison must exit with status 0, print nothing on stdout and emit
# exactly one warning, located at the second use of the alias.
AT_CHECK([bison -v -o input.c input.y], 0, [],
[[input.y:6.8-14: warning: symbol `"<="' used more than once as a literal string
]])

AT_CLEANUP

## ------------------- ##
## Rule Line Numbers.  ##
## ------------------- ##

AT_SETUP([Rule Line Numbers])

AT_KEYWORDS([report])

# Both alternatives of expr contain a mid-rule action, and every
# element of the rules sits on a line of its own.  The report is
# expected to show the mid-rule actions as the generated nonterminals
# @1 and @2.
AT_DATA([input.y],
[[%%
expr:
'a'

{

}

'b'

{

}

|

{

}

'c'

{

};
]])

AT_CHECK([bison -o input.c -v input.y])

# Check the contents of the report.
AT_CHECK([cat input.output], [],
[[Grammar

0 \$accept: expr \$end

1 @1: /* empty */

2 expr: 'a' @1 'b'

3 @2: /* empty */

4 expr: @2 'c'

Terminals, with rules where they appear

\$end (0) 0
'a' (97) 2
'b' (98) 2
'c' (99) 4
error (256)

Nonterminals, with rules where they appear

\$accept (6)
on left: 0
expr (7)
on left: 2 4, on right: 0
@1 (8)
on left: 1, on right: 2
@2 (9)
on left: 3, on right: 4

state 0

0 \$accept: . expr \$end

'a'  shift, and go to state 1

\$default  reduce using rule 3 (@2)

expr  go to state 2
@2    go to state 3

state 1

2 expr: 'a' . @1 'b'

\$default  reduce using rule 1 (@1)

@1  go to state 4

state 2

0 \$accept: expr . \$end

\$end  shift, and go to state 5

state 3

4 expr: @2 . 'c'

'c'  shift, and go to state 6

state 4

2 expr: 'a' @1 . 'b'

'b'  shift, and go to state 7

state 5

0 \$accept: expr \$end .

\$default  accept

state 6

4 expr: @2 'c' .

\$default  reduce using rule 4 (expr)

state 7

2 expr: 'a' @1 'b' .

\$default  reduce using rule 2 (expr)
]])

AT_CLEANUP

## ---------------------- ##
## Mixing %token styles.  ##
## ---------------------- ##

AT_SETUP([Mixing %token styles])

# Taken from the documentation.
# The declarations combine a type tag, an explicit token number (134)
# and string aliases; %left then names one token by identifier and the
# other by its alias.  Bison must accept this with status 0 and without
# printing anything.
AT_DATA([input.y],
[[%token  <operator>  OR      "||"
%token  <operator>  LE 134  "<="
%left  OR  "<="
%%
exp: ;
%%
]])

AT_CHECK([bison -v -o input.c input.y])

AT_CLEANUP

## ---------------- ##
## Invalid inputs.  ##
## ---------------- ##

AT_SETUP([Invalid inputs])

# Lines 2 to 7 of the grammar each hold something invalid: stray
# characters, an unknown directive, and a prologue that is never closed.
AT_DATA([input.y],
[[%%
?
default: 'a' }
%&
%a-does-not-exist
%-
%{
]])

# Bison must exit with status 1, print nothing on stdout, and report
# every problem with its location, in input order.
AT_CHECK([bison input.y], [1], [],
[[input.y:2.1: invalid character: `?'
input.y:3.14: invalid character: `}'
input.y:4.1: invalid character: `%'
input.y:4.2: invalid character: `&'
input.y:5.1-17: invalid directive: `%a-does-not-exist'
input.y:6.1: invalid character: `%'
input.y:6.2: invalid character: `-'
input.y:7.1-8.0: missing `%}' at end of file
]])

AT_CLEANUP

AT_SETUP([Invalid inputs with {}])

# Each directive below appears alone on its line, with no braced code
# after it.
AT_DATA([input.y],
[[
%destructor
%initial-action
%lex-param
%parse-param
%printer
%union
]])

# Bison must exit with status 1.  The expected diagnostics only mention
# the first two directives (input lines 3 and 4).
AT_CHECK([bison input.y], [1], [],
[[input.y:3.1: missing `{' in "%destructor {...}"
input.y:4.1: missing `{' in "%initial-action {...}"
input.y:4.1: syntax error, unexpected %initial-action {...}, expecting string or identifier
]])

AT_CLEANUP

## ------------------- ##
## Token definitions.  ##
## ------------------- ##

AT_SETUP([Token definitions])

# Bison managed, when fed with `%token 'f' "f"' to #define 'f'!
#
# The grammar declares tokens in several styles, including one whose
# string alias is full of escape sequences.  The generated parser is
# built and run: its scanner always returns SPECIAL, while the only rule
# starts with "a", so the parser must fail with a verbose message that
# quotes the alias of SPECIAL.
AT_DATA_GRAMMAR([input.y],
[%{
#include <stdio.h>
void yyerror (const char *s);
int yylex (void);
%}
[%error-verbose
%token MYEOF 0 "end of file"
%token 'a' "a"
%token B_TOKEN "b"
%token C_TOKEN 'c'
%token 'd' D_TOKEN
%token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"
%%
exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
%%
void
yyerror (char const *s)
{
fprintf (stderr, "%s\n", s);
}

int
yylex (void)
{
return SPECIAL;
}

int
main (void)
{
return yyparse ();
}
]])

AT_CHECK([bison -o input.c input.y])
AT_COMPILE([input])
# Expected stderr of the parser; it must exit with status 1 and print
# nothing on stdout.
AT_DATA([experr],
[[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201?\?!", expecting a
]])
AT_PARSER_CHECK([./input], 1, [], [experr])
AT_CLEANUP

## -------------------- ##
## Characters Escapes.  ##
## -------------------- ##

AT_SETUP([Characters Escapes])

# Character and string literals made of quote characters, written both
# with and without a backslash, must be accepted by bison and lead to a
# parser that compiles.
AT_DATA_GRAMMAR([input.y],
[%{
void yyerror (const char *s);
int yylex (void);
%}
[%%
exp:
'\'' "\'"
| '\"' "\""
| '"'  "'"
;
]])
# Pacify font-lock-mode: "

AT_CHECK([bison -o input.c input.y])
AT_COMPILE([input.o], [-c input.c])
AT_CLEANUP

## -------------- ##
## Web2c Report.  ##
## -------------- ##

# The generation of the reduction was once wrong in Bison, and made it
# miss some reductions.  In the following test case, the reduction on
# `undef_id_tok' in state 1 was missing.  This is stripped down from
# the actual web2c.y.

AT_SETUP([Web2c Report])

AT_KEYWORDS([report])

# CONST_DEC starts with a mid-rule action, which the report below shows
# as the generated nonterminal @1.
AT_DATA([input.y],
[[%token	undef_id_tok const_id_tok

%start CONST_DEC_PART

%%
CONST_DEC_PART:
CONST_DEC_LIST
;

CONST_DEC_LIST:
CONST_DEC
| CONST_DEC_LIST CONST_DEC
;

CONST_DEC:
{ } undef_id_tok '=' const_id_tok ';'
;
%%
]])

AT_CHECK([bison -v input.y])
# Compare the whole report.  In the expected text below, the reduction
# on undef_id_tok is the one listed in state 2.
AT_CHECK([cat input.output], 0,
[[Grammar

0 \$accept: CONST_DEC_PART \$end

1 CONST_DEC_PART: CONST_DEC_LIST

2 CONST_DEC_LIST: CONST_DEC
3               | CONST_DEC_LIST CONST_DEC

4 @1: /* empty */

5 CONST_DEC: @1 undef_id_tok '=' const_id_tok ';'

Terminals, with rules where they appear

\$end (0) 0
';' (59) 5
'=' (61) 5
error (256)
undef_id_tok (258) 5
const_id_tok (259) 5

Nonterminals, with rules where they appear

\$accept (7)
on left: 0
CONST_DEC_PART (8)
on left: 1, on right: 0
CONST_DEC_LIST (9)
on left: 2 3, on right: 1 3
CONST_DEC (10)
on left: 5, on right: 2 3
@1 (11)
on left: 4, on right: 5

state 0

0 \$accept: . CONST_DEC_PART \$end

\$default  reduce using rule 4 (@1)

CONST_DEC_PART  go to state 1
CONST_DEC_LIST  go to state 2
CONST_DEC       go to state 3
@1              go to state 4

state 1

0 \$accept: CONST_DEC_PART . \$end

\$end  shift, and go to state 5

state 2

1 CONST_DEC_PART: CONST_DEC_LIST .
3 CONST_DEC_LIST: CONST_DEC_LIST . CONST_DEC

undef_id_tok  reduce using rule 4 (@1)
\$default      reduce using rule 1 (CONST_DEC_PART)

CONST_DEC  go to state 6
@1         go to state 4

state 3

2 CONST_DEC_LIST: CONST_DEC .

\$default  reduce using rule 2 (CONST_DEC_LIST)

state 4

5 CONST_DEC: @1 . undef_id_tok '=' const_id_tok ';'

undef_id_tok  shift, and go to state 7

state 5

0 \$accept: CONST_DEC_PART \$end .

\$default  accept

state 6

3 CONST_DEC_LIST: CONST_DEC_LIST CONST_DEC .

\$default  reduce using rule 3 (CONST_DEC_LIST)

state 7

5 CONST_DEC: @1 undef_id_tok . '=' const_id_tok ';'

'='  shift, and go to state 8

state 8

5 CONST_DEC: @1 undef_id_tok '=' . const_id_tok ';'

const_id_tok  shift, and go to state 9

state 9

5 CONST_DEC: @1 undef_id_tok '=' const_id_tok . ';'

';'  shift, and go to state 10

state 10

5 CONST_DEC: @1 undef_id_tok '=' const_id_tok ';' .

\$default  reduce using rule 5 (CONST_DEC)
]])

AT_CLEANUP

## --------------- ##
## Web2c Actions.  ##
## --------------- ##

# The generation of the mapping `state -> action' was once wrong in
# extremely specific situations.  web2c.y exhibits this situation.
# Below is a stripped version of the grammar.  It looks like one can
# simplify it further, but just don't: it is tuned to exhibit a bug,
# which disappears when applying sane grammar transformations.
#
# It used to be wrong on yydefact only:
#
# static const yytype_uint8 yydefact[] =
#  {
# -       2,     0,     1,     0,     0,     2,     3,     2,     5,     4,
# +       2,     0,     1,     0,     0,     0,     3,     2,     5,     4,
#         0,     0
#  };
#
# but let's check all the tables.

AT_SETUP([Web2c Actions])

AT_KEYWORDS([report])

AT_DATA([input.y],
[[%%
statement:  struct_stat;
struct_stat:  /* empty. */ | if else;
if: "if" "const" "then" statement;
else: "else" statement;
%%
]])

AT_CHECK([bison -v -o input.c input.y])

# Check only the tables.  We don't use --no-parser, because it is
# still to be implemented in the experimental branch of Bison.
# The sed script below prints just the static const array definitions
# of input.c, from their declaration line through the line that starts
# with the closing brace, and saves them in tables.c.
[sed -n 's/  *\$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c]

# Every extracted table must match the reference copy below.
AT_CHECK([[cat tables.c]], 0,
[[static const yytype_uint8 yytranslate[] =
{
0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
5,     6
};
static const yytype_uint8 yyprhs[] =
{
0,     0,     3,     5,     6,     9,    14
};
static const yytype_int8 yyrhs[] =
{
8,     0,    -1,     9,    -1,    -1,    10,    11,    -1,     3,
4,     5,     8,    -1,     6,     8,    -1
};
static const yytype_uint8 yyrline[] =
{
0,     2,     2,     3,     3,     4,     5
};
static const char *const yytname[] =
{
"\$end", "error", "\$undefined", "\"if\"", "\"const\"", "\"then\"",
"\"else\"", "\$accept", "statement", "struct_stat", "if", "else", 0
};
static const yytype_uint16 yytoknum[] =
{
0,   256,   257,   258,   259,   260,   261
};
static const yytype_uint8 yyr1[] =
{
0,     7,     8,     9,     9,    10,    11
};
static const yytype_uint8 yyr2[] =
{
0,     2,     1,     0,     2,     4,     2
};
static const yytype_uint8 yydefact[] =
{
3,     0,     0,     2,     0,     0,     1,     3,     4,     3,
6,     5
};
static const yytype_int8 yydefgoto[] =
{
-1,     2,     3,     4,     8
};
static const yytype_int8 yypact[] =
{
-2,    -1,     4,    -8,     0,     2,    -8,    -2,    -8,    -2,
-8,    -8
};
static const yytype_int8 yypgoto[] =
{
-8,    -7,    -8,    -8,    -8
};
static const yytype_uint8 yytable[] =
{
10,     1,    11,     5,     6,     0,     7,     9
};
static const yytype_int8 yycheck[] =
{
7,     3,     9,     4,     0,    -1,     6,     5
};
static const yytype_uint8 yystos[] =
{
0,     3,     8,     9,    10,     4,     0,     6,    11,     5,
8,     8
};
]])

AT_CLEANUP

## ------------------------- ##
## yycheck Bound Violation.  ##
## ------------------------- ##

# _AT_DATA_DANCER_Y(BISON-OPTIONS)
# --------------------------------
# Create dancer.y, a self-contained grammar file with BISON-OPTIONS
# inserted among its declarations.
#
# The grammar, taken from Andrew Suffield's GPL'd implementation
# of DGMTP, the Dancer Generic Message Transport Protocol, used to violate
# yycheck's bounds when issuing a verbose error message.  Keep this test
# so that possible bound checking compilers could check all the skeletons.
#
# The scanner hands the parser a single ':' and then -1 (a non-positive
# value, which Bison parsers take as end of input).  No `header' can
# start with ':', so the parser has to report a syntax error.
m4_define([_AT_DATA_DANCER_Y],
[AT_DATA_GRAMMAR([dancer.y],
[%{
static int yylex (AT_LALR1_CC_IF([int *], [void]));
AT_LALR1_CC_IF([],
[#include <stdio.h>
static void yyerror (const char *);])
%}
\$1
%token ARROW INVALID NUMBER STRING DATA
%defines
%verbose
%error-verbose
/* Grammar follows */
%%
line: header body
   ;

header: '<' from ARROW to '>' type ':'
   | '<' ARROW to '>' type ':'
   | ARROW to type ':'
   | type ':'
   | '<' '>'
   ;

from: DATA
   | STRING
   | INVALID
   ;

to: DATA
   | STRING
   | INVALID
   ;

type: DATA
   | STRING
   | INVALID
   ;

body: /* empty */
   | body member
   ;

member: STRING
   | DATA
   | '+' NUMBER
   | '-' NUMBER
   | NUMBER
   | INVALID
   ;
%%
AT_LALR1_CC_IF(
[/* A C++ error reporting function. */
void
yy::parser::error (const location&, const std::string& m)
{
  std::cerr << m << std::endl;
}

int
yyparse ()
{
  yy::parser parser;
  parser.set_debug_level (!!YYDEBUG);
  return parser.parse ();
}
],
[static void
yyerror (const char *s)
{
  fprintf (stderr, "%s\n", s);
}])

static int
yylex (AT_LALR1_CC_IF([int *lval], [void]))
[{
  /* Tokens handed to the parser, one per call, in this order.  */
  static int toknum = 0;
  static int tokens[] =
    {
      ':', -1
    };
  ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC.  */])[
  return tokens[toknum++];
}]

int
main (void)
{
  return yyparse ();
}
])
])# _AT_DATA_DANCER_Y

# AT_CHECK_DANCER(BISON-OPTIONS)
# ------------------------------
# Generate the grammar, compile it, run it.
#
# Define one test group named after BISON-OPTIONS.  Bison is first run
# with `-o dancer.c' whatever the options are, and then once more in the
# branch selected by the skeleton: dancer.cc for lalr1.cc, built with the
# C++ compiler, dancer.c otherwise, built with the C compiler.  The parser
# must exit with status 1, print nothing on stdout, and issue the
# verbose syntax error on stderr.
# NOTE(review): the first bison run looks redundant with the one made in
# either branch -- confirm before removing it.
m4_define([AT_CHECK_DANCER],
[AT_SETUP([Dancer \$1])
AT_BISON_OPTION_PUSHDEFS([\$1])
_AT_DATA_DANCER_Y([\$1])
AT_CHECK([bison -o dancer.c dancer.y])
AT_LALR1_CC_IF(
[AT_CHECK([bison -o dancer.cc dancer.y])
AT_COMPILE_CXX([dancer])],
[AT_CHECK([bison -o dancer.c dancer.y])
AT_COMPILE([dancer])])
AT_PARSER_CHECK([./dancer], 1, [],
[syntax error, unexpected ':'
])
AT_BISON_OPTION_POPDEFS
AT_CLEANUP
])

# Run the Dancer test three times: with no extra directive, as a GLR
# parser, and with the C++ lalr1.cc skeleton.
AT_CHECK_DANCER()
AT_CHECK_DANCER([%glr-parser])
AT_CHECK_DANCER([%skeleton "lalr1.cc"])

## ------------------------------------------ ##
## Diagnostic that expects two alternatives.  ##
## ------------------------------------------ ##

# _AT_DATA_EXPECT2_Y(BISON-OPTIONS)
# ---------------------------------
# Create expect2.y, a self-contained grammar file with BISON-OPTIONS
# inserted among its declarations.
#
# The scanner returns 1000 (the number given to token A), then '+' twice,
# then -1 (a non-positive value, which Bison parsers take as end of
# input).  The second '+' comes where the grammar only allows a `t',
# that is A or B, which is the two-alternative diagnostic under test.
m4_define([_AT_DATA_EXPECT2_Y],
[AT_DATA_GRAMMAR([expect2.y],
[%{
static int yylex (AT_LALR1_CC_IF([int *], [void]));
AT_LALR1_CC_IF([],
[#include <stdio.h>
static void yyerror (const char *);])
%}
\$1
%defines
%error-verbose
%token A 1000
%token B

%%
program: /* empty */
   | program e ';'
   | program error ';';

e: e '+' t | t;
t: A | B;

%%
AT_LALR1_CC_IF(
[/* A C++ error reporting function. */
void
yy::parser::error (const location&, const std::string& m)
{
  std::cerr << m << std::endl;
}

int
yyparse ()
{
  yy::parser parser;
  return parser.parse ();
}
],
[static void
yyerror (const char *s)
{
  fprintf (stderr, "%s\n", s);
}])

static int
yylex (AT_LALR1_CC_IF([int *lval], [void]))
[{
  /* Tokens handed to the parser, one per call, in this order.  */
  static int toknum = 0;
  static int tokens[] =
    {
      1000, '+', '+', -1
    };
  ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC.  */])[
  return tokens[toknum++];
}]

int
main (void)
{
  return yyparse ();
}
])
])# _AT_DATA_EXPECT2_Y

# AT_CHECK_EXPECT2(BISON-OPTIONS)
# -------------------------------
# Generate the grammar, compile it, run it.
#
# Define one test group named after BISON-OPTIONS.  Bison is first run
# with `-o expect2.c' whatever the options are, and then once more in the
# branch selected by the skeleton: expect2.cc for lalr1.cc, built with
# the C++ compiler, expect2.c otherwise, built with the C compiler.  The
# parser must exit with status 1, print nothing on stdout, and name both
# expected tokens in its verbose syntax error on stderr.
# NOTE(review): the first bison run looks redundant with the one made in
# either branch -- confirm before removing it.
m4_define([AT_CHECK_EXPECT2],
[AT_SETUP([Expecting two tokens \$1])
AT_BISON_OPTION_PUSHDEFS([\$1])
_AT_DATA_EXPECT2_Y([\$1])
AT_CHECK([bison -o expect2.c expect2.y])
AT_LALR1_CC_IF(
[AT_CHECK([bison -o expect2.cc expect2.y])
AT_COMPILE_CXX([expect2])],
[AT_CHECK([bison -o expect2.c expect2.y])
AT_COMPILE([expect2])])
AT_PARSER_CHECK([./expect2], 1, [],
[syntax error, unexpected '+', expecting A or B
])
AT_BISON_OPTION_POPDEFS
AT_CLEANUP
])

# Run the two-alternative diagnostic test three times: with no extra
# directive, as a GLR parser, and with the C++ lalr1.cc skeleton.
AT_CHECK_EXPECT2()
AT_CHECK_EXPECT2([%glr-parser])
AT_CHECK_EXPECT2([%skeleton "lalr1.cc"])
```