From 0f2cd44f5473716f8025c870155a25bf0360dbe9 Mon Sep 17 00:00:00 2001 From: Michael Schwarz Date: Thu, 17 Feb 2022 09:25:09 +0100 Subject: [PATCH 1/5] Support for universal character names Co-authored-by: Baris Coslu --- Makefile.in | 2 + config.h.in | 9 + configure | 852 ++++++++++++++++++++ configure.ac | 4 +- m4/cil.m4 | 10 + src/cil.ml | 41 +- src/cil.mli | 13 +- src/ext/pta/ptranal.ml | 2 +- src/formatparse.mly | 4 +- src/frontc/cabs.ml | 9 +- src/frontc/cabs2cil.ml | 52 +- src/frontc/clexer.mll | 45 +- src/frontc/cparser.mly | 150 ++-- src/frontc/cprint.ml | 13 +- src/machdep-ml.c.in | 6 +- src/machdepenv.ml | 2 + test/small1/c99-universal-character-names.c | 1 + 17 files changed, 1111 insertions(+), 104 deletions(-) diff --git a/Makefile.in b/Makefile.in index ed27235b6..e7242ef92 100644 --- a/Makefile.in +++ b/Makefile.in @@ -175,6 +175,8 @@ $(OBJDIR)/machdep.ml : src/machdep-ml.c configure.ac Makefile.in @echo " sizeof_fun: int; (* Size of function *)" >> $@ @echo " size_t: string; (* Type of \"sizeof(T)\" *)" >> $@ @echo " wchar_t: string; (* Type of \"wchar_t\" *)" >> $@ + @echo " char16_t: string; (* Type of \"char16_t\" *)" >> $@ + @echo " char32_t: string; (* Type of \"char32_t\" *)" >> $@ @echo " alignof_short: int; (* Alignment of \"short\" *)" >> $@ @echo " alignof_int: int; (* Alignment of \"int\" *)" >> $@ @echo " alignof_bool: int; (* Alignment of \"_Bool\" *)" >> $@ diff --git a/config.h.in b/config.h.in index 1f96ca00f..ee5d048af 100644 --- a/config.h.in +++ b/config.h.in @@ -42,6 +42,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H +/* Define to 1 if you have the header file. */ +#undef HAVE_UCHAR_H + /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H @@ -74,6 +77,12 @@ backward compatibility; new code need not use it. */ #undef STDC_HEADERS +/* Real integer type corresponding to char16_t. */ +#undef TYPE_CHAR16_T + +/* Real integer type corresponding to char32_t. 
*/ +#undef TYPE_CHAR32_T + /* Real integer type corresponding to size_t. */ #undef TYPE_SIZE_T diff --git a/configure b/configure index 30fca34ee..b15d4eb40 100755 --- a/configure +++ b/configure @@ -5299,6 +5299,12 @@ then : printf "%s\n" "#define HAVE_STDBOOL_H 1" >>confdefs.h fi +ac_fn_c_check_header_compile "$LINENO" "uchar.h" "ac_cv_header_uchar_h" "$ac_includes_default" +if test "x$ac_cv_header_uchar_h" = xyes +then : + printf "%s\n" "#define HAVE_UCHAR_H 1" >>confdefs.h + +fi # checks for typedefs, structures, and compiler characteristics @@ -5571,6 +5577,16 @@ printf %s "checking for real definition of size_t... " >&6; } #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5591,6 +5607,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5613,6 +5639,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5633,6 +5669,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5655,6 +5701,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5675,6 +5731,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5697,6 +5763,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5717,6 +5793,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5739,6 +5825,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5759,6 +5855,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5794,6 +5900,16 @@ printf %s "checking for real definition of wchar_t... " >&6; } #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5814,6 +5930,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5836,6 +5962,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5856,6 +5992,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5878,6 +6024,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5898,6 +6054,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5920,6 +6086,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5940,6 +6116,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5962,6 +6148,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -5982,6 +6178,16 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ @@ -6006,6 +6212,652 @@ printf "%s\n" "#define TYPE_WCHAR_T \"$real_type\"" >>confdefs.h printf "%s\n" "$real_type" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for real definition of char16_t" >&5 +printf %s "checking for real definition of char16_t... " >&6; } + real_type='' + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +int foo(int x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='int' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned int foo(unsigned int x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned int' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +long foo(long x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned long foo(unsigned long x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. 
This will result in compilation error + unless the types are really identical. */ +long long foo(long long x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='long long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned long long foo(unsigned long long x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned long long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. 
*/ +short foo(short x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='short' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned short foo(unsigned short x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned short' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +char foo(char x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='char' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned char foo(unsigned char x); +char16_t foo(char16_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned char' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + as_fn_error $? "cannot find definition of char16_t" "$LINENO" 5 + fi + +printf "%s\n" "#define TYPE_CHAR16_T \"$real_type\"" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $real_type" >&5 +printf "%s\n" "$real_type" >&6; } + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for real definition of char32_t" >&5 +printf %s "checking for real definition of char32_t... " >&6; } + real_type='' + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. 
*/ +int foo(int x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='int' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned int foo(unsigned int x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned int' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +long foo(long x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned long foo(unsigned long x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +long long foo(long long x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='long long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +unsigned long long foo(unsigned long long x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned long long' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +short foo(short x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='short' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. 
This will result in compilation error + unless the types are really identical. */ +unsigned short foo(unsigned short x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned short' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. */ +char foo(char x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='char' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. + // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif +/* We define a prototype with one type and the function with + another type. This will result in compilation error + unless the types are really identical. 
*/ +unsigned char foo(unsigned char x); +char32_t foo(char32_t x) { return x; } +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + real_type='unsigned char' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + fi + + + if test -z "$real_type"; then + as_fn_error $? "cannot find definition of char32_t" "$LINENO" 5 + fi + +printf "%s\n" "#define TYPE_CHAR32_T \"$real_type\"" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $real_type" >&5 +printf "%s\n" "$real_type" >&6; } + + # ----------------- finish up ------------------- # names of the variables that get substituted in files; for example, # write @CIL_VERSION@ somewhere in a written file to get it substituted diff --git a/configure.ac b/configure.ac index 948e0f911..138002d5f 100644 --- a/configure.ac +++ b/configure.ac @@ -146,7 +146,7 @@ AC_MSG_RESULT($UNDERSCORE_NAME) # (autoscan is part of the autoconf distribution) # checks for header files -AC_CHECK_HEADERS(stdlib.h strings.h sys/time.h unistd.h wchar.h stdbool.h) +AC_CHECK_HEADERS(stdlib.h strings.h sys/time.h unistd.h wchar.h stdbool.h uchar.h) # checks for typedefs, structures, and compiler characteristics AC_C_CONST @@ -160,6 +160,8 @@ AC_CHECK_FUNCS(mkdir select socket __sysv_signal) # Find out the true definitions of some integer types CIL_CHECK_INTEGER_TYPE(size_t, TYPE_SIZE_T) CIL_CHECK_INTEGER_TYPE(wchar_t, TYPE_WCHAR_T) +CIL_CHECK_INTEGER_TYPE(char16_t, TYPE_CHAR16_T) +CIL_CHECK_INTEGER_TYPE(char32_t, TYPE_CHAR32_T) # ----------------- finish up ------------------- # names of the variables that get substituted in files; for example, diff --git a/m4/cil.m4 b/m4/cil.m4 index 8ca22021c..584e32606 100644 --- a/m4/cil.m4 +++ b/m4/cil.m4 @@ -5,6 +5,16 @@ AC_DEFUN([__CIL_CHECK_INTEGER_TYPE_TYPE], [ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ #include #include +#include +#if __APPLE__ + // C11 7.28 defines these to be the same as uint_least16_t and uint_least32_t. 
+ // The standard mandates a uchar.h file to contain these typedefs, but Mac does + // not have that header file + typedef uint_least16_t char16_t; + typedef uint_least32_t char32_t; +#else + #include +#endif /* We define a prototype with one type and the function with another type. This will result in compilation error unless the types are really identical. */ diff --git a/src/cil.ml b/src/cil.ml index ee22b4d88..f7f26abd1 100755 --- a/src/cil.ml +++ b/src/cil.ml @@ -543,6 +543,8 @@ and exp = * [TArray(T)] produces an expression of type * [TPtr(T)]. *) +and wstring_type = | Wchar_t | Char16_t | Char32_t +and encoding = No_encoding | Utf8 (** Literal constants *) and constant = @@ -550,8 +552,8 @@ and constant = (** Integer constant. Give the ikind (see ISO9899 6.1.3.2) * and the textual representation, if available. Use * {!Cil.integer} or {!Cil.kinteger} to create these. *) - | CStr of string (** String constant (of pointer type) *) - | CWStr of int64 list (** Wide string constant (of type "wchar_t *") *) + | CStr of string * encoding (** String constant (of pointer type) *) + | CWStr of int64 list * wstring_type (** Wide string constant (of type "wchar_t *") *) | CChr of char (** Character constant. This has type int, so use * charConstToInt to read the value in case * sign-extension is needed. *) @@ -1296,9 +1298,13 @@ let upointType = ref voidType (* An integer type that fits a pointer difference. Initialized by initCIL *) let ptrdiffType = ref voidType -(* An integer type that fits wchar_t. Initialized by initCIL *) +(* Integer types that fit wchar_t, char16_t, and char32_t. Initialized by initCIL *) let wcharKind = ref IChar let wcharType = ref voidType +let char16Kind = ref IChar +let char16Type = ref voidType +let char32Kind = ref IChar +let char32Type = ref voidType (* An integer type that is the type of sizeof. 
Initialized by initCIL *) @@ -1666,7 +1672,7 @@ let getComplexFkind = function let var vi : lval = (Var vi, NoOffset) (* let assign vi e = Instrs(Set (var vi, e), lu) *) -let mkString s = Const(CStr s) +let mkString s = Const(CStr (s, No_encoding)) let mkWhile ~(guard:exp) ~(body: stmt list) : stmt list = @@ -1807,9 +1813,10 @@ let d_const () c = text (prefix ^ (string_of_cilint i ^ suffix)) ) - | CStr(s) -> text ("\"" ^ escape_string s ^ "\"") - | CWStr(s) -> + | CStr(s, enc) -> let prefix = match enc with No_encoding -> "" | Utf8 -> "u8" in text (prefix ^ "\"" ^ escape_string s ^ "\"") + | CWStr(s, st) -> (* text ("L\"" ^ escape_string s ^ "\"") *) + let prefix = match st with Wchar_t -> "L" | Char16_t -> "u" | Char32_t -> "U" in (List.fold_left (fun acc elt -> acc ++ if (elt >= Int64.zero && @@ -1818,7 +1825,7 @@ let d_const () c = else ( text (Printf.sprintf "\\x%LX\"" elt) ++ break ++ (text "\"")) - ) (text "L\"") s ) ++ text "\"" + ) (text (prefix ^ "\"")) s ) ++ text "\"" (* we cannot print L"\xabcd" "feedme" as L"\xabcdfeedme" -- * the former has 7 wide characters and the later has 3. *) @@ -1938,9 +1945,9 @@ let rec typeOf (e: exp) : typ = (* The type of a string is a pointer to characters ! 
The only case when * you would want it to be an array is as an argument to sizeof, but we * have SizeOfStr for that *) - | Const(CStr s) -> stringLiteralType + | Const(CStr (_, _)) -> stringLiteralType - | Const(CWStr s) -> TPtr(!wcharType,[]) + | Const(CWStr (s,st)) -> TPtr((match st with Wchar_t -> !wcharType | Char16_t -> !char16Type | Char32_t -> !char32Type), []) | Const(CReal (_, fk, _)) -> TFloat(fk, []) @@ -3364,7 +3371,7 @@ class defaultCilPrinterClass : cilPrinter = object (self) | Real e -> text "__real__(" ++ self#pExp () e ++ chr ')' | SizeOfStr s -> - text "sizeof(" ++ d_const () (CStr s) ++ chr ')' + text "sizeof(" ++ d_const () (CStr (s, No_encoding)) ++ chr ')' | AlignOf (t) -> text "__alignof__(" ++ self#pType None () t ++ chr ')' @@ -4607,16 +4614,16 @@ class plainCilPrinterClass = let d_plainconst () c = match c with CInt(i, ik, so) -> - let fmt = if isSigned ik then "%d" else "%x" in + let fmt = if isSigned ik then "%d" else "%x" in dprintf "Int(%s,%a,%s)" (Z.format fmt i) d_ikind ik (match so with Some s -> s | _ -> "None") - | CStr(s) -> - text ("CStr(\"" ^ escape_string s ^ "\")") - | CWStr(s) -> + | CStr(s, enc) -> + let enc_string = match enc with No_encoding -> "_" | Utf8 -> "UTF8" in + text ("CStr(\"" ^ escape_string s ^ "\"," ^ enc_string ^ ")") + | CWStr(s,_) -> dprintf "CWStr(%a)" d_const c - | CChr(c) -> text ("CChr('" ^ escape_char c ^ "')") | CReal(f, fk, so) -> dprintf "CReal(%f, %a, %s)" @@ -4664,7 +4671,7 @@ class plainCilPrinterClass = | SizeOfE (e) -> text "sizeofE(" ++ self#pExp () e ++ chr ')' | SizeOfStr (s) -> - text "sizeofStr(" ++ d_const () (CStr s) ++ chr ')' + text "sizeofStr(" ++ d_const () (CStr (s, No_encoding)) ++ chr ')' | AlignOf (t) -> text "__alignof__(" ++ self#pType None () t ++ chr ')' | AlignOfE (e) -> @@ -5965,7 +5972,7 @@ let typeSigAttrs = function let dExp: doc -> exp = - fun d -> Const(CStr(sprint ~width:!lineLength d)) + fun d -> Const(CStr(sprint ~width:!lineLength d, No_encoding)) let dInstr: doc 
-> location -> instr = fun d l -> Asm([], [sprint ~width:!lineLength d], [], [], [], l) diff --git a/src/cil.mli b/src/cil.mli index 528672525..b9984a7ce 100644 --- a/src/cil.mli +++ b/src/cil.mli @@ -636,6 +636,9 @@ and exp = (** {b Constants.} *) +and wstring_type = | Wchar_t | Char16_t | Char32_t +and encoding = No_encoding | Utf8 + (** Literal constants *) and constant = | CInt of cilint * ikind * string option @@ -643,13 +646,13 @@ and constant = * textual representation, if available. (This allows us to print a * constant as, for example, 0xF instead of 15.) Use {!Cil.integer} or * {!Cil.kinteger} to create these. *) - | CStr of string + | CStr of string * encoding (** String constant. The escape characters inside the string have been * already interpreted. This constant has pointer to character type! The * only case when you would like a string literal to have an array type * is when it is an argument to sizeof. In that case you should use * SizeOfStr. *) - | CWStr of int64 list + | CWStr of int64 list * wstring_type (** Wide character string constant. Note that the local interpretation * of such a literal depends on {!Cil.wcharType} and {!Cil.wcharKind}. * Such a constant has type pointer to {!Cil.wcharType}. The @@ -1338,10 +1341,14 @@ val charPtrType: typ (** Type of string literals *) val stringLiteralType: typ -(** wchar_t (depends on architecture) and is set when you call +(** wchar_t, char16_t and char32_t depend on architecture and are set when you call * {!Cil.initCIL}. 
*) val wcharKind: ikind ref val wcharType: typ ref +val char16Kind: ikind ref +val char16Type: typ ref +val char32Kind: ikind ref +val char32Type: typ ref (** char const * *) val charConstPtrType: typ diff --git a/src/ext/pta/ptranal.ml b/src/ext/pta/ptranal.ml index 550760189..1e6d7ccb2 100644 --- a/src/ext/pta/ptranal.ml +++ b/src/ext/pta/ptranal.ml @@ -231,7 +231,7 @@ and analyze_expr_as_lval (e : exp) : A.lvalue = and analyze_expr (e : exp ) : A.tau = let result = match e with - Const (CStr s) -> + Const (CStr (s,_)) -> if !model_strings then A.address (A.make_lvalue false diff --git a/src/formatparse.mly b/src/formatparse.mly index aba2abf04..a6db95544 100644 --- a/src/formatparse.mly +++ b/src/formatparse.mly @@ -559,11 +559,11 @@ constant: | ARG_g { let currentArg = $1 in ((fun args -> match getArg currentArg args with - Fg s -> Const(CStr s) + Fg s -> Const(CStr (s, No_encoding)) | a -> wrongArgType currentArg "string" a), fun e -> match e with - Const(CStr s) -> + Const(CStr (s,_)) -> Some [ Fg s ] | _ -> None) } diff --git a/src/frontc/cabs.ml b/src/frontc/cabs.ml index 449f22895..7ce22c1d4 100644 --- a/src/frontc/cabs.ml +++ b/src/frontc/cabs.ml @@ -296,15 +296,18 @@ and constant = | CONST_FLOAT of string (* the textual representaton *) | CONST_COMPLEX of string (* the textual representation *) | CONST_CHAR of int64 list - | CONST_WCHAR of int64 list - | CONST_STRING of string - | CONST_WSTRING of int64 list + | CONST_WCHAR of int64 list * wchar_type + | CONST_STRING of string * encoding + | CONST_WSTRING of int64 list * wchar_type (* ww: wstrings are stored as an int64 list at this point because * we might need to feed the wide characters piece-wise into an * array initializer (e.g., wchar_t foo[] = L"E\xabcd";). If that * doesn't happen we will convert it to an (escaped) string before * passing it to Cil. 
*) +and wchar_type = WCHAR_T | CHAR16_T | CHAR32_T | CHAR | CHAR_UTF8 +and encoding = NO_ENCODING | UTF8 + and init_expression = | NO_INIT | SINGLE_INIT of expression diff --git a/src/frontc/cabs2cil.ml b/src/frontc/cabs2cil.ml index 96b400c34..52dc6a3c4 100644 --- a/src/frontc/cabs2cil.ml +++ b/src/frontc/cabs2cil.ml @@ -2670,7 +2670,7 @@ let rec doSpecList (suggestedAnonName: string) (* This string will be part of match e' with StartOf(lv) -> typeOfLval lv (* If this is a string literal, then we treat it as in sizeof*) - | Const (CStr s) -> begin + | Const (CStr (s,_)) -> begin match typeOf e' with TPtr(bt, _) -> (* This is the type of array elements *) TArray(bt, Some (SizeOfStr s), []) @@ -2802,7 +2802,7 @@ and doAttr (a: A.attribute) : attribute list = | _ -> ACons (n', []) with Not_found -> ACons(n', []) end - | A.CONSTANT (A.CONST_STRING s) -> AStr s + | A.CONSTANT (A.CONST_STRING (s,_)) -> AStr s | A.CONSTANT (A.CONST_INT str) -> begin match parseInt str with Const (CInt (v64,_,_)) -> @@ -3403,7 +3403,7 @@ and doExp (asconst: bool) (* This expression is used as a constant *) | A.PAREN e -> E.s (bug "stripParen") | A.NOTHING when what = ADrop -> finishExp empty (integer 0) intType | A.NOTHING -> - let res = Const(CStr "exp_nothing") in + let res = Const(CStr ("exp_nothing", No_encoding)) in finishExp empty res (typeOf res) (* Do the potential lvalues first *) @@ -3568,11 +3568,16 @@ and doExp (asconst: bool) (* This expression is used as a constant *) (TPtr(wchar_t, [])) *) - | A.CONST_WSTRING (ws: int64 list) -> - let res = Const(CWStr ((* intlist_to_wstring *) ws)) in + | A.CONST_WSTRING (ws, wst) -> + let cil_wst = + match wst with + WCHAR_T -> Wchar_t | CHAR16_T -> Char16_t | CHAR32_T -> Char32_t + | _ -> E.s ("Error in CONST_WSTRING: Not a wchar type"); + in + let res = Const(CWStr ((* intlist_to_wstring *) ws, cil_wst)) in finishExp empty res (typeOf res) - | A.CONST_STRING s -> + | A.CONST_STRING (s,enc) -> (* Maybe we burried __FUNCTION__ in there *) 
let s' = try @@ -3589,14 +3594,15 @@ and doExp (asconst: bool) (* This expression is used as a constant *) s with Not_found -> s in - let res = Const(CStr s') in + let enc' = match enc with NO_ENCODING -> No_encoding | UTF8 -> Utf8 in + let res = Const(CStr (s', enc')) in finishExp empty res (typeOf res) | A.CONST_CHAR char_list -> let a, b = (interpret_character_constant char_list) in finishExp empty (Const a) b - | A.CONST_WCHAR char_list -> + | A.CONST_WCHAR (char_list,wct) -> (* matth: I can't see a reason for a list of more than one char * here, since the kinteger64 below will take only the lower 16 * bits of value. ('abc' makes sense, because CHAR constants have @@ -3604,8 +3610,14 @@ and doExp (asconst: bool) (* This expression is used as a constant *) * the value. But L'abc' has type wchar, and so is equivalent to * L'c'). But gcc allows L'abc', so I'll leave this here in case * I'm missing some architecture dependent behavior. *) - let value = reduce_multichar !wcharType char_list in - let result = kintegerCilint !wcharKind value in + let wcType, wcKind = match wct with + | WCHAR_T -> !wcharType, !wcharKind + | CHAR16_T -> !char16Type, !char16Kind + | CHAR32_T -> !char32Type, !char32Kind + | _ -> E.s ("Error in CONST_WCHAR: not a wchar type"); + in + let value = reduce_multichar wcType char_list in + let result = kintegerCilint wcKind value in (* kind of the literal's own type (L/u/U), not always wchar_t *) finishExp empty result (typeOf result) | A.CONST_FLOAT str -> begin @@ -3634,7 +3646,7 @@ and doExp (asconst: bool) (* This expression is used as a constant *) ignore (E.log "float_of_string %s (%s)\n" str (Printexc.to_string e)); E.hadErrors := true; - let res = Const(CStr "booo CONS_FLOAT") in + let res = Const(CStr ("booo CONS_FLOAT", No_encoding)) in finishExp empty res (typeOf res) end end @@ -3664,7 +3676,7 @@ and doExp (asconst: bool) (* This expression is used as a constant *) ignore (E.log "float_of_string_2 %s (%s)\n" baseint (Printexc.to_string e)); E.hadErrors := true; - let res = Const(CStr "booo
CONS_FLOAT") in + let res = Const(CStr ("booo CONS_FLOAT", No_encoding)) in finishExp empty res (typeOf res) end end @@ -3675,10 +3687,10 @@ and doExp (asconst: bool) (* This expression is used as a constant *) finishExp empty (SizeOf(typ)) !typeOfSizeOf (* Intercept the sizeof("string") *) - | A.EXPR_SIZEOF (A.CONSTANT (A.CONST_STRING s)) -> begin + | A.EXPR_SIZEOF (A.CONSTANT (A.CONST_STRING (s,enc))) -> begin (* Process the string first *) - match doExp asconst (A.CONSTANT (A.CONST_STRING s)) (AExp None) with - _, Const(CStr s), _ -> + match doExp asconst (A.CONSTANT (A.CONST_STRING (s,enc))) (AExp None) with + _, Const(CStr (s,enc)), _ -> finishExp empty (SizeOfStr s) !typeOfSizeOf | _ -> E.s (bug "cabs2cil: sizeOfStr") end @@ -5223,11 +5235,11 @@ and doInit * string into characters *) | TArray(bt, leno, _), (A.NEXT_INIT, - (A.SINGLE_INIT(A.CONSTANT (A.CONST_STRING s))| + (A.SINGLE_INIT(A.CONSTANT (A.CONST_STRING (s,enc)))| A.COMPOUND_INIT [(A.NEXT_INIT, A.SINGLE_INIT(A.CONSTANT - (A.CONST_STRING s)))])) :: restil + (A.CONST_STRING (s,enc))))])) :: restil when (match unrollType bt with TInt((IChar|IUChar|ISChar), _) -> true | TInt _ -> @@ -5275,11 +5287,11 @@ and doInit * important. *) | TArray(bt, leno, _), (A.NEXT_INIT, - (A.SINGLE_INIT(A.CONSTANT (A.CONST_WSTRING s)) | + (A.SINGLE_INIT(A.CONSTANT (A.CONST_WSTRING (s,enc))) | A.COMPOUND_INIT [(A.NEXT_INIT, A.SINGLE_INIT(A.CONSTANT - (A.CONST_WSTRING s)))])) :: restil + (A.CONST_WSTRING (s,enc))))])) :: restil when(let bt' = unrollType bt in match bt' with (* compare bt to wchar_t, ignoring signed vs. 
unsigned *) @@ -5813,7 +5825,7 @@ and createLocal ?allow_var_decl:(allow_var_decl=true) ((_, sto, _, _) as specs) TArray(_,None, _), _, TArray(_, Some _, _) -> vi.vtype <- et (* Initializing a local array *) | TArray(TInt((IChar|IUChar|ISChar), _) as bt, None, a), - SingleInit(Const(CStr s)), _ -> + SingleInit(Const(CStr (s,enc))), _ -> vi.vtype <- TArray(bt, Some (integer (String.length s + 1)), a) diff --git a/src/frontc/clexer.mll b/src/frontc/clexer.mll index b6b7a6a43..13beb6bab 100644 --- a/src/frontc/clexer.mll +++ b/src/frontc/clexer.mll @@ -330,6 +330,23 @@ let scan_oct_escape str = done; !the_value +(* For a given Unicode Code-point of type Int64, calculates the UTF-8 representation and returns the bytes + * in a list of 1-4 int64 values in reverse order, such that the first byte is the last element of the list *) +let utf8_representation value = + let generate_bytes n = + let first_byte = + let first_byte_prefix = match n with 1 -> 0L | 2 -> 0xC0L | 3 -> 0xE0L | 4 -> 0xF0L | _ -> E.s(error "error in utf8_representation"); in + Int64.logor first_byte_prefix (Int64.shift_right_logical value (6*(n-1))) + in + let rec generate_one bytes n = + if n = 1 then bytes + else generate_one ((Int64.logor 0x80L (Int64.logand (Int64.shift_right_logical value (6*(n-2))) 0x3FL)) :: bytes) (n-1) + in + generate_one [first_byte] n + in + let num_bytes = if value <= 127L then 1 else if value <= 2047L then 2 else if value <= 65535L then 3 else 4 in + generate_bytes num_bytes + let lex_hex_escape remainder lexbuf = let prefix = scan_hex_escape (Lexing.lexeme lexbuf) in prefix :: remainder lexbuf @@ -343,6 +360,14 @@ let lex_simple_escape remainder lexbuf = let prefix = scan_escape lexchar in prefix :: remainder lexbuf +let lex_universal_escape ischar remainder lexbuf = + let value = scan_hex_escape (Lexing.lexeme lexbuf) in + if ischar then + value :: remainder lexbuf + else + let prefix = utf8_representation value in + List.rev_append prefix (remainder lexbuf) + let 
lex_unescaped remainder lexbuf = let prefix = Int64.of_int (Char.code (Lexing.lexeme_char lexbuf 0)) in prefix :: remainder lexbuf @@ -432,11 +457,13 @@ let floatnum = (decfloat | hexfloat) floatsuffix? let complexnum = (decfloat | hexfloat) ((['i' 'I'] floatsuffix) | (floatsuffix? ['i' 'I'])) -let ident = (letter|'_'|'$')(letter|decdigit|'_'|'$')* let blank = [' ' '\t' '\012' '\r']+ let escape = '\\' _ let hex_escape = '\\' ['x' 'X'] hexdigit+ let oct_escape = '\\' octdigit octdigit? octdigit? +let hexquad = hexdigit hexdigit hexdigit hexdigit +let universal_escape = '\\' ('u' hexquad | 'U' hexquad hexquad) +let ident = (letter|'_'|'$'|universal_escape)(letter|decdigit|'_'|'$'|universal_escape)* (* Pragmas that are not parsed by CIL. We lex them as PRAGMA_LINE tokens *) let no_parse_pragma = @@ -480,6 +507,8 @@ rule initial = | "_Pragma" { PRAGMA (currentLoc ()) } | '\'' { CST_CHAR (chr lexbuf, currentLoc ())} | "L'" { CST_WCHAR (chr lexbuf, currentLoc ()) } +| "u'" { CST_CHAR16 (chr lexbuf, currentLoc ()) } +| "U'" { CST_CHAR32 (chr lexbuf, currentLoc ()) } | '"' { addLexeme lexbuf; (* '"' *) (* matth: BUG: this could be either a regular string or a wide string. * e.g. 
if it's the "world" in @@ -491,12 +520,24 @@ rule initial = raise (InternalError ("str: " ^ Printexc.to_string e))} +| "u8\"" { addLexeme lexbuf; (* '"' *) + try CST_U8STRING (str lexbuf, currentLoc ()) + with e -> + raise (InternalError + ("str: " ^ + Printexc.to_string e))} | "L\"" { (* weimer: wchar_t string literal *) try CST_WSTRING(str lexbuf, currentLoc ()) with e -> raise (InternalError ("wide string: " ^ Printexc.to_string e))} +| "u\"" {try CST_STRING16(str lexbuf, currentLoc ()) + with e -> + raise (InternalError ("wide string: " ^ Printexc.to_string e))} +| "U\"" {try CST_STRING32(str lexbuf, currentLoc ()) + with e -> + raise (InternalError ("wide string: " ^ Printexc.to_string e))} | floatnum {CST_FLOAT (Lexing.lexeme lexbuf, currentLoc ())} | complexnum {CST_COMPLEX (Lexing.lexeme lexbuf, currentLoc ())} | hexnum {CST_INT (Lexing.lexeme lexbuf, currentLoc ())} @@ -656,6 +697,7 @@ and str = parse | hex_escape {addLexeme lexbuf; lex_hex_escape str lexbuf} | oct_escape {addLexeme lexbuf; lex_oct_escape str lexbuf} | escape {addLexeme lexbuf; lex_simple_escape str lexbuf} +| universal_escape {addLexeme lexbuf; lex_universal_escape false str lexbuf} | _ {addLexeme lexbuf; lex_unescaped str lexbuf} and chr = parse @@ -663,6 +705,7 @@ and chr = parse | hex_escape {lex_hex_escape chr lexbuf} | oct_escape {lex_oct_escape chr lexbuf} | escape {lex_simple_escape chr lexbuf} +| universal_escape {lex_universal_escape true chr lexbuf} | _ {lex_unescaped chr lexbuf} and msasm = parse diff --git a/src/frontc/cparser.mly b/src/frontc/cparser.mly index fd616f36c..de59a11b7 100644 --- a/src/frontc/cparser.mly +++ b/src/frontc/cparser.mly @@ -233,12 +233,25 @@ let transformOffsetOf (speclist, dtype) member = let resultExpr = CAST (sizeofType, SINGLE_INIT addrExpr) in resultExpr + let queue_to_int64_list queue = + List.rev (Queue.fold (fun l e -> List.rev_append e l) [] queue) + + let queue_to_string queue = + let buffer = Buffer.create (Queue.length queue) in + 
Queue.iter + (List.iter + (fun value -> + let char = int64_to_char value in + Buffer.add_char buffer char)) + queue; + Buffer.contents buffer + %} %token IDENT %token QUALIFIER %token CST_CHAR -%token CST_WCHAR +%token CST_WCHAR CST_CHAR16 CST_CHAR32 %token CST_INT %token CST_FLOAT %token CST_COMPLEX @@ -247,7 +260,7 @@ let transformOffsetOf (speclist, dtype) member = /* Each character is its own list element, and the terminating nul is not included in this list. */ %token CST_STRING -%token CST_WSTRING +%token CST_WSTRING CST_STRING16 CST_STRING32 CST_U8STRING %token EOF %token CHAR INT BOOL DOUBLE FLOAT VOID INT64 INT32 @@ -342,7 +355,7 @@ let transformOffsetOf (speclist, dtype) member = %type attributes attributes_with_asm asmattr %type statement %type constant -%type string_constant +%type string_constant %type expression %type opt_expression %type init_expression @@ -350,8 +363,8 @@ let transformOffsetOf (speclist, dtype) member = %type paren_comma_expression %type arguments %type bracket_comma_expression -%type string_list -%type wstring_list +/* %type string_list */ +/* %type wstring_list */ %type initializer %type <(Cabs.initwhat * Cabs.init_expression) list> initializer_list @@ -410,11 +423,11 @@ global: | function_def { $1 } /*(* Some C header files ar shared with the C++ compiler and have linkage * specification *)*/ -| EXTERN string_constant declaration { LINKAGE (fst $2, (*handleLoc*) (snd $2), [ $3 ]) } +| EXTERN string_constant declaration { let q,t,l = $2 in LINKAGE (queue_to_string q, (*handleLoc*) l, [ $3 ]) } | EXTERN string_constant LBRACE globals RBRACE - { LINKAGE (fst $2, (*handleLoc*) (snd $2), $4) } + { let q,t,l = $2 in LINKAGE (queue_to_string q, (*handleLoc*) l, $4) } | ASM LPAREN string_constant RPAREN SEMICOLON - { GLOBASM (fst $3, (*handleLoc*) $1) } + { let q,t,l = $3 in GLOBASM (queue_to_string q, (*handleLoc*) $1) } | pragma { $1 } /* (* Old-style function prototype. This should be somewhere else, like in * "declaration". 
For now we keep it at global scope only because in local @@ -704,57 +717,93 @@ constant: CST_INT {CONST_INT (fst $1), snd $1} | CST_FLOAT {CONST_FLOAT (fst $1), snd $1} | CST_COMPLEX {CONST_COMPLEX (fst $1), snd $1} -| CST_CHAR {CONST_CHAR (fst $1), snd $1} -| CST_WCHAR {CONST_WCHAR (fst $1), snd $1} -| string_constant {CONST_STRING (fst $1), snd $1} -| wstring_list {CONST_WSTRING (fst $1), snd $1} +| CST_CHAR {CONST_CHAR (fst $1), snd $1} +| CST_WCHAR {CONST_WCHAR (fst $1, WCHAR_T), snd $1} +| CST_CHAR16 {CONST_WCHAR (fst $1, CHAR16_T), snd $1} +| CST_CHAR32 {CONST_WCHAR (fst $1, CHAR32_T), snd $1} +| string_constant { + let queue, typ, location = $1 in + match typ with + | CHAR -> CONST_STRING (queue_to_string queue, NO_ENCODING), location + | CHAR_UTF8 -> CONST_STRING (queue_to_string queue, UTF8), location + | _ -> CONST_WSTRING (queue_to_int64_list queue, typ), location + } ; -string_constant: -/* Now that we know this constant isn't part of a wstring, convert it - back to a string for easy viewing. */ - string_list { - let queue, location = $1 in - let buffer = Buffer.create (Queue.length queue) in - Queue.iter - (List.iter - (fun value -> - let char = int64_to_char value in - Buffer.add_char buffer char)) - queue; - Buffer.contents buffer, location - } -; one_string_constant: /* Don't concat multiple strings. For asm templates. 
*/ CST_STRING {intlist_to_string (fst $1) } ; -string_list: + +string_constant: one_string { + let queue = Queue.create () in + let str, typ, loc = $1 in + Queue.add str queue; + queue, typ, loc + } +| CST_WSTRING { + let queue = Queue.create () in + Queue.add (fst $1) queue; + queue, WCHAR_T, snd $1 + } +| CST_STRING16 { + let queue = Queue.create () in + Queue.add (fst $1) queue; + queue, CHAR16_T, snd $1 + } +| CST_STRING32 { let queue = Queue.create () in Queue.add (fst $1) queue; - queue, snd $1 + queue, CHAR32_T, snd $1 + } +| string_constant one_string { + let queue, typ, loc = $1 in + let str, typ2, _ = $2 in + Queue.add str queue; + if typ2 = CHAR_UTF8 && typ <> CHAR && typ <> CHAR_UTF8 then ( + parse_error "Incompatible string literals"; + raise Parsing.Parse_error) + else + let typ3 = if typ2 = CHAR_UTF8 then CHAR_UTF8 else typ in + queue, typ3, loc + } +| string_constant CST_WSTRING { + let queue, typ, loc = $1 in + Queue.add (fst $2) queue; + if typ <> CHAR && typ <> WCHAR_T then ( + parse_error "Incompatible string literals"; + raise Parsing.Parse_error) + else + queue, WCHAR_T, loc } -| string_list one_string { - Queue.add (fst $2) (fst $1); - $1 +| string_constant CST_STRING16 { + let queue, typ, loc = $1 in + Queue.add (fst $2) queue; + if typ <> CHAR && typ <> CHAR16_T then ( + parse_error "Incompatible string literals"; + raise Parsing.Parse_error) + else + queue, CHAR16_T, loc + } +| string_constant CST_STRING32 { + let queue, typ, loc = $1 in + Queue.add (fst $2) queue; + if typ <> CHAR && typ <> CHAR32_T then ( + parse_error "Incompatible string literals"; + raise Parsing.Parse_error) + else + queue, CHAR32_T, loc } ; -wstring_list: - CST_WSTRING { $1 } -| wstring_list one_string { (fst $1) @ (fst $2), snd $1 } -| wstring_list CST_WSTRING { (fst $1) @ (fst $2), snd $1 } -/* Only the first string in the list needs an L, so L"a" "b" is the same - * as L"ab" or L"a" L"b". 
*/ - one_string: - CST_STRING {$1} + CST_STRING {fst $1, CHAR, snd $1} +| CST_U8STRING {fst $1, CHAR_UTF8, snd $1} | FUNCTION__ {(Cabshelper.explodeStringToInts - !currentFunctionName), $1} + !currentFunctionName), CHAR, $1} | PRETTY_FUNCTION__ {(Cabshelper.explodeStringToInts - !currentFunctionName), $1} -; + !currentFunctionName), CHAR, $1} init_expression: expression { SINGLE_INIT (fst $1) } @@ -949,7 +998,8 @@ static_assert_declaration: } | STATIC_ASSERT LPAREN expression COMMA string_constant RPAREN { - (fst $3, fst $5, $1) + let q,t,l = $5 in + (fst $3, queue_to_string q, $1) } ; @@ -1365,8 +1415,8 @@ attributes_with_asm: /* empty */ { [] } | attribute attributes_with_asm { fst $1 :: $2 } | ASM LPAREN string_constant RPAREN attributes - { ("__asm__", - [CONSTANT(CONST_STRING (fst $3))]) :: $5 } + { let q,t,l = $3 in ("__asm__", + [CONSTANT(CONST_STRING (queue_to_string q, NO_ENCODING))]) :: $5 } ; /* things like __attribute__, but no const/volatile */ @@ -1432,7 +1482,7 @@ primary_attr: | LPAREN attr RPAREN { $2 } | IDENT IDENT { CALL(VARIABLE (fst $1), [VARIABLE (fst $2)]) } | CST_INT { CONSTANT(CONST_INT (fst $1)) } -| string_constant { CONSTANT(CONST_STRING (fst $1)) } +| string_constant { let q,t,l = $1 in CONSTANT(CONST_STRING (queue_to_string q, NO_ENCODING)) } /*(* Const when it appears in * attribute lists, is translated * to aconst *)*/ @@ -1602,10 +1652,12 @@ asmoperandsne: asmoperand { [$1] } | asmoperandsne COMMA asmoperand { $3 :: $1 } ; + asmoperand: - asmopname string_constant LPAREN expression RPAREN { ($1, fst $2, fst $4) } -| asmopname string_constant LPAREN error RPAREN { ($1, fst $2, NOTHING ) } + asmopname string_constant LPAREN expression RPAREN { let q,t,l = $2 in ($1, queue_to_string q, fst $4) } +| asmopname string_constant LPAREN error RPAREN { let q,t,l = $2 in ($1, queue_to_string q, NOTHING ) } ; + asminputs: /* empty */ { ([], []) } | COLON asmoperands asmclobber diff --git a/src/frontc/cprint.ml b/src/frontc/cprint.ml index 
3279ca43a..7bbe700ae 100644 --- a/src/frontc/cprint.ml +++ b/src/frontc/cprint.ml @@ -123,8 +123,9 @@ let print_commas nl fct lst = let print_string (s:string) = print ("\"" ^ escape_string s ^ "\"") -let print_wstring (s: int64 list ) = - print ("L\"" ^ escape_wstring s ^ "\"") +let print_wstring (s: int64 list ) (wst: Cabs.wchar_type) = + let prefix = match wst with WCHAR_T -> "L" | CHAR16_T -> "u" | CHAR32_T -> "U" | _ -> Errormsg.s ("Error in print_wstring: not a wchar type") in + print (prefix ^ "\"" ^ escape_wstring s ^ "\"") (* ** Base Type Printing @@ -517,9 +518,11 @@ and print_expression_level (lvl: int) (exp : expression) = | CONST_FLOAT r -> print r | CONST_COMPLEX r -> print r | CONST_CHAR c -> print ("'" ^ escape_wstring c ^ "'") - | CONST_WCHAR c -> print ("L'" ^ escape_wstring c ^ "'") - | CONST_STRING s -> print_string s - | CONST_WSTRING ws -> print_wstring ws) + | CONST_WCHAR (c, wct) -> + let prefix = match wct with WCHAR_T -> "L'" | CHAR16_T -> "u'" | CHAR32_T -> "U'" | CHAR_UTF8 -> "u8'" | CHAR -> "'" in + print (prefix ^ escape_wstring c ^ "'") + | CONST_STRING (s, enc) -> print ((match enc with NO_ENCODING -> "" | UTF8 -> "u8") ^ "\"" ^ escape_string s ^ "\"") (* keep the u8 prefix *) + | CONST_WSTRING (ws, wst) -> print_wstring ws wst) | VARIABLE name -> comprint "variable"; print name diff --git a/src/machdep-ml.c.in b/src/machdep-ml.c.in index 37bd93468..dd763e37f 100644 --- a/src/machdep-ml.c.in +++ b/src/machdep-ml.c.in @@ -272,7 +272,7 @@ int main(int argc, char **argv) printf("short=%d,%d int=%d,%d long=%d,%d long_long=%d,%d pointer=%d,%d " "alignof_enum=%d float=%d,%d float32x=%d,%d float64x=%d,%d double=%d,%d long_double=%d,%d float_complex=%d,%d double_complex=%d,%d long_double_complex=%d,%d void=%d " "bool=%d,%d fun=%d,%d alignof_string=%d max_alignment=%d size_t=%s " - "wchar_t=%s char_signed=%s " + "wchar_t=%s char16_t=%s char32_t=%s char_signed=%s " "big_endian=%s __thread_is_keyword=%s __builtin_va_list=%s " "underscore_name=%s\n", (int)sizeof(short), alignof_short, (int)sizeof(int), alignof_int, @@ -290,7 +290,7 @@ int
main(int argc, char **argv) (int)sizeof(long double _Complex), alignof_longdoublecomplex, (int)sizeof(void), (int)sizeof(bool), alignof_bool, sizeof_fun, alignof_fun, alignof_str, alignof_aligned, - underscore(TYPE_SIZE_T), underscore(TYPE_WCHAR_T), + underscore(TYPE_SIZE_T), underscore(TYPE_WCHAR_T), underscore(TYPE_CHAR16_T), underscore(TYPE_CHAR32_T), char_is_unsigned ? "false" : "true", little_endian ? "false" : "true", THREAD_IS_KEYWORD, HAVE_BUILTIN_VA_LIST, UNDERSCORE_NAME); @@ -327,6 +327,8 @@ int main(int argc, char **argv) printf("\t sizeof_fun = %d;\n", (int)sizeof_fun); printf("\t size_t = \"%s\";\n", TYPE_SIZE_T); printf("\t wchar_t = \"%s\";\n", TYPE_WCHAR_T); + printf("\t char16_t = \"%s\";\n", TYPE_CHAR16_T); + printf("\t char32_t = \"%s\";\n", TYPE_CHAR32_T); printf("\t alignof_short = %d;\n", alignof_short); printf("\t alignof_int = %d;\n", alignof_int); printf("\t alignof_bool = %d;\n", alignof_bool); diff --git a/src/machdepenv.ml b/src/machdepenv.ml index 06265a79c..171dd68a8 100644 --- a/src/machdepenv.ml +++ b/src/machdepenv.ml @@ -92,6 +92,8 @@ let modelParse (s:string) : mach = alignof_aligned = getInt entries "max_alignment"; size_t = respace (getNthString 0 entries "size_t"); wchar_t = respace (getNthString 0 entries "wchar_t"); + char16_t = respace (getNthString 0 entries "char16_t"); + char32_t = respace (getNthString 0 entries "char32_t"); char_is_unsigned = not (getBool entries "char_signed"); little_endian = not (getBool entries "big_endian"); __thread_is_keyword = getBool entries "__thread_is_keyword"; diff --git a/test/small1/c99-universal-character-names.c b/test/small1/c99-universal-character-names.c index d9a502644..a91ce415f 100644 --- a/test/small1/c99-universal-character-names.c +++ b/test/small1/c99-universal-character-names.c @@ -1,3 +1,4 @@ int main() { int \u03B1 = 5; + char* arr = "\u2019"; } From 3bd49f90920682e7738bcdf080c3a909ab00e3ff Mon Sep 17 00:00:00 2001 From: Michael Schwarz Date: Thu, 17 Feb 2022 10:38:10 +0100 
Subject: [PATCH 2/5] c99-universal-character-names now succeeds --- test/testcil.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testcil.pl b/test/testcil.pl index 893ffc570..958e3f89f 100644 --- a/test/testcil.pl +++ b/test/testcil.pl @@ -692,7 +692,7 @@ sub addToGroup { addTest("testrunc99/c99-struct"); addTest("testrunc99/c99-complex"); addTest("testrunc99/c99-universal-character-names"); -addBadComment("testrunc99/c99-universal-character-names", "Universal character names are not yet supported"); + addTest("testrunc99/c99-tgmath"); addTest("testrunc99/c99-float-pragma"); addTest("combinec99inline"); From 1dc77ea04974605774433b5e678c51fb9d480249 Mon Sep 17 00:00:00 2001 From: Michael Schwarz Date: Fri, 18 Feb 2022 09:24:13 +0100 Subject: [PATCH 3/5] Parser: Use one_string_constant inside asmoperand --- src/frontc/cparser.mly | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/frontc/cparser.mly b/src/frontc/cparser.mly index de59a11b7..3d652d4f4 100644 --- a/src/frontc/cparser.mly +++ b/src/frontc/cparser.mly @@ -732,7 +732,7 @@ constant: one_string_constant: /* Don't concat multiple strings. For asm templates. 
*/ - CST_STRING {intlist_to_string (fst $1) } + CST_STRING { intlist_to_string (fst $1) } ; string_constant: @@ -1654,8 +1654,8 @@ asmoperandsne: ; asmoperand: - asmopname string_constant LPAREN expression RPAREN { let q,t,l = $2 in ($1, queue_to_string q, fst $4) } -| asmopname string_constant LPAREN error RPAREN { let q,t,l = $2 in ($1, queue_to_string q, NOTHING ) } + asmopname one_string_constant LPAREN expression RPAREN { ($1, $2, fst $4) } +| asmopname one_string_constant LPAREN error RPAREN { ($1, $2, NOTHING ) } ; asminputs: From c30ddd8d80a0de19a376488ea6ede02417a07326 Mon Sep 17 00:00:00 2001 From: Michael Schwarz Date: Fri, 18 Feb 2022 10:12:49 +0100 Subject: [PATCH 4/5] Parser: Make names involving strings more obvious --- src/frontc/cparser.mly | 53 +++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/frontc/cparser.mly b/src/frontc/cparser.mly index 3d652d4f4..6222d0ad5 100644 --- a/src/frontc/cparser.mly +++ b/src/frontc/cparser.mly @@ -355,7 +355,7 @@ let transformOffsetOf (speclist, dtype) member = %type attributes attributes_with_asm asmattr %type statement %type constant -%type string_constant +%type string_list %type expression %type opt_expression %type init_expression @@ -423,11 +423,11 @@ global: | function_def { $1 } /*(* Some C header files ar shared with the C++ compiler and have linkage * specification *)*/ -| EXTERN string_constant declaration { let q,t,l = $2 in LINKAGE (queue_to_string q, (*handleLoc*) l, [ $3 ]) } -| EXTERN string_constant LBRACE globals RBRACE - { let q,t,l = $2 in LINKAGE (queue_to_string q, (*handleLoc*) l, $4) } -| ASM LPAREN string_constant RPAREN SEMICOLON - { let q,t,l = $3 in GLOBASM (queue_to_string q, (*handleLoc*) $1) } +| EXTERN const_raw_string declaration { LINKAGE (fst $2, (*handleLoc*) snd $2, [ $3 ]) } +| EXTERN const_raw_string LBRACE globals RBRACE + { LINKAGE (fst $2, (*handleLoc*) snd $2, $4) } +| ASM LPAREN const_raw_string RPAREN SEMICOLON 
+ { GLOBASM (fst $3, (*handleLoc*) $1) } | pragma { $1 } /* (* Old-style function prototype. This should be somewhere else, like in * "declaration". For now we keep it at global scope only because in local @@ -721,21 +721,34 @@ constant: | CST_WCHAR {CONST_WCHAR (fst $1, WCHAR_T), snd $1} | CST_CHAR16 {CONST_WCHAR (fst $1, CHAR16_T), snd $1} | CST_CHAR32 {CONST_WCHAR (fst $1, CHAR32_T), snd $1} -| string_constant { +| const_string_or_wstring { $1 } +; + +const_string_or_wstring: +| string_list { let queue, typ, location = $1 in match typ with | CHAR -> CONST_STRING (queue_to_string queue, NO_ENCODING), location | CHAR_UTF8 -> CONST_STRING (queue_to_string queue, UTF8), location | _ -> CONST_WSTRING (queue_to_int64_list queue, typ), location } -; + +const_raw_string: +/* Allow only CONST_STRING, represent as (string * location) */ +| string_list { + let queue, typ, location = $1 in + match typ with + | CHAR -> queue_to_string queue, location + | CHAR_UTF8 -> queue_to_string queue, location + | _ -> parse_error "wstring in illegal place (const_raw_string)"; raise Parsing.Parse_error + } one_string_constant: -/* Don't concat multiple strings. For asm templates. */ +/* Don't concat multiple strings, or allow other encoding of string. For asm templates. 
*/ CST_STRING { intlist_to_string (fst $1) } ; -string_constant: +string_list: one_string { let queue = Queue.create () in let str, typ, loc = $1 in @@ -757,7 +770,7 @@ string_constant: Queue.add (fst $1) queue; queue, CHAR32_T, snd $1 } -| string_constant one_string { +| string_list one_string { let queue, typ, loc = $1 in let str, typ2, _ = $2 in Queue.add str queue; @@ -768,7 +781,7 @@ string_constant: let typ3 = if typ2 = CHAR_UTF8 then CHAR_UTF8 else typ in queue, typ3, loc } -| string_constant CST_WSTRING { +| string_list CST_WSTRING { let queue, typ, loc = $1 in Queue.add (fst $2) queue; if typ <> CHAR && typ <> WCHAR_T then ( @@ -777,7 +790,7 @@ string_constant: else queue, WCHAR_T, loc } -| string_constant CST_STRING16 { +| string_list CST_STRING16 { let queue, typ, loc = $1 in Queue.add (fst $2) queue; if typ <> CHAR && typ <> CHAR16_T then ( @@ -786,7 +799,7 @@ string_constant: else queue, CHAR16_T, loc } -| string_constant CST_STRING32 { +| string_list CST_STRING32 { let queue, typ, loc = $1 in Queue.add (fst $2) queue; if typ <> CHAR && typ <> CHAR32_T then ( @@ -996,10 +1009,9 @@ static_assert_declaration: { (fst $3, "", $1) } -| STATIC_ASSERT LPAREN expression COMMA string_constant RPAREN +| STATIC_ASSERT LPAREN expression COMMA const_raw_string RPAREN { - let q,t,l = $5 in - (fst $3, queue_to_string q, $1) + (fst $3, fst $5, $1) } ; @@ -1414,9 +1426,8 @@ attributes: attributes_with_asm: /* empty */ { [] } | attribute attributes_with_asm { fst $1 :: $2 } -| ASM LPAREN string_constant RPAREN attributes - { let q,t,l = $3 in ("__asm__", - [CONSTANT(CONST_STRING (queue_to_string q, NO_ENCODING))]) :: $5 } +| ASM LPAREN const_string_or_wstring RPAREN attributes + { ("__asm__", [CONSTANT(fst $3)]) :: $5 } ; /* things like __attribute__, but no const/volatile */ @@ -1482,7 +1493,7 @@ primary_attr: | LPAREN attr RPAREN { $2 } | IDENT IDENT { CALL(VARIABLE (fst $1), [VARIABLE (fst $2)]) } | CST_INT { CONSTANT(CONST_INT (fst $1)) } -| string_constant { let 
q,t,l = $1 in CONSTANT(CONST_STRING (queue_to_string q, NO_ENCODING)) } +| const_string_or_wstring { CONSTANT(fst $1) } /*(* Const when it appears in * attribute lists, is translated * to aconst *)*/ From 61b65e5ca4701e040f36de075ab1da994d51aa1c Mon Sep 17 00:00:00 2001 From: Michael Schwarz Date: Fri, 18 Feb 2022 10:25:58 +0100 Subject: [PATCH 5/5] rm commented out types --- src/frontc/cparser.mly | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/frontc/cparser.mly b/src/frontc/cparser.mly index 6222d0ad5..5e4267140 100644 --- a/src/frontc/cparser.mly +++ b/src/frontc/cparser.mly @@ -363,8 +363,6 @@ let transformOffsetOf (speclist, dtype) member = %type paren_comma_expression %type arguments %type bracket_comma_expression -/* %type string_list */ -/* %type wstring_list */ %type initializer %type <(Cabs.initwhat * Cabs.init_expression) list> initializer_list