Mailing-List: contact issues-help@stdcxx.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@stdcxx.apache.org
Message-ID: <13197265.1202930710880.JavaMail.jira@brutus>
Date: Wed, 13 Feb 2008 11:25:10 -0800 (PST)
From: "Travis Vitek (JIRA)" <jira@apache.org>
To: issues@stdcxx.apache.org
Subject: [jira] Issue Comment Edited: (STDCXX-722) [gcc] use math __builtins
In-Reply-To: <2997608.1202766368568.JavaMail.jira@brutus>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable


    [ https://issues.apache.org/jira/browse/STDCXX-722?page=3Dcom.atlassian=
.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=3D1256=
8684#action_12568684 ]=20

vitek edited comment on STDCXX-722 at 2/13/08 11:25 AM:
---------------------------------------------------------------

Martin, a few comments on your patch.

{noformat}
+#if 4 <=3D __GNUG__
+
+// define helpers to call gcc 4.x math builtins
+
+_RWSTD_NAMESPACE (__rw) {
+
+inline float __rw_atan2 (float __x, float __y)
+{
+    return __builtin_atan2f (__x, __y);
+}
+
+inline float __rw_cos (float __x)
+{
+    return __builtin_cosf (__x);
+}

+// use gcc 4 builtins for efficiency and namespace cleanliness
+#  define _RWSTD_ATAN2   _RW::__rw_atan2
+#  define _RWSTD_COS     _RW::__rw_cos
+#  define _RWSTD_COSH    _RW::__rw_cosh
+#  define _RWSTD_EXP     _RW::__rw_exp
+#  define _RWSTD_LOG     _RW::__rw_log
+#  define _RWSTD_POW     _RW::__rw_pow
+#  define _RWSTD_SIN     _RW::__rw_sin
+#  define _RWSTD_SINH    _RW::__rw_sinh
+#  define _RWSTD_SQRT    _RW::__rw_sqrt
+
+#else   // gcc < 4.0
+#  include _RWSTD_CMATH
+
+#  define _RWSTD_ATAN2   _RWSTD_C::atan2
+#  define _RWSTD_COS     _RWSTD_C::cos
+#  define _RWSTD_COSH    _RWSTD_C::cosh
+#  define _RWSTD_EXP     _RWSTD_C::exp
+#  define _RWSTD_LOG     _RWSTD_C::log
+#  define _RWSTD_POW     _RWSTD_C::pow
+#  define _RWSTD_SIN     _RWSTD_C::sin
+#  define _RWSTD_SINH    _RWSTD_C::sinh
+#  define _RWSTD_SQRT    _RWSTD_C::sqrt
+
+#endif   // gcc 4.0
{noformat}

It would be nice if these definitions and macros could go into a common hea=
der. That way the valarray transcendentals could take advantage of them.

{noformat}
+#if 4 <=3D __GNUG__
+
+    // use gcc 4.x builtins
+    complex (const float __complex__ &__rhs)
+        : _C_re (__builtin_crealf (__rhs)),
+          _C_im (__builtin_cimagf (__rhs)) { }
+
+#endif   // gcc >=3D 4.0
{noformat}

Should these new constructors be guarded by a \_RWSTD_NO_EXT_* macro? They =
are extensions, so it seems that we should provide a way to disable them in=
 strict mode. Also, C99 adds the _Complex types and functions for =
manipulating them. Should we add an enhancement to support these since =
you've opened the door here?

{noformat}
+#if 4 <=3D __GNUG__
+
+// use gcc 4.x C99 complex builtins
+
+#  define _RWSTD_COMPLEX_CAST(T, arg)                   \
+    _RWSTD_REINTERPRET_CAST (const T __complex__&, arg)
+
+
+_RWSTD_SPECIALIZED_FUNCTION inline float
+abs (const complex<float> &__x)
+{
+    return __builtin_cabsf (_RWSTD_COMPLEX_CAST (float, __x));
+}
+
{noformat}

I think \_RWSTD_COMPLEX_CAST is dangerous at best. I don't know for sure ho=
w a GCC \_\_complex__ is laid out in memory. If I were to guess, I'd expect=
 a `float \_\_complex\_\_' to be two contiguous floats like `float \_C_comp=
lex [2]', which may be completely different from `float \_C_real, \_C_imag'=
 if any padding is inserted. Fortunately the [documentation|http://gcc.gnu.=
org/onlinedocs/gcc-4.2.3/gcc/Complex.html#Complex] says some interesting th=
ings about the layout of a \_\_complex__.=20

It also appears that the IBM compiler supports a similar set of [functions|=
http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=3D/co=
m.ibm.xlcpp8a.doc/compiler/ref/bif_fp.htm]. It might be nice to add support=
 for them while we're in there.

      was (Author: vitek):
    Martin, a few comments on your patch.

{noformat}
+#if 4 <=3D __GNUG__
+
+// define helpers to call gcc 4.x math builtins
+
+_RWSTD_NAMESPACE (__rw) {
+
+inline float __rw_atan2 (float __x, float __y)
+{
+    return __builtin_atan2f (__x, __y);
+}
+
+inline float __rw_cos (float __x)
+{
+    return __builtin_cosf (__x);
+}

+// use gcc 4 builtins for efficiency and namespace cleanliness
+#  define _RWSTD_ATAN2   _RW::__rw_atan2
+#  define _RWSTD_COS     _RW::__rw_cos
+#  define _RWSTD_COSH    _RW::__rw_cosh
+#  define _RWSTD_EXP     _RW::__rw_exp
+#  define _RWSTD_LOG     _RW::__rw_log
+#  define _RWSTD_POW     _RW::__rw_pow
+#  define _RWSTD_SIN     _RW::__rw_sin
+#  define _RWSTD_SINH    _RW::__rw_sinh
+#  define _RWSTD_SQRT    _RW::__rw_sqrt
+
+#else   // gcc < 4.0
+#  include _RWSTD_CMATH
+
+#  define _RWSTD_ATAN2   _RWSTD_C::atan2
+#  define _RWSTD_COS     _RWSTD_C::cos
+#  define _RWSTD_COSH    _RWSTD_C::cosh
+#  define _RWSTD_EXP     _RWSTD_C::exp
+#  define _RWSTD_LOG     _RWSTD_C::log
+#  define _RWSTD_POW     _RWSTD_C::pow
+#  define _RWSTD_SIN     _RWSTD_C::sin
+#  define _RWSTD_SINH    _RWSTD_C::sinh
+#  define _RWSTD_SQRT    _RWSTD_C::sqrt
+
+#endif   // gcc 4.0
{noformat}

It would be nice if these definitions and macros could go into a common hea=
der. That way the valarray transcendentals could take advantage of them.

{noformat}
+#if 4 <=3D __GNUG__
+
+    // use gcc 4.x builtins
+    complex (const float __complex__ &__rhs)
+        : _C_re (__builtin_crealf (__rhs)),
+          _C_im (__builtin_cimagf (__rhs)) { }
+
+#endif   // gcc >=3D 4.0
{noformat}

Should these new constructors be guarded by a _RWSTD_NO_EXT_* macro? They a=
re extensions, so it seems that we should provide a way to disable them in =
strict mode. Also, C99 adds _Complex and functions for manipulating them. S=
hould we add an enhancement to support these since you've opened the door h=
ere?

{noformat}
+#if 4 <=3D __GNUG__
+
+// use gcc 4.x C99 complex builtins
+
+#  define _RWSTD_COMPLEX_CAST(T, arg)                   \
+    _RWSTD_REINTERPRET_CAST (const T __complex__&, arg)
+
+
+_RWSTD_SPECIALIZED_FUNCTION inline float
+abs (const complex<float> &__x)
+{
+    return __builtin_cabsf (_RWSTD_COMPLEX_CAST (float, __x));
+}
+
{noformat}

I think _RWSTD_COMPLEX_CAST() is dangerous at best. I don't know for sure h=
ow a __complex__ is laid out in memory. If I were to guess, I'd expect a 'f=
loat __complex__' to be two contiguous floats like 'float _C_complex [2]', =
which may be completely different from 'float _C_real, _C_imag' if any padd=
ing is inserted. Fortunately the [documentation|http://gcc.gnu.org/onlinedo=
cs/gcc-4.2.3/gcc/Complex.html#Complex] says some interesting things about t=
he layout of a __complex__.=20

It also appears that the IBM compiler supports a similar set of [functions|=
http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=3D/co=
m.ibm.xlcpp8a.doc/compiler/ref/bif_fp.htm]. It might be nice to add support=
 for them while we're in there.
 =20
> [gcc] use math __builtins
> -------------------------
>
>                 Key: STDCXX-722
>                 URL: https://issues.apache.org/jira/browse/STDCXX-722
>             Project: C++ Standard Library
>          Issue Type: Sub-task
>          Components: 26. Numerics
>    Affects Versions: 4.2.0
>         Environment: gcc 4
>            Reporter: Martin Sebor
>            Assignee: Martin Sebor
>            Priority: Minor
>             Fix For: 4.2.1
>
>         Attachments: stdcxx-722.diff, stdcxx-722.log
>
>   Original Estimate: 2h
>          Time Spent: 4h
>  Remaining Estimate: 0h
>
> For better efficiency and to reduce namespace pollution we can replace al=
l the math functions used in [<complex>|http://svn.apache.org/repos/asf/std=
cxx/trunk/include/complex] with gcc's built-in equivalents.
> Quoting from section [5.46 Other built-in functions provided by GCC|http:=
//gcc.gnu.org/onlinedocs/gcc-4.1.2/gcc/Other-Builtins.html#Other-Builtins] =
of the gcc online manual:
> {quote}
> The ISO C99 functions {{_Exit}}, {{acoshf}}, {{acoshl}}, {{acosh}}, {{asi=
nhf}}, {{asinhl}}, {{asinh}}, {{atanhf}}, {{atanhl}}, {{atanh}}, {{cabsf}},=
 {{cabsl}}, {{cabs}}, {{cacosf}}, {{cacoshf}}, {{cacoshl}}, {{cacosh}}, {{c=
acosl}}, {{cacos}}, {{cargf}}, {{cargl}}, {{carg}}, {{casinf}}, {{casinhf}}=
, {{casinhl}}, {{casinh}}, {{casinl}}, {{casin}}, {{catanf}}, {{catanhf}}, =
{{catanhl}}, {{catanh}}, {{catanl}}, {{catan}}, {{cbrtf}}, {{cbrtl}}, {{cbr=
t}}, {{ccosf}}, {{ccoshf}}, {{ccoshl}}, {{ccosh}}, {{ccosl}}, {{ccos}}, {{c=
expf}}, {{cexpl}}, {{cexp}}, {{cimagf}}, {{cimagl}}, {{cimag}}, {{clogf}}, =
{{clogl}}, {{clog}}, {{conjf}}, {{conjl}}, {{conj}}, {{copysignf}}, {{copys=
ignl}}, {{copysign}}, {{cpowf}}, {{cpowl}}, {{cpow}}, {{cprojf}}, {{cprojl}=
}, {{cproj}}, {{crealf}}, {{creall}}, {{creal}}, {{csinf}}, {{csinhf}}, {{c=
sinhl}}, {{csinh}}, {{csinl}}, {{csin}}, {{csqrtf}}, {{csqrtl}}, {{csqrt}},=
 {{ctanf}}, {{ctanhf}}, {{ctanhl}}, {{ctanh}}, {{ctanl}}, {{ctan}}, {{erfcf=
}}, {{erfcl}}, {{erfc}}, {{erff}}, {{erfl}}, {{erf}}, {{exp2f}}, {{exp2l}},=
 {{exp2}}, {{expm1f}}, {{expm1l}}, {{expm1}}, {{fdimf}}, {{fdiml}}, {{fdim}=
}, {{fmaf}}, {{fmal}}, {{fmaxf}}, {{fmaxl}}, {{fmax}}, {{fma}}, {{fminf}}, =
{{fminl}}, {{fmin}}, {{hypotf}}, {{hypotl}}, {{hypot}}, {{ilogbf}}, {{ilogb=
l}}, {{ilogb}}, {{imaxabs}}, {{isblank}}, {{iswblank}}, {{lgammaf}}, {{lgam=
mal}}, {{lgamma}}, {{llabs}}, {{llrintf}}, {{llrintl}}, {{llrint}}, {{llrou=
ndf}}, {{llroundl}}, {{llround}}, {{log1pf}}, {{log1pl}}, {{log1p}}, {{log2=
f}}, {{log2l}}, {{log2}}, {{logbf}}, {{logbl}}, {{logb}}, {{lrintf}}, {{lri=
ntl}}, {{lrint}}, {{lroundf}}, {{lroundl}}, {{lround}}, {{nearbyintf}}, {{n=
earbyintl}}, {{nearbyint}}, {{nextafterf}}, {{nextafterl}}, {{nextafter}}, =
{{nexttowardf}}, {{nexttowardl}}, {{nexttoward}}, {{remainderf}}, {{remaind=
erl}}, {{remainder}}, {{remquof}}, {{remquol}}, {{remquo}}, {{rintf}}, {{ri=
ntl}}, {{rint}}, {{roundf}}, {{roundl}}, {{round}}, {{scalblnf}}, {{scalbln=
l}}, {{scalbln}}, {{scalbnf}}, {{scalbnl}}, {{scalbn}}, {{snprintf}}, {{tga=
mmaf}}, {{tgammal}}, {{tgamma}}, {{truncf}}, {{truncl}}, {{trunc}}, {{vfsca=
nf}}, {{vscanf}}, {{vsnprintf}} and {{vsscanf}} are handled as built-in fun=
ctions except in strict ISO C90 mode ({{-ansi}} or {{-std=3Dc89}}).
> There are also built-in versions of the ISO C99 functions {{acosf}}, {{ac=
osl}}, {{asinf}}, {{asinl}}, {{atan2f}}, {{atan2l}}, {{atanf}}, {{atanl}}, =
{{ceilf}}, {{ceill}}, {{cosf}}, {{coshf}}, {{coshl}}, {{cosl}}, {{expf}}, {=
{expl}}, {{fabsf}}, {{fabsl}}, {{floorf}}, {{floorl}}, {{fmodf}}, {{fmodl}}=
, {{frexpf}}, {{frexpl}}, {{ldexpf}}, {{ldexpl}}, {{log10f}}, {{log10l}}, {=
{logf}}, {{logl}}, {{modfl}}, {{modf}}, {{powf}}, {{powl}}, {{sinf}}, {{sin=
hf}}, {{sinhl}}, {{sinl}}, {{sqrtf}}, {{sqrtl}}, {{tanf}}, {{tanhf}}, {{tan=
hl}} and {{tanl}} that are recognized in any mode since ISO C90 reserves th=
ese names for the purpose to which ISO C99 puts them. All these functions h=
ave corresponding versions prefixed with {{__builtin_}}.
> {quote}

--=20
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.