Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add rsqrt with prec step on NEON float 32 #59

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/mipp.h
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,7 @@ template <typename T> inline reg neg (const reg, const msk)
// Primary template for abs: used only when no specialization exists for T.
// Reports the unsupported operation through errorMessage<T>, then terminates with exit(-1).
template <typename T>
inline reg abs(const reg)
{
	errorMessage<T>("abs");
	exit(-1);
}

// Primary template for sqrt: used only when no specialization exists for T.
// Reports the unsupported operation through errorMessage<T>, then terminates with exit(-1).
template <typename T>
inline reg sqrt(const reg)
{
	errorMessage<T>("sqrt");
	exit(-1);
}

// Primary template for rsqrt: used only when no specialization exists for T.
// Reports the unsupported operation through errorMessage<T>, then terminates with exit(-1).
template <typename T>
inline reg rsqrt(const reg)
{
	errorMessage<T>("rsqrt");
	exit(-1);
}
// Primary template for rsqrt_prec: forwards to rsqrt<T> and applies no refinement of its own.
// A specialization for a given T takes precedence over this fallback.
template <typename T>
inline reg rsqrt_prec(const reg v)
{
	const reg estimate = rsqrt<T>(v);
	return estimate;
}
// Primary template for log: used only when no specialization exists for T.
// Reports the unsupported operation through errorMessage<T>, then terminates with exit(-1).
template <typename T>
inline reg log(const reg)
{
	errorMessage<T>("log");
	exit(-1);
}

// Primary template for exp: used only when no specialization exists for T.
// Reports the unsupported operation through errorMessage<T>, then terminates with exit(-1).
template <typename T>
inline reg exp(const reg)
{
	errorMessage<T>("exp");
	exit(-1);
}

// Primary template for sin: used only when no specialization exists for T.
// Reports the unsupported operation through errorMessage<T>, then terminates with exit(-1).
template <typename T>
inline reg sin(const reg)
{
	errorMessage<T>("sin");
	exit(-1);
}
Expand Down
7 changes: 7 additions & 0 deletions include/mipp_impl_NEON.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2815,6 +2815,13 @@
return vrsqrteq_f32(v1);
}

// Reciprocal square root (1 / sqrt(v1)) for float lanes: the hardware estimate improved by one
// Newton-Raphson refinement step.
template <>
inline reg rsqrt_prec<float>(const reg v1) {
	// Initial low-precision estimate e ~= 1 / sqrt(v1).
	const float32x4_t estimate = vrsqrteq_f32(v1);

	// vrsqrtsq_f32(a, b) evaluates (3 - a * b) / 2. With a = v1 * e and b = e this yields the
	// Newton-Raphson correction factor (3 - v1 * e^2) / 2.
	const float32x4_t correction = vrsqrtsq_f32(vmulq_f32(v1, estimate), estimate);

	// The products use vmulq_f32 rather than operator*: arithmetic operators on NEON vector
	// types are a compiler extension and are not part of the intrinsic API.
	return vmulq_f32(correction, estimate);
}


// ----------------------------------------------------------------------------------------------------------- sqrt
#ifdef __aarch64__
template <>
Expand Down
3 changes: 3 additions & 0 deletions include/mipp_object.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ public:
// Applies mipp::abs<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> abs() const
{
	return mipp::abs<T>(r);
}

// Applies mipp::sqrt<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> sqrt() const
{
	return mipp::sqrt<T>(r);
}

// Applies mipp::rsqrt<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> rsqrt() const
{
	return mipp::rsqrt<T>(r);
}
// Applies mipp::rsqrt_prec<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> rsqrt_prec() const
{
	return mipp::rsqrt_prec<T>(r);
}
// Applies mipp::log<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> log() const
{
	return mipp::log<T>(r);
}

// Applies mipp::exp<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> exp() const
{
	return mipp::exp<T>(r);
}

// Applies mipp::sin<T> to `r` and returns the result as a Reg<T>.
inline Reg<T> sin() const
{
	return mipp::sin<T>(r);
}
Expand Down Expand Up @@ -305,6 +306,7 @@ public:
// Absolute value of `r`, computed with std::abs.
inline Reg<T> abs() const
{
	return std::abs(r);
}

// Square root of `r` via std::sqrt, converted back to T.
inline Reg<T> sqrt() const
{
	return static_cast<T>(std::sqrt(r));
}

// Reciprocal square root of `r`: 1 / std::sqrt(r), converted back to T.
inline Reg<T> rsqrt() const
{
	return static_cast<T>(1 / std::sqrt(r));
}
// Reciprocal square root of `r`, using the same expression as rsqrt() in this implementation:
// 1 / std::sqrt(r), converted back to T.
inline Reg<T> rsqrt_prec() const
{
	return static_cast<T>(1 / std::sqrt(r));
}
// Logarithm of `r` via std::log, converted back to T.
inline Reg<T> log() const
{
	return static_cast<T>(std::log(r));
}

// Exponential of `r` via std::exp, converted back to T.
inline Reg<T> exp() const
{
	return static_cast<T>(std::exp(r));
}

// Sine of `r` via std::sin, converted back to T.
inline Reg<T> sin() const
{
	return static_cast<T>(std::sin(r));
}
Expand Down Expand Up @@ -920,6 +922,7 @@ template <typename T> inline Reg<T> copysign (const Reg<T> v1, c
// Free-function form of Reg<T>::abs: forwards to v.abs().
template <typename T>
inline Reg<T> abs(const Reg<T> v)
{
	return v.abs();
}

// Free-function form of Reg<T>::sqrt: forwards to v.sqrt().
template <typename T>
inline Reg<T> sqrt(const Reg<T> v)
{
	return v.sqrt();
}

// Free-function form of Reg<T>::rsqrt: forwards to v.rsqrt().
template <typename T>
inline Reg<T> rsqrt(const Reg<T> v)
{
	return v.rsqrt();
}
// Free-function form of Reg<T>::rsqrt_prec: forwards to v.rsqrt_prec().
template <typename T>
inline Reg<T> rsqrt_prec(const Reg<T> v)
{
	return v.rsqrt_prec();
}
// Free-function form of Reg<T>::log: forwards to v.log().
template <typename T>
inline Reg<T> log(const Reg<T> v)
{
	return v.log();
}

// Free-function form of Reg<T>::exp: forwards to v.exp().
template <typename T>
inline Reg<T> exp(const Reg<T> v)
{
	return v.exp();
}

// Free-function form of Reg<T>::sin: forwards to v.sin().
template <typename T>
inline Reg<T> sin(const Reg<T> v)
{
	return v.sin();
}
Expand Down