Perform unaligned writes via FFI when necessary (#587)

* Perform unaligned writes via FFI when necessary * Be a bit less ignorant about the C preprocessor * Test unaligned uses of Builders a bit more * Attempt to remove Float-related unaligned accesses * fix new old-ghc stuff * todo: figure out how to get cpp to warn about this mistake * Fix CPP mistakes * Fix another stupid CPP mistake cpp-options: -Werror=undef is pulling its weight already. * Shut up cabal check about -Werror=undef * Omit conditionally-used C bindings when unused * Revert stimes-related changes * Add question about lowerTable * Revert "Omit conditionally-used C bindings when unused" This reverts commit 145cdac. * Lots of mostly Float/Double-related tweaks - Haskell unaligned write functions now live in a new module: Data.ByteString.Utils.UnalignedWrite - The word*HexFixed functions now use unaligned writes; likewise Data.ByteString.Builder.RealFloat.Internal.copyWord16. - An FFI workaround for unaligned Float/Double writes was added. - The data tables in Data.ByteString.Builder.Prim.Internal.Base16 and Data.ByteString.Builder.RealFloat.{D,F}2S now live in the new file cbits/aligned-static-hs-data.c so that we can fearlessly perform aligned reads from them. - The static Word64 data tables are now stored in host-byte-order instead of always little-endian. - Data.ByteString.Builder.RealFloat.Internal.digit_table is now a static data blob instead of a CAF. - All CPP around castFloatToWord32/castDoubleToWord64 now lives in Data.ByteString.Builder.Prim.Internal.Floating. * Update comment about locaiton of RealFloat tables * Remove useless temporary CPP guard * Re-add words "source code" in comment
haskell · Sep 15, 2023 · 16d6b7e · 16d6b7e
1 parent 6e6b115
commit 16d6b7e
Show file tree

Hide file tree

Showing 16 changed files with 1,085 additions and 907 deletions.
diff --git a/Data/ByteString/Builder/Prim/ASCII.hs b/Data/ByteString/Builder/Prim/ASCII.hs
@@ -81,6 +81,7 @@ import Data.ByteString.Builder.Prim.Binary
 import Data.ByteString.Builder.Prim.Internal
 import Data.ByteString.Builder.Prim.Internal.Floating
 import Data.ByteString.Builder.Prim.Internal.Base16
+import Data.ByteString.Utils.UnalignedWrite
 
 import Data.Char (ord)
 
@@ -231,8 +232,9 @@ wordHex = caseWordSize_32_64
 -- | Encode a 'Word8' using 2 nibbles (hexadecimal digits).
 {-# INLINE word8HexFixed #-}
 word8HexFixed :: FixedPrim Word8
-word8HexFixed = fixedPrim 2 $
-    \x op -> poke (castPtr op) =<< encode8_as_16h lowerTable x
+word8HexFixed = fixedPrim 2 $ \x op -> do
+  enc <- encode8_as_16h lowerTable x
+  unalignedWriteU16 enc op
 
 -- | Encode a 'Word16' using 4 nibbles.
 {-# INLINE word16HexFixed #-}
@@ -247,6 +249,7 @@ word32HexFixed :: FixedPrim Word32
 word32HexFixed =
     (\x -> (fromIntegral $ x `shiftR` 16, fromIntegral x))
       >$< pairF word16HexFixed word16HexFixed
+
 -- | Encode a 'Word64' using 16 nibbles.
 {-# INLINE word64HexFixed #-}
 word64HexFixed :: FixedPrim Word64

diff --git a/Data/ByteString/Builder/Prim/Binary.hs b/Data/ByteString/Builder/Prim/Binary.hs
@@ -1,5 +1,12 @@
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE Trustworthy #-}
+
+{-# LANGUAGE TypeApplications #-}
+
+#include "MachDeps.h"
+#include "bytestring-cpp-macros.h"
+
+
 -- | Copyright   : (c) 2010-2011 Simon Meier
 -- License       : BSD3-style (see LICENSE)
 --
@@ -54,11 +61,10 @@ module Data.ByteString.Builder.Prim.Binary (
 
 import Data.ByteString.Builder.Prim.Internal
 import Data.ByteString.Builder.Prim.Internal.Floating
+import Data.ByteString.Utils.UnalignedWrite
 
 import Foreign
 
-#include "MachDeps.h"
-
 ------------------------------------------------------------------------------
 -- Binary encoding
 ------------------------------------------------------------------------------
@@ -70,7 +76,7 @@ import Foreign
 --
 {-# INLINE word8 #-}
 word8 :: FixedPrim Word8
-word8 = storableToF
+word8 = fixedPrim 1 (flip poke) -- Word8 is always aligned
 
 --
 -- We rely on the fromIntegral to do the right masking for us.
@@ -143,23 +149,25 @@ word64LE = word64Host
 --
 {-# INLINE wordHost #-}
 wordHost :: FixedPrim Word
-wordHost = storableToF
+wordHost = case finiteBitSize (0 :: Word) of
+  32 -> fromIntegral @Word @Word32 >$< word32Host
+  64 -> fromIntegral @Word @Word64 >$< word64Host
+  _ -> error "Data.ByteString.Builder.Prim.Binary.wordHost: unexpected word size"
 
 -- | Encoding 'Word16's in native host order and host endianness.
 {-# INLINE word16Host #-}
 word16Host :: FixedPrim Word16
-word16Host = storableToF
+word16Host = fixedPrim 2 unalignedWriteU16
 
 -- | Encoding 'Word32's in native host order and host endianness.
 {-# INLINE word32Host #-}
 word32Host :: FixedPrim Word32
-word32Host = storableToF
+word32Host = fixedPrim 4 unalignedWriteU32
 
 -- | Encoding 'Word64's in native host order and host endianness.
 {-# INLINE word64Host #-}
 word64Host :: FixedPrim Word64
-word64Host = storableToF
-
+word64Host = fixedPrim 8 unalignedWriteU64
 
 ------------------------------------------------------------------------------
 -- Int encodings
@@ -215,22 +223,22 @@ int64LE = fromIntegral >$< word64LE
 --
 {-# INLINE intHost #-}
 intHost :: FixedPrim Int
-intHost = storableToF
+intHost = fromIntegral @Int @Word >$< wordHost
 
 -- | Encoding 'Int16's in native host order and host endianness.
 {-# INLINE int16Host #-}
 int16Host :: FixedPrim Int16
-int16Host = storableToF
+int16Host = fromIntegral @Int16 @Word16 >$< word16Host
 
 -- | Encoding 'Int32's in native host order and host endianness.
 {-# INLINE int32Host #-}
 int32Host :: FixedPrim Int32
-int32Host = storableToF
+int32Host = fromIntegral @Int32 @Word32 >$< word32Host
 
 -- | Encoding 'Int64's in native host order and host endianness.
 {-# INLINE int64Host #-}
 int64Host :: FixedPrim Int64
-int64Host = storableToF
+int64Host = fromIntegral @Int64 @Word64 >$< word64Host
 
 -- IEEE Floating Point Numbers
 ------------------------------
@@ -261,9 +269,9 @@ doubleLE = encodeDoubleViaWord64F word64LE
 --
 {-# INLINE floatHost #-}
 floatHost :: FixedPrim Float
-floatHost = storableToF
+floatHost = fixedPrim (sizeOf @Float 0) unalignedWriteFloat
 
 -- | Encode a 'Double' in native host order and host endianness.
 {-# INLINE doubleHost #-}
 doubleHost :: FixedPrim Double
-doubleHost = storableToF
+doubleHost = fixedPrim (sizeOf @Double 0) unalignedWriteDouble
diff --git a/Data/ByteString/Builder/Prim/Internal.hs b/Data/ByteString/Builder/Prim/Internal.hs
@@ -73,6 +73,7 @@ import Foreign
 import Prelude hiding (maxBound)
 
 #include "MachDeps.h"
+#include "bytestring-cpp-macros.h"
 
 ------------------------------------------------------------------------------
 -- Supporting infrastructure
@@ -199,13 +200,7 @@ liftFixedToBounded = toB
 
 {-# INLINE CONLIKE storableToF #-}
 storableToF :: forall a. Storable a => FixedPrim a
--- Not all architectures are forgiving of unaligned accesses; whitelist ones
--- which are known not to trap (either to the kernel for emulation, or crash).
-#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
-    || ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \
-        && defined(__ARM_FEATURE_UNALIGNED)) \
-    || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
-    || defined(powerpc64le_HOST_ARCH)
+#if HS_UNALIGNED_POKES_OK
 storableToF = FP (sizeOf (undefined :: a)) (\x op -> poke (castPtr op) x)
 #else
 storableToF = FP (sizeOf (undefined :: a)) $ \x op ->

diff --git a/Data/ByteString/Builder/Prim/Internal/Base16.hs b/Data/ByteString/Builder/Prim/Internal/Base16.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE Trustworthy #-}
 {-# LANGUAGE MagicHash #-}
 -- |
 -- Copyright   : (c) 2011 Simon Meier
@@ -22,37 +21,25 @@ module Data.ByteString.Builder.Prim.Internal.Base16 (
   , encode8_as_16h
   ) where
 
-import           Foreign
-import           GHC.Exts (Addr#, Ptr(..))
+import Foreign
+import Foreign.C.Types
+import GHC.Exts (Addr#, Ptr(..))
 
 -- Creating the encoding table
 ------------------------------
 
 -- | An encoding table for Base16 encoding.
 data EncodingTable = EncodingTable Addr#
 
+foreign import ccall "&hs_bytestring_lower_hex_table"
+  c_lower_hex_table :: Ptr CChar
+
 -- | The encoding table for hexadecimal values with lower-case characters;
 -- e.g., deadbeef.
 {-# NOINLINE lowerTable #-}
 lowerTable :: EncodingTable
-lowerTable = EncodingTable
-    "000102030405060708090a0b0c0d0e0f\
-    \101112131415161718191a1b1c1d1e1f\
-    \202122232425262728292a2b2c2d2e2f\
-    \303132333435363738393a3b3c3d3e3f\
-    \404142434445464748494a4b4c4d4e4f\
-    \505152535455565758595a5b5c5d5e5f\
-    \606162636465666768696a6b6c6d6e6f\
-    \707172737475767778797a7b7c7d7e7f\
-    \808182838485868788898a8b8c8d8e8f\
-    \909192939495969798999a9b9c9d9e9f\
-    \a0a1a2a3a4a5a6a7a8a9aaabacadaeaf\
-    \b0b1b2b3b4b5b6b7b8b9babbbcbdbebf\
-    \c0c1c2c3c4c5c6c7c8c9cacbcccdcecf\
-    \d0d1d2d3d4d5d6d7d8d9dadbdcdddedf\
-    \e0e1e2e3e4e5e6e7e8e9eaebecedeeef\
-    \f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"#
-
+lowerTable = case c_lower_hex_table of
+  Ptr p# -> EncodingTable p#
 
 -- | Encode an octet as 16bit word comprising both encoded nibbles ordered
 -- according to the host endianness. Writing these 16bit to memory will write

diff --git a/Data/ByteString/Builder/Prim/Internal/Floating.hs b/Data/ByteString/Builder/Prim/Internal/Floating.hs
@@ -1,5 +1,8 @@
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE Trustworthy #-}
+{-# LANGUAGE CPP #-}
+
+#include "MachDeps.h"
+#include "bytestring-cpp-macros.h"
+
 -- |
 -- Copyright   : (c) 2010 Simon Meier
 --
@@ -12,45 +15,74 @@
 -- Conversion of 'Float's and 'Double's to 'Word32's and 'Word64's.
 --
 module Data.ByteString.Builder.Prim.Internal.Floating
-    (
-      -- coerceFloatToWord32
-    -- , coerceDoubleToWord64
-    encodeFloatViaWord32F
+  ( castFloatToWord32
+  , castDoubleToWord64
+  , encodeFloatViaWord32F
   , encodeDoubleViaWord64F
   ) where
 
-import Foreign
 import Data.ByteString.Builder.Prim.Internal
+import Data.Word
+
+#if HS_CAST_FLOAT_WORD_OPS_AVAILABLE
+import GHC.Float (castFloatToWord32, castDoubleToWord64)
+#else
+import Foreign.Marshal.Utils
+import Foreign.Storable
+import Foreign.Ptr
 
+import Data.ByteString.Internal.Type (unsafeDupablePerformIO)
 {-
-We work around ticket http://ghc.haskell.org/trac/ghc/ticket/4092 using the
-FFI to store the Float/Double in the buffer and peek it out again from there.
+We work around ticket http://ghc.haskell.org/trac/ghc/ticket/4092 by
+storing the Float/Double in a temp buffer and peeking it out again from there.
 -}
 
+-- | Interpret a 'Float' as a 'Word32' as if through a bit-for-bit copy.
+-- (fallback if not available through GHC.Float)
+--
+-- e.g
+--
+-- > showHex (castFloatToWord32 1.0) [] = "3f800000"
+{-# NOINLINE castFloatToWord32 #-}
+castFloatToWord32 :: Float -> Word32
+#if (SIZEOF_HSFLOAT != SIZEOF_WORD32) || (ALIGNMENT_HSFLOAT < ALIGNMENT_WORD32)
+  #error "don't know how to cast Float to Word32"
+#endif
+castFloatToWord32 x = unsafeDupablePerformIO (with x (peek . castPtr))
 
--- | Encode a 'Float' using a 'Word32' encoding.
+-- | Interpret a 'Double' as a 'Word64' as if through a bit-for-bit copy.
+-- (fallback if not available through GHC.Float)
 --
--- PRE: The 'Word32' encoding must have a size of at least 4 bytes.
+-- e.g
+--
+-- > showHex (castDoubleToWord64 1.0) [] = "3ff0000000000000"
+{-# NOINLINE castDoubleToWord64 #-}
+castDoubleToWord64 :: Double -> Word64
+#if (SIZEOF_HSDOUBLE != SIZEOF_WORD64) || (ALIGNMENT_HSDOUBLE < ALIGNMENT_WORD64)
+  #error "don't know how to cast Double to Word64"
+#endif
+castDoubleToWord64 x = unsafeDupablePerformIO (with x (peek . castPtr))
+#endif
+
+
+-- | Encode a 'Float' using a 'Word32' encoding.
 {-# INLINE encodeFloatViaWord32F #-}
 encodeFloatViaWord32F :: FixedPrim Word32 -> FixedPrim Float
-encodeFloatViaWord32F w32fe
-  | size w32fe < sizeOf (undefined :: Float) =
-      error "encodeFloatViaWord32F: encoding not wide enough"
-  | otherwise = fixedPrim (size w32fe) $ \x op -> do
-      poke (castPtr op) x
-      x' <- peek (castPtr op)
-      runF w32fe x' op
+#if HS_CAST_FLOAT_WORD_OPS_AVAILABLE
+encodeFloatViaWord32F = (castFloatToWord32 >$<)
+#else
+encodeFloatViaWord32F w32fe = fixedPrim (size w32fe) $ \x op -> do
+  x' <- with x (peek . castPtr)
+  runF w32fe x' op
+#endif
 
 -- | Encode a 'Double' using a 'Word64' encoding.
---
--- PRE: The 'Word64' encoding must have a size of at least 8 bytes.
 {-# INLINE encodeDoubleViaWord64F #-}
 encodeDoubleViaWord64F :: FixedPrim Word64 -> FixedPrim Double
-encodeDoubleViaWord64F w64fe
-  | size w64fe < sizeOf (undefined :: Float) =
-      error "encodeDoubleViaWord64F: encoding not wide enough"
-  | otherwise = fixedPrim (size w64fe) $ \x op -> do
-      poke (castPtr op) x
-      x' <- peek (castPtr op)
-      runF w64fe x' op
-
+#if HS_CAST_FLOAT_WORD_OPS_AVAILABLE
+encodeDoubleViaWord64F = (castDoubleToWord64 >$<)
+#else
+encodeDoubleViaWord64F w64fe = fixedPrim (size w64fe) $ \x op -> do
+  x' <- with x (peek . castPtr)
+  runF w64fe x' op
+#endif