Вы находитесь на странице: 1из 31

Stream Fusion for Haskell Arrays

Don Stewart
Galois Inc
Haskell's Data Types
● Beautiful algebraic data types:

data Set a
= Tip
| Bin !Int a !(Set a) !(Set a)

● Concise notation, inductive reasoning, type math!


● Polymorphic, strongly typed, side effect free
● Efficient. GCd. Strict, or lazy, or roll your own
● Pointers, pointers...
But for real speed...
Sometimes we need unboxed, flat structures:
Arrays in Haskell
biodiversity!
Data.Array Foreign.ForeignPtr
Data.Array.Diff Data.ByteString
Data.Array.IO Data.ByteString.Lazy
Data.Array.Storable Data.PackedString
Data.Array.ST Data.StorableVector
Data.Array.Unboxed Data.Vec
Data.Array.CArray BLAS.Matrix
Data.ArrayBZ Data.Packed
Foreign.Array Data.Packed.Vector
Foreign.Ptr Data.Packed.Matrix
The Perfect Array Type

1.Very, very efficient. Ruthlessly fast.


2.Polymorphic
3.Pure
4.Rich list-like API
5.Compatible with C arrays, other arrays
Data Parallel Haskell
● Project to target large multicore systems:
Chakravarty, Leshchinksiy, Peyton-Jones, Keller, Marlow
● Parallel, distributed arrays, with good interface
● Built from flat, unlifted arrays
● The core of a better array type for mortals
● Built around array fusion
“Stream Fusion: From Lists to Streams to Nothing at All”
Coutts, Leshchinskiy, Stewar.t 2007.
● Key technique for making arrays flexible and fast
uvector: fast, flat, fused arrays
Two data types: mutable arrays and pure arrays
data BUArr e =
BUArr !Int
!Int
ByteArray#

data MBUArr s e =
MBUArr !Int
(MutableByteArray# s)

● Fill the mutable array, freeze it, and get free substrings,
and persistance.
● Low level Haskell
Primitive operations
length :: BUArr e -> Int
length (BUArr _ n _) = n

class UAE e where


sizeBU :: Int -> e -> Int
indexBU :: BUArr e -> Int -> e

readMBU :: MBUArr s e -> Int -> ST s e


writeMBU :: MBUArr s e -> Int -> e -> ST s ()

newMBU :: UAE e => Int -> ST s (MBUArr s e)


Conversions
Zero-copying conversion from mutable to pure

unsafeFreezeMBU
:: MBUArr s e -> Int -> ST s (BUArr e)

unsafeFreezeMBU (MBUArr m mba) n =


checkLen "unsafeFreezeMBU" m n $
ST $ \s ->
(# s, BUArr 0 n (unsafeCoerce# mba) #)

Bounds checking compiled out if -funsafe


Array element instances
Simple per-type representation choices
instance UAE () where
sizeBU _ _ = 0
indexBU (BUArr _ _ _) (I# _) = ()

readMBU (MBUArr _ _) (I# _) = ST $ \s ->


(# s, () #)
writeMBU (MBUArr _ _) (I# _) () = ST $ \s ->
(# s, () #)
Goal 1: Efficiency
Can be a bit fancier...
instance UAE Bool where
readMBU (MBUArr n mba) i@(I# i#) = ST $ \s ->
case readWordArray# mba (bOOL_INDEX i#) s of
(# s2, r# #) ->
(# s2, (r# `and#` bOOL_BIT i#)
`neWord#` int2Word# 0# #

bOOL_INDEX :: Int# -> Int#


#if SIZEOF_HSWORD == 4
bOOL_INDEX i# = i# `uncheckedIShiftRA#` 5#
#elif SIZEOF_HSWORD == 8
bOOL_INDEX i# = i# `uncheckedIShiftRA#` 6#
#endif
Relax. Low level stuff done.
Goal 2: polymorphic
Abstract over the primitive arrays
class UA e where

data UArr e
data MUArr e :: * -> *

lengthU :: UArr e -> Int


indexU :: UArr e -> Int -> e

lengthMU :: MUArr e s -> Int


newMU :: Int -> ST s (MUArr e s)
freezeMU :: MUArr e s -> Int -> ST s (UArr e)

readMU :: MUArr e s -> Int -> ST s e


writeMU :: MUArr e s -> Int -> e -> ST s ()
Goal 3a: Pure
Introducing UArr .. purely!

newU :: UA e
=> Int
-> (forall s. MUArr e s -> ST s Int)
-> UArr e

newU n init =
runST (do
ma <- newMU n
n' <- init ma
freezeMU ma n'
)

Mutation encapsulate in ST monad.


Flexible array representations
instance UA () where
newtype UArr () = UAUnit Int
newtype MUArr () s = MUAUnit Int

lengthU (UAUnit n) = n
indexU (UAUnit _) _ = ()
sliceU (UAUnit _) _ n = UAUnit n

lengthMU (MUAUnit n) = n
newMU n = return $ MUAUnit n
readMU (MUAUnit _) _ = return ()
writeMU (MUAUnit _) _ _= return ()

freezeMU (MUAUnit _) n = return $ UAUnit n


Goal 4: list-like operations
data (:*:) a b = !a :*: !b

instance (UA a, UA b) => UA (a :*: b) where

data UArr (a :*: b)


= UAProd !(UArr a) !(UArr b)

data MUArr (a :*: b) s


= MUAProd !(MUArr a s) !(MUArr b s)

indexU (UAProd l r) i =
indexU l i :*: indexU r i
Support for numeric stuff
instance (RealFloat a, UA a)
=> UA (Complex a) where

newtype UArr (Complex a)


= UAComplex (UArr (a :*: a))
newtype MUArr (Complex a) s
= MUAComplex (MUArr (a :*: a) s)

indexU (UAComplex arr) i =


case indexU arr i of
(a :*: b) -> a :+ b
But that's not the end

• Strict, pure arrays are a bit too inefficient


• Too much copying, not enough sharing
• Impure languages would just mutate inplace
• But we need to find some other way to deforest.
Goal 1&2: Efficiency
Stream Fusion
data Step s a = Done
| Skip !s
| Yield !a !s

data Stream a = exists s.


Stream (s -> Step s a) !s Int
● Abstract sequence transformers
● Non-recursive

● General fusion rule for removing intermediates

● We'll convert arrays into abstract sequences

● Non-recursive things we can optimise ruthlessly


Conversion to and from arrays
streamU :: UA a => UArr a -> Stream a

streamU arr = Stream next 0 n


where
n = lengthU arr

next i | i == n = Done
| otherwise = Yield (arr `indexU` i) (i+1)

unstreamU :: UA a => Stream a -> UArr a

unstreamU st@(Stream next s n) =


newDynU n (\marr -> unstreamMU marr st)
Convert recursive array loops
to non-recursive streams

mapU :: (UA e, UA e')


=> (e -> e') -> UArr e -> UArr e'
mapU f = unstreamU . mapS f . streamU

headU :: UA e => UArr e -> e


headU = headS . StreamU

lastU :: UA e => UArr e -> e


lastU = foldlU (flip const)
The fusion rule
● Use rules to remove redundant conversions

"streamU/unstreamU" forall s.
streamU (unstreamU s) = s

● Compositions of non-recursive functions left over


● Then combine streams using general

optimisations
● Arrays at the end will be fused from the

combined stream pipeline


Filling a mutable array
unstreamMU ::
UA a => MUArr a s -> Stream a -> ST s Int

unstreamMU marr (Stream next s n) = fill s 0


where
fill s !i = case next s of
Done -> return i
Skip s' -> s' `seq` fill s' i
Yield x s' -> s' `seq` do
writeMU marr i x
fill s' (i+1)
New streams
emptyS :: Stream a
emptyS = Stream (const Done) () 0

replicateS :: Int -> a -> Stream a


replicateS n x = Stream next 0 n
where
next i | i == n = Done
| otherwise = Yield x (i+1)
enumFromToS
:: (Ord a, RealFrac a) => a -> a -> Stream a

enumFromToS n m = Stream next n (truncate (m - n))


where
lim = m + 1/2
next s | s > lim = Done
| otherwise = Yield s (s+1)
Transforming streams
mapS :: (a -> b) -> Stream a -> Stream b
mapS f (Stream next s n) = Stream next' s n
where
next' s = case next s of
Done -> Done
Skip s' -> Skip s'
Yield x s' -> Yield (f x) s'

foldS :: (b -> a -> b) -> b -> Stream a -> b


foldS f z (Stream next s _) = fold z s
where
fold !z s = case next s of
Yield x !s' -> fold (f z x) s'
Skip !s' -> fold z s'
Done -> z
Zipping streams

zipWithS
:: (a -> b -> c) -> Stream a -> Stream b -> Stream c

zipWithS f (Stream next1 s m)


(Stream next2 t n) = Stream next (s :*: t) m
where
next (s :*: t) = case next1 s of
Done -> Done
Skip s' -> Skip (s' :*: t)
Yield x s' -> case next2 t of
Done -> Done
Skip t' -> Skip (s :*: t')
Yield y t' -> Yield (f x y) (s' :*: t')
Arrays to streams to nothing at all ...
Future
● Allow users to pick and choose between fused
or direct implementations
● Write some big programs in this style
● Goal 4: more conversions from other array
types (e.g. ByteStrings, Ptr a)
● Conversions to and from other sequence types
via streams – no overhead for the conversion
● DPH's goals: parallel nested arrays, fusible
mutable arrays.
OM NOM NOM NOM

It's on hackage.haskell.org

Вам также может понравиться