Skip to content

Commit

Permalink
Use streamly-core-0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
harendra-kumar committed Dec 7, 2023
1 parent dfa6502 commit e598da7
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 44 deletions.
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ environment:
# version.
#STACKVER: "1.6.5"
STACK_UPGRADE: "y"
RESOLVER: "nightly-2023-04-18"
RESOLVER: "nightly-2023-12-07"
STACK_ROOT: "c:\\sr"

# ------------------------------------------------------------------------
Expand Down
7 changes: 4 additions & 3 deletions benchmark/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import System.Random (randomRIO)
import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Data.Stream as Stream
import qualified Streamly.Data.Array as Array
import qualified Streamly.Internal.Data.Ring.Unboxed as Ring
import qualified Streamly.Internal.Data.Ring as Ring
import qualified Streamly.Statistics as Statistics

import Gauge
Expand Down Expand Up @@ -169,8 +169,9 @@ mkBenchmarks mkBench =

, mkBench numElements "ewma (entire stream)"
(Statistics.ewma 0.5)
, mkBench numElements "ewmaAfterMean (entire stream)"
(Statistics.ewmaAfterMean 10 0.5)
-- XXX Disabled because this is not scannable
-- , mkBench numElements "ewmaAfterMean (entire stream)"
-- (Statistics.ewmaAfterMean 10 0.5)
, mkBench numElements "ewmaRampUpSmoothing (entire stream)"
(Statistics.ewmaRampUpSmoothing 0.5 0.5)

Expand Down
100 changes: 64 additions & 36 deletions src/Streamly/Statistics.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@

-- Resources:
--
-- Related:
-- https://hackage.haskell.org/package/foldl-statistics
-- https://hackage.haskell.org/package/foldl-incremental
--
-- This may be another useful resource for incremental (non-windowed)
-- computation:
--
Expand Down Expand Up @@ -80,17 +84,17 @@ module Streamly.Statistics
-- window folds by keeping the second element of the input tuple as
-- @Nothing@.
--
Window.lmap
lmap
, Window.cumulative

-- * Summary Statistics
-- | See https://en.wikipedia.org/wiki/Summary_statistics .

-- ** Sums
, Window.length
, Window.sum
, Window.sumInt
, Window.powerSum
, length
, sum
, sumInt
, powerSum

-- ** Location
-- | See https://en.wikipedia.org/wiki/Location_parameter .
Expand Down Expand Up @@ -183,25 +187,23 @@ import Data.Function ((&))
import Data.Functor.Identity (runIdentity, Identity)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Streamly.Data.Array (Array, length, Unbox)
import Streamly.Data.Array (Array, Unbox)
import Streamly.Data.Fold (Tee(..))
import Streamly.Data.Stream (Stream)
import Streamly.Internal.Data.Array.Type (unsafeIndexIO)
import Streamly.Internal.Data.Fold.Type (Fold(..), Step(..))
import Streamly.Internal.Data.Stream.StreamD.Step (Step(..))
import Streamly.Internal.Data.Array (unsafeIndexIO)
import Streamly.Internal.Data.Fold (Fold(..), Step(..))
import Streamly.Internal.Data.Stream (Step(..))
import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..))
import Streamly.Internal.Data.Unfold.Type (Unfold(..))
import Streamly.Internal.Data.Unfold (Unfold(..))
import System.Random.MWC (createSystemRandom, uniformRM)

import qualified Data.Map.Strict as Map
import qualified Deque.Strict as Deque
import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Data.Array as Array hiding (read)
import qualified Streamly.Internal.Data.Array as Array (read)
import qualified Streamly.Data.Array as Array
import qualified Streamly.Data.MutArray as MA
import qualified Streamly.Internal.Data.Array.Mut as MA
(getIndexUnsafe, putIndexUnsafe, unsafeSwapIndices)
import qualified Streamly.Internal.Data.Fold.Window as Window
import qualified Streamly.Internal.Data.MutArray as MA (unsafeSwapIndices)
import qualified Streamly.Internal.Data.Fold as Window
import qualified Streamly.Data.Stream as Stream

import Prelude hiding (length, sum, minimum, maximum)
Expand All @@ -215,6 +217,25 @@ import Prelude hiding (length, sum, minimum, maximum)
-- TODO We have many of these functions in Streamly.Data.Fold as well. Need to
-- think about deduplication.

-------------------------------------------------------------------------------
-- Re-exports
-------------------------------------------------------------------------------

lmap :: (c -> a) -> Fold m (a, Maybe a) b -> Fold m (c, Maybe c) b
lmap = Window.windowLmap

length :: (Monad m, Num b) => Fold m (a, Maybe a) b
length = Window.windowLength

sum :: (Monad m, Num a) => Fold m (a, Maybe a) a
sum = Window.windowSum

sumInt :: (Monad m, Integral a) => Fold m (a, Maybe a) a
sumInt = Window.windowSumInt

powerSum :: (Monad m, Num a) => Int -> Fold m (a, Maybe a) a
powerSum = Window.windowPowerSum

-------------------------------------------------------------------------------
-- Transforms
-------------------------------------------------------------------------------
Expand Down Expand Up @@ -329,15 +350,15 @@ fft marr
--
-- | The minimum element in a rolling window.
--
-- For smaller window sizes (< 30) Streamly.Data.Fold.Window.minimum performs
-- For smaller window sizes (< 30) Streamly.Internal.Data.Fold.windowMinimum performs
-- better. If you want to compute the minimum of the entire stream Fold.min
-- from streamly package would be much faster.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
{-# INLINE minimum #-}
minimum :: (Monad m, Ord a) => Fold m (a, Maybe a) a
minimum = Fold step initial extract
minimum = Fold step initial extract extract

where

Expand Down Expand Up @@ -389,15 +410,15 @@ minimum = Fold step initial extract
--
-- | The maximum element in a rolling window.
--
-- For smaller window sizes (< 30) Streamly.Data.Fold.Window.maximum performs
-- For smaller window sizes (< 30) Streamly.Internal.Data.Fold.windowMaximum performs
-- better. If you want to compute the maximum of the entire stream
-- Streamly.Data.Fold.maximum from streamly package would be much faster.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
{-# INLINE maximum #-}
maximum :: (Monad m, Ord a) => Fold m (a, Maybe a) a
maximum = Fold step initial extract
maximum = Fold step initial extract extract

where

Expand Down Expand Up @@ -469,7 +490,7 @@ maximum = Fold step initial extract
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE mean #-}
mean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a
mean = Window.mean
mean = Window.windowMean

-- | Recompute mean from old mean when an item is removed from the sample.
{-# INLINE _meanSubtract #-}
Expand Down Expand Up @@ -511,7 +532,7 @@ meanReplace n oldMean oldItem newItem =
-- /Internal/
{-# INLINE welfordMean #-}
welfordMean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a
welfordMean = Fold step initial extract
welfordMean = Fold step initial extract extract

where

Expand Down Expand Up @@ -543,7 +564,7 @@ welfordMean = Fold step initial extract
--
-- \(\mu'_k = \frac{\sum_{i=1}^n x_{i}^k}{n}\)
--
-- >>> rawMoment k = Fold.teeWith (/) (powerSum p) length
-- >>> rawMoment k = Fold.teeWith (/) (Fold.windowPowerSum p) Fold.windowLength
--
-- See https://en.wikipedia.org/wiki/Moment_(mathematics) .
--
Expand All @@ -552,16 +573,17 @@ welfordMean = Fold step initial extract
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE rawMoment #-}
rawMoment :: (Monad m, Fractional a) => Int -> Fold m (a, Maybe a) a
rawMoment k = Fold.teeWith (/) (Window.powerSum k) Window.length
rawMoment k = Fold.teeWith (/) (Window.windowPowerSum k) Window.windowLength

-- | Like 'rawMoment' but powers can be negative or fractional. This is
-- slower than 'rawMoment' for positive intergal powers.
--
-- >>> rawMomentFrac p = Fold.teeWith (/) (powerSumFrac p) length
-- >>> rawMomentFrac p = Fold.teeWith (/) (Fold.windowPowerSumFrac p) Fold.windowLength
--
{-# INLINE rawMomentFrac #-}
rawMomentFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a
rawMomentFrac k = Fold.teeWith (/) (Window.powerSumFrac k) Window.length
rawMomentFrac k =
Fold.teeWith (/) (Window.windowPowerSumFrac k) Window.windowLength

-- XXX Overflow can happen when large powers or large numbers are used. We can
-- keep a running mean instead of running sum but that won't mitigate the
Expand Down Expand Up @@ -608,7 +630,9 @@ powerMeanFrac k = (** (1 / k)) <$> rawMomentFrac k
--
{-# INLINE harmonicMean #-}
harmonicMean :: (Monad m, Fractional a) => Fold m (a, Maybe a) a
harmonicMean = Fold.teeWith (/) Window.length (Window.lmap recip Window.sum)
harmonicMean =
Fold.teeWith (/)
Window.windowLength (Window.windowLmap recip Window.windowSum)

-- | Geometric mean, defined as:
--
Expand All @@ -625,7 +649,7 @@ harmonicMean = Fold.teeWith (/) Window.length (Window.lmap recip Window.sum)
-- See https://en.wikipedia.org/wiki/Geometric_mean .
{-# INLINE geometricMean #-}
geometricMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a
geometricMean = exp <$> Window.lmap log mean
geometricMean = exp <$> Window.windowLmap log mean

-- | The quadratic mean or root mean square (rms) of the numbers
-- \(x_1, x_2, \ldots, x_n\) is defined as:
Expand Down Expand Up @@ -685,15 +709,17 @@ ewma k = extract <$> Fold.foldl' step (Tuple' 0 1)

extract (Tuple' x _) = x

-- XXX It can perhaps perform better if implemented as a custom fold?
--
-- XXX It can perhaps perform better if implemented as a custom fold? We can
-- also enable this to be used as a scan that way.

-- | @ewma n k@ is like 'ewma' but uses the mean of the first @n@ values and
-- then uses that as the initial value for the @ewma@ of the rest of the
-- values.
--
-- This can be used to reduce the effect of volatility of the initial value
-- when k is too small.
--
-- Note that this cannot be used as a scan.
{-# INLINE ewmaAfterMean #-}
ewmaAfterMean :: Monad m => Int -> Double -> Fold m Double Double
ewmaAfterMean n k =
Expand Down Expand Up @@ -897,7 +923,8 @@ kurtosis =
--
{-# INLINE sampleVariance #-}
sampleVariance :: (Monad m, Fractional a) => Fold m (a, Maybe a) a
sampleVariance = Fold.teeWith (\n s2 -> n * s2 / (n - 1)) Window.length variance
sampleVariance =
Fold.teeWith (\n s2 -> n * s2 / (n - 1)) Window.windowLength variance

-- | Sample standard deviation:
--
Expand All @@ -923,7 +950,8 @@ sampleStdDev = sqrt <$> sampleVariance
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE stdErrMean #-}
stdErrMean :: (Monad m, Floating a) => Fold m (a, Maybe a) a
stdErrMean = Fold.teeWith (\sd n -> sd / sqrt n) sampleStdDev Window.length
stdErrMean =
Fold.teeWith (\sd n -> sd / sqrt n) sampleStdDev Window.windowLength

-------------------------------------------------------------------------------
-- Resampling
Expand All @@ -943,7 +971,7 @@ foldArray f = runIdentity . Stream.fold f . Array.read
{-# INLINE jackKnifeMean #-}
jackKnifeMean :: (Monad m, Fractional a, Unbox a) => Array a -> Stream m a
jackKnifeMean arr = do
let len = fromIntegral (length arr - 1)
let len = fromIntegral (Array.length arr - 1)
s = foldArray Fold.sum arr
in fmap (\b -> (s - b) / len) $ Array.read arr

Expand All @@ -955,10 +983,10 @@ jackKnifeMean arr = do
jackKnifeVariance :: (Monad m, Fractional a, Unbox a) =>
Array a -> Stream m a
jackKnifeVariance arr = do
let len = fromIntegral $ length arr - 1
let len = fromIntegral $ Array.length arr - 1
foldSums (s, s2) x = (s + x, s2 + x ^ (2 :: Int))
(sum, sum2) = foldArray (Fold.foldl' foldSums (0.0, 0.0)) arr
var x = (sum2 - x ^ (2 :: Int)) / len - ((sum - x) / len) ^ (2::Int)
(sum1, sum2) = foldArray (Fold.foldl' foldSums (0.0, 0.0)) arr
var x = (sum2 - x ^ (2 :: Int)) / len - ((sum1 - x) / len) ^ (2::Int)
in fmap var $ Array.read arr

-- | Standard deviation computed from 'jackKnifeVariance'.
Expand All @@ -981,7 +1009,7 @@ resample = Unfold step inject

inject arr = liftIO $ do
g <- createSystemRandom
return $ (g, arr, length arr, 0)
return $ (g, arr, Array.length arr, 0)

chooseOne g arr len = do
i <- uniformRM (0, len - 1) g
Expand Down
6 changes: 3 additions & 3 deletions streamly-statistics.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ library
import: ghc-options
exposed-modules: Streamly.Statistics
build-depends: base >= 4.9 && < 5
, streamly-core == 0.1.0
, streamly-core >= 0.2.0
, containers >= 0.5 && < 0.7
, random >= 1.2 && < 1.3
, mwc-random >= 0.15 && < 0.16
Expand All @@ -122,7 +122,7 @@ test-suite test
hs-source-dirs: test
main-is: Main.hs
build-depends: streamly-statistics
, streamly-core == 0.1.0
, streamly-core >= 0.2.0
, base >= 4.9 && < 5
, QuickCheck >= 2.10 && < 2.15
, hspec >= 2.0 && < 3
Expand All @@ -140,7 +140,7 @@ benchmark benchmark
hs-source-dirs: benchmark
main-is: Main.hs
build-depends: streamly-statistics
, streamly-core == 0.1.0
, streamly-core >= 0.2.0
, base >= 4.9 && < 5
, random >= 1.0.0 && < 2
, deepseq >= 1.4.1 && < 1.5
Expand Down
2 changes: 1 addition & 1 deletion test/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import qualified Statistics.Transform as STAT
import qualified Streamly.Data.Array as Array
import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Data.MutArray as MA
import qualified Streamly.Internal.Data.Ring.Unboxed as Ring
import qualified Streamly.Internal.Data.Ring as Ring
import qualified Streamly.Data.Stream as Stream
import qualified Streamly.Data.Stream as S

Expand Down

0 comments on commit e598da7

Please sign in to comment.