-
Notifications
You must be signed in to change notification settings - Fork 71
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make union*
, difference*
, and intersect*
run linearly by sorting elements first
#203
Changes from 6 commits
615f2f4
f1ffa4a
d82c947
b3be7f4
a9c9514
d04167b
72e556d
b1577bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -109,6 +109,7 @@ module Data.Array | |||||||||
, deleteBy | ||||||||||
|
||||||||||
, (\\), difference | ||||||||||
, differenceBy | ||||||||||
, intersect | ||||||||||
, intersectBy | ||||||||||
|
||||||||||
|
@@ -135,12 +136,14 @@ import Control.Alternative (class Alternative) | |||||||||
import Control.Lazy (class Lazy, defer) | ||||||||||
import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM2) | ||||||||||
import Control.Monad.ST as ST | ||||||||||
import Control.Monad.ST.Ref as STRef | ||||||||||
import Data.Array.NonEmpty.Internal (NonEmptyArray(..)) | ||||||||||
import Data.Array.ST as STA | ||||||||||
import Data.Array.ST.Iterator as STAI | ||||||||||
import Data.Foldable (class Foldable, foldl, foldr, traverse_) | ||||||||||
import Data.Foldable (foldl, foldr, foldMap, fold, intercalate) as Exports | ||||||||||
import Data.Maybe (Maybe(..), maybe, isJust, fromJust, isNothing) | ||||||||||
import Data.Ordering (invert) | ||||||||||
import Data.Traversable (sequence, traverse) | ||||||||||
import Data.Tuple (Tuple(..), fst, snd) | ||||||||||
import Data.Unfoldable (class Unfoldable, unfoldr) | ||||||||||
|
@@ -1071,8 +1074,8 @@ nubByEq eq xs = ST.run do | |||||||||
-- | union [1, 2, 1, 1] [3, 3, 3, 4] = [1, 2, 1, 1, 3, 4] | ||||||||||
-- | ``` | ||||||||||
-- | | ||||||||||
union :: forall a. Eq a => Array a -> Array a -> Array a | ||||||||||
union = unionBy (==) | ||||||||||
union :: forall a. Ord a => Array a -> Array a -> Array a | ||||||||||
union = unionBy compare | ||||||||||
|
||||||||||
-- | Calculate the union of two arrays, using the specified function to | ||||||||||
-- | determine equality of elements. Note that duplicates in the first array | ||||||||||
|
@@ -1083,8 +1086,33 @@ union = unionBy (==) | |||||||||
-- | unionBy mod3eq [1, 5, 1, 2] [3, 4, 3, 3] = [1, 5, 1, 2, 3] | ||||||||||
-- | ``` | ||||||||||
-- | | ||||||||||
unionBy :: forall a. (a -> a -> Boolean) -> Array a -> Array a -> Array a | ||||||||||
unionBy eq xs ys = xs <> foldl (flip (deleteBy eq)) (nubByEq eq ys) xs | ||||||||||
unionBy :: forall a. (a -> a -> Ordering) -> Array a -> Array a -> Array a | ||||||||||
unionBy cmp left right = map snd $ ST.run do | ||||||||||
result <- STA.new | ||||||||||
ST.foreach indexedAndSorted \(Tuple fromLeftArray pair@(Tuple _ x')) -> do | ||||||||||
if fromLeftArray then do | ||||||||||
void $ STA.push pair result | ||||||||||
else do | ||||||||||
maybePreviousValue <- last <$> STA.unsafeFreeze result | ||||||||||
case maybePreviousValue of | ||||||||||
Just (Tuple _ y) | ||||||||||
| cmp y x' /= EQ -> void $ STA.push pair result | ||||||||||
| otherwise -> pure unit | ||||||||||
Nothing -> do | ||||||||||
void $ STA.push pair result | ||||||||||
_ <- STA.sortWith fst result | ||||||||||
STA.unsafeFreeze result | ||||||||||
where | ||||||||||
-- Note: when elements are equal, left array elements | ||||||||||
-- (i.e. `Tuple true (Tuple _ _)`) appear "before" right array elements | ||||||||||
-- (i.e. `Tuple false (Tuple _ _)`) in the resulting array. | ||||||||||
-- This is different from `differenceBy` and `intersectBy` | ||||||||||
valueThenLeftFirst :: Tuple Boolean (Tuple Int a) -> Tuple Boolean (Tuple Int a) -> Ordering | ||||||||||
valueThenLeftFirst (Tuple lb (Tuple _ lv)) (Tuple rb (Tuple _ rv)) = | ||||||||||
cmp lv rv <> invert (compare lb rb) | ||||||||||
|
||||||||||
indexedAndSorted = sortBy valueThenLeftFirst $ combineIndex left right | ||||||||||
|
||||||||||
|
||||||||||
-- | Delete the first element of an array which is equal to the specified value, | ||||||||||
-- | creating a new array. | ||||||||||
|
@@ -1118,13 +1146,82 @@ deleteBy eq x ys = maybe ys (\i -> unsafePartial $ fromJust (deleteAt i ys)) (fi | |||||||||
-- | difference [2, 1] [2, 3] = [1] | ||||||||||
-- | ``` | ||||||||||
-- | | ||||||||||
-- | Running time: `O(n*m)`, where n is the length of the first array, and m is | ||||||||||
-- | Running time: `O(n+m)`, where n is the length of the first array, and m is | ||||||||||
-- | the length of the second. | ||||||||||
difference :: forall a. Eq a => Array a -> Array a -> Array a | ||||||||||
difference = foldr delete | ||||||||||
difference :: forall a. Ord a => Array a -> Array a -> Array a | ||||||||||
difference = differenceBy compare | ||||||||||
|
||||||||||
infix 5 difference as \\ | ||||||||||
|
||||||||||
differenceBy :: forall a. (a -> a -> Ordering) -> Array a -> Array a -> Array a | ||||||||||
differenceBy _ left [] = left | ||||||||||
differenceBy _ left@[] _ = left | ||||||||||
differenceBy cmp left right = map snd $ ST.run do | ||||||||||
result <- STA.new | ||||||||||
latestRightArrayValue <- STRef.new Nothing | ||||||||||
ST.foreach indexedAndSorted \(Tuple fromLeftArray pair@(Tuple _ x')) -> do | ||||||||||
maybeValueToRemove <- STRef.read latestRightArrayValue | ||||||||||
case fromLeftArray, maybeValueToRemove of | ||||||||||
true, Just (Tuple valueToRemove count) | cmp valueToRemove x' == EQ -> | ||||||||||
-- do not add left array's element to final array; check count | ||||||||||
if count == 1 then do | ||||||||||
void $ STRef.write Nothing latestRightArrayValue | ||||||||||
else do | ||||||||||
let decrementCount = Just (Tuple valueToRemove (count - 1)) | ||||||||||
void $ STRef.write decrementCount latestRightArrayValue | ||||||||||
|
||||||||||
true, _ -> void $ STA.push pair result | ||||||||||
|
||||||||||
false, Just (Tuple valueToRemove count) | cmp valueToRemove x' == EQ -> do | ||||||||||
let next = if count == 0 then Nothing | ||||||||||
else Just (Tuple valueToRemove (count - 1)) | ||||||||||
void $ STRef.write next latestRightArrayValue | ||||||||||
false, _ -> do | ||||||||||
void $ STRef.write (Just (Tuple x' 1)) latestRightArrayValue | ||||||||||
_ <- STA.sortWith fst result | ||||||||||
STA.unsafeFreeze result | ||||||||||
where | ||||||||||
-- Note: when elements are equal, right array elements | ||||||||||
-- (i.e. `Tuple false (Tuple _ _)`) appear "before" left array elements | ||||||||||
-- (i.e. `Tuple true (Tuple _ _)`) in the resulting array. | ||||||||||
valueThenRightFirst :: Tuple Boolean (Tuple Int a) -> Tuple Boolean (Tuple Int a) -> Ordering | ||||||||||
valueThenRightFirst (Tuple lb (Tuple _ lv)) (Tuple rb (Tuple _ rv)) = | ||||||||||
cmp lv rv <> compare lb rb | ||||||||||
|
||||||||||
indexedAndSorted = sortBy valueThenRightFirst $ combineIndex left right | ||||||||||
|
||||||||||
-- Internal use only | ||||||||||
-- Essentially... | ||||||||||
-- ``` | ||||||||||
-- combineWithIndex cmp left right = | ||||||||||
-- let | ||||||||||
-- t3 bool idx val = Tuple bool (Tuple idx val) | ||||||||||
-- leftIndexed = mapWithIndex (\idx v -> t3 true idx v) left | ||||||||||
-- | ||||||||||
-- tupleAdjusted idx v = t3 false (idx + (length left)) v | ||||||||||
-- rightIndexedPlus = mapWithIndex tupleAdjusted right | ||||||||||
-- | ||||||||||
-- in leftIndexed <> rightIndexedPlus | ||||||||||
-- ``` | ||||||||||
-- ... but without creating two intermediate arrays due to the `mapWithIndex` | ||||||||||
-- on both arrays. | ||||||||||
-- ``` | ||||||||||
-- combineIndex compare [0] [0, 1] | ||||||||||
-- == [t3 true 0 0, t3 false 1 0, t3 false 2 1] | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Easier to follow example with values that are distinct from indices There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But then there aren't values that exist in both arrays. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since Could compromise with something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
🤦♂️ You're right! I forgot that this function doesn't need to care unlike the ones I'm working on. I'll make that fix. |
||||||||||
-- ``` | ||||||||||
combineIndex :: forall a. Array a -> Array a -> Array (Tuple Boolean (Tuple Int a)) | ||||||||||
combineIndex left right = ST.run do | ||||||||||
out <- STA.new | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any benefits to preallocating the array to a known size? Tried to find a clear answer for this in JS. And if there is a benefit that we want to take advantage of, we'd need to expand the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've wondered about that myself... |
||||||||||
let leftLen = length left | ||||||||||
ST.for 0 leftLen \idx -> do | ||||||||||
let val = unsafePartial $ unsafeIndex left idx | ||||||||||
void $ STA.push (Tuple true (Tuple idx val)) out | ||||||||||
let rightLen = length right | ||||||||||
ST.for 0 rightLen \idx -> do | ||||||||||
let val = unsafePartial $ unsafeIndex right idx | ||||||||||
void $ STA.push (Tuple false (Tuple (leftLen + idx) val)) out | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any performance penalties to using a record instead of a tuple? Luckily, this tuple happens to be type-safe because of the distinct types ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure. I also tried it out with |
||||||||||
STA.unsafeFreeze out | ||||||||||
|
||||||||||
-- | Calculate the intersection of two arrays, creating a new array. Note that | ||||||||||
-- | duplicates in the first array are preserved while duplicates in the second | ||||||||||
-- | array are removed. | ||||||||||
|
@@ -1133,8 +1230,8 @@ infix 5 difference as \\ | |||||||||
-- | intersect [1, 1, 2] [2, 2, 1] = [1, 1, 2] | ||||||||||
-- | ``` | ||||||||||
-- | | ||||||||||
intersect :: forall a. Eq a => Array a -> Array a -> Array a | ||||||||||
intersect = intersectBy eq | ||||||||||
intersect :: forall a. Ord a => Array a -> Array a -> Array a | ||||||||||
intersect = intersectBy compare | ||||||||||
|
||||||||||
-- | Calculate the intersection of two arrays, using the specified equivalence | ||||||||||
-- | relation to compare elements, creating a new array. Note that duplicates | ||||||||||
|
@@ -1146,8 +1243,32 @@ intersect = intersectBy eq | |||||||||
-- | intersectBy mod3eq [1, 2, 3] [4, 6, 7] = [1, 3] | ||||||||||
-- | ``` | ||||||||||
-- | | ||||||||||
intersectBy :: forall a. (a -> a -> Boolean) -> Array a -> Array a -> Array a | ||||||||||
intersectBy eq xs ys = filter (\x -> isJust (findIndex (eq x) ys)) xs | ||||||||||
intersectBy :: forall a. (a -> a -> Ordering) -> Array a -> Array a -> Array a | ||||||||||
intersectBy _ left [] = left | ||||||||||
intersectBy _ left@[] _ = left | ||||||||||
intersectBy cmp left right = map snd $ ST.run do | ||||||||||
result <- STA.new | ||||||||||
latestRightArrayValue <- STRef.new Nothing | ||||||||||
ST.foreach indexedAndSorted \(Tuple fromLeftArray pair@(Tuple i x')) -> | ||||||||||
if fromLeftArray then do | ||||||||||
maybeValueToRemove <- STRef.read latestRightArrayValue | ||||||||||
case maybeValueToRemove of | ||||||||||
Just valueToRemove | cmp valueToRemove x' == EQ -> do | ||||||||||
void $ STA.push pair result | ||||||||||
_ -> pure unit | ||||||||||
else do | ||||||||||
void $ STRef.write (Just x') latestRightArrayValue | ||||||||||
_ <- STA.sortWith fst result | ||||||||||
STA.unsafeFreeze result | ||||||||||
where | ||||||||||
-- Note: when elements are equal, right array elements | ||||||||||
-- (i.e. `Tuple false (Tuple _ _)`) appear "before" left array elements | ||||||||||
-- (i.e. `Tuple true (Tuple _ _)`) in the resulting array. | ||||||||||
valueThenRightFirst :: Tuple Boolean (Tuple Int a) -> Tuple Boolean (Tuple Int a) -> Ordering | ||||||||||
valueThenRightFirst (Tuple lb (Tuple _ lv)) (Tuple rb (Tuple _ rv)) = | ||||||||||
cmp lv rv <> compare lb rb | ||||||||||
|
||||||||||
indexedAndSorted = sortBy valueThenRightFirst $ combineIndex left right | ||||||||||
|
||||||||||
-- | Apply a function to pairs of elements at the same index in two arrays, | ||||||||||
-- | collecting the results in a new array. | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may be more efficient to take advantage of the of the fact that the output is always the first array plus some other stuff appended to it. Here's some non-ST pseudo-ish code describing that option:
union left right = left <> other where other = combineIndex right left -- intentionally putting right first and assuming stable sort # sortBy valueFunc # groupBy valueFunc # map head # filter keepRightOnly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What would be the runtime cost of this approach versus mine?
I made the rather stupid assumption that mine would be
n+m
because when Harry said the current version wasn*m
, I thought he was implying we should make the code linear. I didn't actually analyze my code using Big O notation to see how many steps it takes.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe all reasonable approaches are
O(n log n)
(where n is the sum of array sizes). But benchmarking could still reveal a 3x speedup with a different approach, which would still beO(n log n)
. I'll play around with this and report back with findings. The choice of input data could also significantly change the relative performance of different algorithms.