Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable encoding by name with missing fields #219

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cassava.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 1.12
Name: cassava
Version: 0.5.3.0
Version: 0.6.0.0
Synopsis: A CSV parsing and encoding library
Description: {

Expand Down
2 changes: 1 addition & 1 deletion src/Data/Csv/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ encodeRecordWith opts r =
encodeNamedRecordWith :: ToNamedRecord a =>
EncodeOptions -> Header -> a -> Builder.Builder
encodeNamedRecordWith opts hdr nr =
Encoding.encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts)
Encoding.encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (encMissing opts)
(toNamedRecord nr) Mon.<> Encoding.recordSep (encUseCrLf opts)

-- | Like 'encodeDefaultOrderedNamedRecord', but lets you customize
Expand Down
27 changes: 16 additions & 11 deletions src/Data/Csv/Encoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,11 @@ data EncodeOptions = EncodeOptions

-- | What kind of quoting should be applied to text fields.
, encQuoting :: !Quoting
} deriving (Eq, Show)

-- | What to write into empty fields given their field name. For
-- backward-compatibility, this defaults to a call to `error`.
, encMissing :: Name -> Field
}

-- | Encoding options for CSV files.
defaultEncodeOptions :: EncodeOptions
Expand All @@ -220,6 +224,9 @@ defaultEncodeOptions = EncodeOptions
, encUseCrLf = True
, encIncludeHeader = True
, encQuoting = QuoteMinimal
, encMissing = \n -> moduleError "namedRecordToRecord" $
"header contains name " ++ show (B8.unpack n) ++
" which is not present in the named record"
}

-- | Like 'encode', but lets you customize how the CSV data is
Expand Down Expand Up @@ -262,9 +269,9 @@ encodeRecord qtng delim = mconcat . intersperse (word8 delim)

-- | Encode a single named record, without the trailing record
-- separator (i.e. newline), using the given field order.
encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord hdr qtng delim =
encodeRecord qtng delim . namedRecordToRecord hdr
encodeNamedRecord :: Header -> Quoting -> Word8 -> (Name -> Field) -> NamedRecord -> Builder
encodeNamedRecord hdr qtng delim missing =
encodeRecord qtng delim . namedRecordToRecord missing hdr

-- TODO: Optimize
escape :: Quoting -> Word8 -> B.ByteString -> B.ByteString
Expand Down Expand Up @@ -300,7 +307,7 @@ encodeByNameWith opts hdr v
rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <>
recordSep (encUseCrLf opts) <> records
records = unlines (recordSep (encUseCrLf opts))
. map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts)
. map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (encMissing opts)
. toNamedRecord)
$ v
{-# INLINE encodeByNameWith #-}
Expand All @@ -320,18 +327,16 @@ encodeDefaultOrderedByNameWith opts v
rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <>
recordSep (encUseCrLf opts) <> records
records = unlines (recordSep (encUseCrLf opts))
. map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts)
. map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (encMissing opts)
. toNamedRecord)
$ v
{-# INLINE encodeDefaultOrderedByNameWith #-}

namedRecordToRecord :: Header -> NamedRecord -> Record
namedRecordToRecord hdr nr = V.map find hdr
namedRecordToRecord :: (Name -> Field) -> Header -> NamedRecord -> Record
namedRecordToRecord missing hdr nr = V.map find hdr
where
find n = case HM.lookup n nr of
Nothing -> moduleError "namedRecordToRecord" $
"header contains name " ++ show (B8.unpack n) ++
" which is not present in the named record"
Nothing -> missing n
Just v -> v

moduleError :: String -> String -> a
Expand Down
14 changes: 7 additions & 7 deletions src/Data/Csv/Incremental.hs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ encodeByNameWith opts hdr b =
Builder.toLazyByteString $
encHdr <>
runNamedBuilder b hdr (encQuoting opts) (encDelimiter opts)
(encUseCrLf opts)
(encMissing opts) (encUseCrLf opts)
where
encHdr
| encIncludeHeader opts =
Expand All @@ -418,7 +418,7 @@ encodeDefaultOrderedByNameWith opts b =
Builder.toLazyByteString $
encHdr <>
runNamedBuilder b hdr (encQuoting opts)
(encDelimiter opts) (encUseCrLf opts)
(encDelimiter opts) (encMissing opts) (encUseCrLf opts)
where
hdr = Conversion.headerOrder (undefined :: a)

Expand All @@ -430,8 +430,8 @@ encodeDefaultOrderedByNameWith opts b =

-- | Encode a single named record.
encodeNamedRecord :: ToNamedRecord a => a -> NamedBuilder a
encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim useCrLf ->
Encoding.encodeNamedRecord hdr qtng delim
encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim missing useCrLf ->
Encoding.encodeNamedRecord hdr qtng delim missing
(Conversion.toNamedRecord nr) <> recordSep useCrLf

-- | A builder for building the CSV data incrementally. Just like the
Expand All @@ -440,14 +440,14 @@ encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim useCrLf ->
-- a left-associative, `foldl'` style makes the building not be
-- incremental.
newtype NamedBuilder a = NamedBuilder {
runNamedBuilder :: Header -> Quoting -> Word8 -> Bool -> Builder.Builder
runNamedBuilder :: Header -> Quoting -> Word8 -> (Name -> Field) -> Bool -> Builder.Builder
}

-- | @since 0.5.0.0
instance Semigroup (NamedBuilder a) where
NamedBuilder f <> NamedBuilder g =
NamedBuilder $ \ hdr qtng delim useCrlf ->
f hdr qtng delim useCrlf <> g hdr qtng delim useCrlf
NamedBuilder $ \ hdr qtng delim missing useCrlf ->
f hdr qtng delim missing useCrlf <> g hdr qtng delim missing useCrlf

instance Monoid (NamedBuilder a) where
mempty = NamedBuilder (\ _ _ _ _ -> mempty)
Expand Down
15 changes: 15 additions & 0 deletions tests/UnitTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,24 @@ customDelim delim f1 f2 = delim `notElem` [cr, nl, dquote] ==>
cr = 13
dquote = 34

customMissing :: Assertion
customMissing = encodeByNameWith encOpts hdr nrs @?= ex
where
encOpts = defaultEncodeOptions { encMissing = id }
hdr = V.fromList ["abc", "def"]
nrs :: [NamedRecord]
nrs =
[ HM.fromList [("abc", "123")]
, HM.fromList [("def", "456")]
, HM.fromList [("abc", "234"), ("def", "567")]
, HM.fromList []
]
ex = "abc,def\r\n123,def\r\nabc,456\r\n234,567\r\nabc,def\r\n"

customOptionsTests :: [TF.Test]
customOptionsTests =
[ testProperty "customDelim" customDelim
, testCase "customMissing" customMissing
]

------------------------------------------------------------------------
Expand Down