aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/CSV.hs
blob: 610b7b31f30248fe88d9bcf464a93d1ecf6462fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
{- |
   Module      : Text.Pandoc.CSV
   Copyright   : Copyright (C) 2017-2024 John MacFarlane <jgm@berkeley.edu>
   License     : GNU GPL, version 2 or above
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Simple CSV parser.
-}

module Text.Pandoc.CSV (
  CSVOptions(..),
  defaultCSVOptions,
  parseCSV,
  ParseError
) where

import Control.Monad (unless, void, mzero)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Parsing hiding (escaped)

-- | Parser type used throughout this module: a Parsec parser that reads
-- from 'Text' input and carries no user state (the state type is @()@).
type Parser = Parsec Text ()

-- | Settings that control how CSV input is split into rows and cells.
data CSVOptions = CSVOptions
  { csvDelim     :: Char
    -- ^ Character that separates the cells of a row.
  , csvQuote     :: Maybe Char
    -- ^ Character that opens and closes a quoted cell.  With 'Nothing',
    -- quoted cells are never recognized.
  , csvKeepSpace :: Bool
    -- ^ Treat whitespace following the delimiter as significant.
  , csvEscape    :: Maybe Char
    -- ^ Escape character used inside quoted cells.  With 'Nothing', the
    -- default applies: a doubled quote character stands for one quote.
  } deriving (Read, Show)

-- | Default settings: comma-delimited cells, double-quote quoting,
-- whitespace after a delimiter is skipped, and no dedicated escape
-- character (so a quote inside a quoted cell is written doubled).
defaultCSVOptions :: CSVOptions
defaultCSVOptions =
  CSVOptions
    { csvDelim     = ','
    , csvQuote     = Just '"'
    , csvKeepSpace = False
    , csvEscape    = Nothing
    }

-- | Parse CSV text into a list of rows, each row being a list of cells.
-- The parser is run with the source name @"csv"@; a failed parse is
-- returned as a 'ParseError' in 'Left'.
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV opts = parse (pCSV opts) "csv"

-- | Parse a whole document: rows separated (and optionally terminated)
-- by line endings, followed by optional trailing whitespace and then
-- end of input.
pCSV :: CSVOptions -> Parser [[Text]]
pCSV opts = do
  rows <- pCSVRow opts `sepEndBy` endline
  spaces
  eof
  return rows

-- | Parse one row: a first cell followed by delimiter-prefixed cells.
pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow opts = do
  firstCell <- pCSVCell opts
  let nextCell = pCSVDelim opts *> pCSVCell opts
  -- An empty first cell only counts as a row when at least one
  -- delimiter follows; otherwise a blank line would parse as a row.
  rest <- if T.null firstCell
            then many1 nextCell
            else many nextCell
  return (firstCell : rest)

-- | Parse a single cell, trying the quoted form first and falling back
-- to the unquoted form.
pCSVCell :: CSVOptions -> Parser Text
pCSVCell opts = quoted <|> unquoted
  where
    quoted   = pCSVQuotedCell opts
    unquoted = pCSVUnquotedCell opts

-- | Parse a cell enclosed in the configured quote character and return
-- its contents without the surrounding quotes.  Fails immediately when
-- no quote character is configured.
pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell opts = maybe mzero quotedBy (csvQuote opts)
  where
    quotedBy :: Char -> Parser Text
    quotedBy q = do
      void $ char q
      contents <- many (plainChar q <|> escaped opts)
      void $ char q
      return (T.pack contents)

    -- Any character that is neither the quote nor the escape character;
    -- those two are left for 'escaped' and the closing quote.
    plainChar :: Char -> Parser Char
    plainChar q =
      satisfy $ \c -> not (c == q || Just c == csvEscape opts)

-- | Parse one escaped character inside a quoted cell.
--
-- With an escape character configured, that character followed by any
-- character other than a carriage return or line feed yields the
-- following character.  Without one, a doubled quote character yields a
-- single quote.  With neither an escape nor a quote character the
-- parser always fails.
escaped :: CSVOptions -> Parser Char
escaped opts =
  case (csvEscape opts, csvQuote opts) of
    (Just esc, _)      -> try (char esc >> noneOf "\r\n")
    (Nothing, Just q)  -> try (char q >> char q)
    (Nothing, Nothing) -> mzero

-- | Parse an unquoted cell: the (possibly empty) run of characters up to
-- the next delimiter, carriage return, or line feed.
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell opts = fmap T.pack (many (satisfy isCellChar))
  where
    isCellChar :: Char -> Bool
    isCellChar c = c `notElem` [csvDelim opts, '\r', '\n']

-- | Parse the cell delimiter and, unless 'csvKeepSpace' is set, skip the
-- whitespace that follows it.
pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim opts = do
  void (char delim)
  unless (csvKeepSpace opts) (skipMany padding)
  where
    delim = csvDelim opts

    -- With a tab delimiter only spaces are skipped, so that a following
    -- tab is still seen as the next delimiter.
    padding :: Parser Char
    padding
      | delim == '\t' = char ' '
      | otherwise     = oneOf " \t"

-- | Parse a line ending: a line feed, optionally preceded by a carriage
-- return.
endline :: Parser ()
endline = optional (char '\r') *> void (char '\n')