-- ------------------------------------------------------------
{- |
Module : Data.String.UTF8Decoding
Copyright : Copyright (C) 2010- Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
Stability : stable
Portability: portable
Interface for Data.String.UTF8 functions
-}
-- ------------------------------------------------------------
module Data.String.UTF8Decoding (
decodeUtf8,
decodeUtf8EmbedErrors,
decodeUtf8IgnoreErrors,
)
where
import qualified Data.String.UTF8 as UTF8
import Data.Word (Word8)
-- | Decode a UTF-8 byte sequence that is given as a 'String'.
--
-- The input characters are turned into bytes with 'stringToByteString'
-- and handed to 'UTF8.decode' (from "Data.String.UTF8").
--
-- The result is a pair: the decoded text, and one message per decoding
-- error, each rendered with 'toErrStr'.
decodeUtf8 :: String -> (String, [String])
decodeUtf8 input =
    (fst outcome, fmap (uncurry toErrStr) (snd outcome))
  where
    -- bound once and taken apart lazily with fst/snd, so the decoded text
    -- can be consumed without forcing the error list
    outcome = UTF8.decode (stringToByteString input)
-- | Decode UTF-8 like 'decodeUtf8', but drop the list of error messages
-- and return only the decoded text.
decodeUtf8IgnoreErrors :: String -> String
decodeUtf8IgnoreErrors input =
    fst (decodeUtf8 input)
-- | Decode UTF-8 with the errors kept in line with the decoded characters.
--
-- The input is converted with 'stringToByteString' and passed to
-- 'UTF8.decodeEmbedErrors'. Every 'Right' character is passed through
-- unchanged; every 'Left' error is replaced by its message as built by
-- 'toErrStr'.
decodeUtf8EmbedErrors :: String -> [Either String Char]
decodeUtf8EmbedErrors =
    map render . UTF8.decodeEmbedErrors . stringToByteString
  where
    -- turn the error payload of a 'Left' into text, keep 'Right' as is
    render (Left failure) = Left (uncurry toErrStr failure)
    render (Right ch)     = Right ch
-- | Reinterpret every character of a 'String' as the byte with the same
-- numeric code.
--
-- The conversion goes through 'fromEnum' and 'toEnum'. 'toEnum' for
-- 'Word8' is only defined for 0..255, so a character with a larger code
-- raises a runtime error once the corresponding byte is demanded.
stringToByteString :: String -> [Word8]
stringToByteString chars =
    [ toEnum (fromEnum c) | c <- chars ]
-- | Render a decoding error and its input position as a message fragment
-- of the form @" at input position \<pos\>: \<err\>"@.
--
-- Both the position and the error are formatted with 'show'. The fragment
-- starts with a space.
toErrStr :: UTF8.Error -> Int -> String
toErrStr err pos =
    concat
      [ " at input position "
      , show pos
      , ": "
      , show err
      ]
-- ------------------------------------------------------------