Blame src/Data/String/UTF8Decoding.hs
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
{- |
|
|
Packit |
cc4c63 |
Module : Data.String.UTF8Decoding
|
|
Packit |
cc4c63 |
Copyright : Copyright (C) 2010- Uwe Schmidt
|
|
Packit |
cc4c63 |
License : MIT
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
|
|
Packit |
cc4c63 |
Stability : stable
|
|
Packit |
cc4c63 |
Portability: portable
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
Interface for Data.String.UTF8 functions
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-}
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
module Data.String.UTF8Decoding (
|
|
Packit |
cc4c63 |
decodeUtf8,
|
|
Packit |
cc4c63 |
decodeUtf8EmbedErrors,
|
|
Packit |
cc4c63 |
decodeUtf8IgnoreErrors,
|
|
Packit |
cc4c63 |
)
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
import qualified Data.String.UTF8 as UTF8
|
|
Packit |
cc4c63 |
import Data.Word (Word8)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Decode UTF-8 input by delegating to 'Data.String.UTF8.decode'.
--
-- The input 'String' is first converted to a list of bytes, one byte
-- per character (see 'stringToByteString'). The result pairs the
-- decoded text with one message per reported decoding error, each
-- message rendered by 'toErrStr' from the error and its position.
decodeUtf8 :: String -> (String, [String])
decodeUtf8 input =
    (decoded, messages)
  where
    (decoded, problems) = UTF8.decode (stringToByteString input)
    messages            = uncurry toErrStr <$> problems
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Decode UTF-8 input with 'decodeUtf8', keeping only the decoded
-- text and discarding the list of error messages.
decodeUtf8IgnoreErrors :: String -> String
decodeUtf8IgnoreErrors input = fst (decodeUtf8 input)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Decode UTF-8 input with 'Data.String.UTF8.decodeEmbedErrors',
-- reporting errors inside the result list.
--
-- Every element is either a decoded character ('Right') or an error
-- message ('Left') rendered by 'toErrStr' from the error and its
-- position.
decodeUtf8EmbedErrors :: String -> [Either String Char]
decodeUtf8EmbedErrors input =
    map describe (UTF8.decodeEmbedErrors (stringToByteString input))
  where
    -- Turn an (error, position) pair into its message; pass characters
    -- through unchanged.
    describe = either (Left . uncurry toErrStr) Right
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Reinterpret every character of a 'String' as a single byte.
--
-- The code point of each character becomes the value of the
-- corresponding 'Word8'. Code points above 255 do not fit into a
-- 'Word8'; 'toEnum' raises an error for them.
stringToByteString :: String -> [Word8]
stringToByteString chars = [toEnum (fromEnum c) | c <- chars]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Render a decoding error together with its input position as a
-- message fragment of the form @" at input position \<pos\>: \<err\>"@.
--
-- Both the position and the error are formatted with 'show'.
toErrStr :: UTF8.Error -> Int -> String
toErrStr err pos =
    concat [" at input position ", show pos, ": ", show err]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|