|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
{- |
|
|
Packit |
cc4c63 |
Module : Data.String.Unicode
|
|
Packit |
cc4c63 |
Copyright : Copyright (C) 2010- Uwe Schmidt
|
|
Packit |
cc4c63 |
License : MIT
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
|
|
Packit |
cc4c63 |
Stability : stable
|
|
Packit |
cc4c63 |
Portability: portable
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
Unicode and UTF-8 Conversion Functions
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-}
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
module Data.String.Unicode
|
|
Packit |
cc4c63 |
(
|
|
Packit |
cc4c63 |
-- * Unicode Type declarations
|
|
Packit |
cc4c63 |
Unicode,
|
|
Packit |
cc4c63 |
UString,
|
|
Packit |
cc4c63 |
UTF8Char,
|
|
Packit |
cc4c63 |
UTF8String,
|
|
Packit |
cc4c63 |
UStringWithErrors,
|
|
Packit |
cc4c63 |
DecodingFct,
|
|
Packit |
cc4c63 |
DecodingFctEmbedErrors,
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicode
|
|
Packit |
cc4c63 |
, utf8ToUnicodeEmbedErrors
|
|
Packit |
cc4c63 |
, latin1ToUnicode
|
|
Packit |
cc4c63 |
, ucs2ToUnicode
|
|
Packit |
cc4c63 |
, ucs2BigEndianToUnicode
|
|
Packit |
cc4c63 |
, ucs2LittleEndianToUnicode
|
|
Packit |
cc4c63 |
, utf16beToUnicode
|
|
Packit |
cc4c63 |
, utf16leToUnicode
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
, unicodeCharToUtf8
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
, unicodeToUtf8
|
|
Packit |
cc4c63 |
, unicodeToXmlEntity
|
|
Packit |
cc4c63 |
, unicodeToLatin1
|
|
Packit |
cc4c63 |
, unicodeRemoveNoneAscii
|
|
Packit |
cc4c63 |
, unicodeRemoveNoneLatin1
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
, intToCharRef
|
|
Packit |
cc4c63 |
, intToCharRefHex
|
|
Packit |
cc4c63 |
, intToHexString
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
, getDecodingFct
|
|
Packit |
cc4c63 |
, getDecodingFctEmbedErrors
|
|
Packit |
cc4c63 |
, getOutputEncodingFct
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
, normalizeNL
|
|
Packit |
cc4c63 |
, guessEncoding
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
, getOutputEncodingFct'
|
|
Packit |
cc4c63 |
, unicodeCharToUtf8'
|
|
Packit |
cc4c63 |
, unicodeCharToXmlEntity'
|
|
Packit |
cc4c63 |
, unicodeCharToLatin1'
|
|
Packit |
cc4c63 |
)
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
import Data.Char (toUpper)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
import Data.Char.IsoLatinTables
|
|
Packit |
cc4c63 |
import Data.Char.Properties.XMLCharProps (isXml1ByteChar,
|
|
Packit |
cc4c63 |
isXmlLatin1Char)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
import Data.String.EncodingNames
|
|
Packit |
cc4c63 |
import Data.String.UTF8Decoding (decodeUtf8,
|
|
Packit |
cc4c63 |
decodeUtf8EmbedErrors)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Unicode is represented as the Char type
|
|
Packit |
cc4c63 |
-- Precondition for this is the support of Unicode character range
|
|
Packit |
cc4c63 |
-- in the compiler (e.g. ghc but not hugs)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type Unicode = Char
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | the type for Unicode strings
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type UString = [Unicode]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | UTF-8 charachters are represented by the Char type
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type UTF8Char = Char
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | UTF-8 strings are implemented as Haskell strings
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type UTF8String = String
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Decoding function with a pair containing the result string and a list of decoding errors as result
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type DecodingFct = String -> (UString, [String])
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type UStringWithErrors = [Either String Char]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | Decoding function where decoding errors are interleaved with decoded characters
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type DecodingFctEmbedErrors = String -> UStringWithErrors
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- conversion from Unicode strings (UString) to UTF8 encoded strings.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeToUtf8 :: UString -> UTF8String
|
|
Packit |
cc4c63 |
unicodeToUtf8 = concatMap unicodeCharToUtf8
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- conversion from Unicode (Char) to a UTF8 encoded string.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeCharToUtf8 :: Unicode -> UTF8String
|
|
Packit |
cc4c63 |
unicodeCharToUtf8 c
|
|
Packit |
cc4c63 |
| i >= 0 && i <= 0x0000007F -- 1 byte UTF8 (7 bits)
|
|
Packit |
cc4c63 |
= [ toEnum i ]
|
|
Packit |
cc4c63 |
| i >= 0x00000080 && i <= 0x000007FF -- 2 byte UTF8 (5 + 6 bits)
|
|
Packit |
cc4c63 |
= [ toEnum (0xC0 + i `div` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + i `mod` 0x40)
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
| i >= 0x00000800 && i <= 0x0000FFFF -- 3 byte UTF8 (4 + 6 + 6 bits)
|
|
Packit |
cc4c63 |
= [ toEnum (0xE0 + i `div` 0x1000)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x40) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + i `mod` 0x40)
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
| i >= 0x00010000 && i <= 0x001FFFFF -- 4 byte UTF8 (3 + 6 + 6 + 6 bits)
|
|
Packit |
cc4c63 |
= [ toEnum (0xF0 + i `div` 0x40000)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x1000) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x40) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + i `mod` 0x40)
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
| i >= 0x00200000 && i <= 0x03FFFFFF -- 5 byte UTF8 (2 + 6 + 6 + 6 + 6 bits)
|
|
Packit |
cc4c63 |
= [ toEnum (0xF8 + i `div` 0x1000000)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x40000) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x1000) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x40) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + i `mod` 0x40)
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
| i >= 0x04000000 && i <= 0x7FFFFFFF -- 6 byte UTF8 (1 + 6 + 6 + 6 + 6 + 6 bits)
|
|
Packit |
cc4c63 |
= [ toEnum (0xFC + i `div` 0x40000000)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x1000000) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x40000) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x1000) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + (i `div` 0x40) `mod` 0x40)
|
|
Packit |
cc4c63 |
, toEnum (0x80 + i `mod` 0x40)
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
| otherwise -- other values not supported
|
|
Packit |
cc4c63 |
= error ("unicodeCharToUtf8: illegal integer argument " ++ show i)
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
i = fromEnum c
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- code conversion from latin1 to Unicode
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
latin1ToUnicode :: String -> UString
|
|
Packit |
cc4c63 |
latin1ToUnicode = id
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
latinToUnicode :: [(Char, Char)] -> String -> UString
|
|
Packit |
cc4c63 |
latinToUnicode tt
|
|
Packit |
cc4c63 |
= map charToUni
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
charToUni c =
|
|
Packit |
cc4c63 |
foldr (\(src,dst) r ->
|
|
Packit |
cc4c63 |
case compare c src of
|
|
Packit |
cc4c63 |
EQ -> dst
|
|
Packit |
cc4c63 |
LT -> c {- not found in table -}
|
|
Packit |
cc4c63 |
GT -> r) c tt
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- | conversion from ASCII to unicode with check for legal ASCII char set
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- Structure of decoding function copied from 'Data.Char.UTF8.decode'.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
decodeAscii :: DecodingFct
|
|
Packit |
cc4c63 |
decodeAscii
|
|
Packit |
cc4c63 |
= swap . partitionEither . decodeAsciiEmbedErrors
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
decodeAsciiEmbedErrors :: String -> UStringWithErrors
|
|
Packit |
cc4c63 |
decodeAsciiEmbedErrors str
|
|
Packit |
cc4c63 |
= map (\(c,pos) -> if isValid c
|
|
Packit |
cc4c63 |
then Right c
|
|
Packit |
cc4c63 |
else Left (toErrStr c pos)) posStr
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
posStr = zip str [(0::Int)..]
|
|
Packit |
cc4c63 |
toErrStr errChr pos
|
|
Packit |
cc4c63 |
= " at input position " ++ show pos ++ ": none ASCII char " ++ show errChr
|
|
Packit |
cc4c63 |
isValid x = x < '\x80'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- UCS-2 big endian to Unicode conversion
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2BigEndianToUnicode :: String -> UString
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2BigEndianToUnicode (b : l : r)
|
|
Packit |
cc4c63 |
= toEnum (fromEnum b * 256 + fromEnum l) : ucs2BigEndianToUnicode r
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2BigEndianToUnicode []
|
|
Packit |
cc4c63 |
= []
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2BigEndianToUnicode _
|
|
Packit |
cc4c63 |
= [] -- error "illegal UCS-2 byte input sequence with odd length"
|
|
Packit |
cc4c63 |
-- is ignored (garbage in, garbage out)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- UCS-2 little endian to Unicode conversion
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2LittleEndianToUnicode :: String -> UString
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2LittleEndianToUnicode (l : b : r)
|
|
Packit |
cc4c63 |
= toEnum (fromEnum b * 256 + fromEnum l) : ucs2LittleEndianToUnicode r
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2LittleEndianToUnicode []
|
|
Packit |
cc4c63 |
= []
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2LittleEndianToUnicode [_]
|
|
Packit |
cc4c63 |
= [] -- error "illegal UCS-2 byte input sequence with odd length"
|
|
Packit |
cc4c63 |
-- is ignored
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- UCS-2 to UTF-8 conversion with byte order mark analysis
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2ToUnicode :: String -> UString
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2ToUnicode ('\xFE':'\xFF':s) -- 2 byte mark for big endian encoding
|
|
Packit |
cc4c63 |
= ucs2BigEndianToUnicode s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2ToUnicode ('\xFF':'\xFE':s) -- 2 byte mark for little endian encoding
|
|
Packit |
cc4c63 |
= ucs2LittleEndianToUnicode s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
ucs2ToUnicode s
|
|
Packit |
cc4c63 |
= ucs2BigEndianToUnicode s -- default: big endian
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- UTF-8 to Unicode conversion with deletion of leading byte order mark, as described in XML standard F.1
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicode :: DecodingFct
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicode ('\xEF':'\xBB':'\xBF':s) -- remove byte order mark ( XML standard F.1 )
|
|
Packit |
cc4c63 |
= decodeUtf8 s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicode s
|
|
Packit |
cc4c63 |
= decodeUtf8 s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicodeEmbedErrors :: DecodingFctEmbedErrors
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicodeEmbedErrors ('\xEF':'\xBB':'\xBF':s) -- remove byte order mark ( XML standard F.1 )
|
|
Packit |
cc4c63 |
= decodeUtf8EmbedErrors s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf8ToUnicodeEmbedErrors s
|
|
Packit |
cc4c63 |
= decodeUtf8EmbedErrors s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- UTF-16 big endian to UTF-8 conversion with removal of byte order mark
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf16beToUnicode :: String -> UString
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf16beToUnicode ('\xFE':'\xFF':s) -- remove byte order mark
|
|
Packit |
cc4c63 |
= ucs2BigEndianToUnicode s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf16beToUnicode s
|
|
Packit |
cc4c63 |
= ucs2BigEndianToUnicode s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- UTF-16 little endian to UTF-8 conversion with removal of byte order mark
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf16leToUnicode :: String -> UString
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf16leToUnicode ('\xFF':'\xFE':s) -- remove byte order mark
|
|
Packit |
cc4c63 |
= ucs2LittleEndianToUnicode s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
utf16leToUnicode s
|
|
Packit |
cc4c63 |
= ucs2LittleEndianToUnicode s
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- substitute all Unicode characters, that are not legal 1-byte
|
|
Packit |
cc4c63 |
-- UTF-8 XML characters by a character reference.
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- This function can be used to translate all text nodes and
|
|
Packit |
cc4c63 |
-- attribute values into pure ascii.
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- see also : 'unicodeToLatin1'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeToXmlEntity :: UString -> String
|
|
Packit |
cc4c63 |
unicodeToXmlEntity
|
|
Packit |
cc4c63 |
= escape isXml1ByteChar (intToCharRef . fromEnum)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- substitute all Unicode characters, that are not legal latin1
|
|
Packit |
cc4c63 |
-- UTF-8 XML characters by a character reference.
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- This function can be used to translate all text nodes and
|
|
Packit |
cc4c63 |
-- attribute values into ISO latin1.
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- see also : 'unicodeToXmlEntity'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeToLatin1 :: UString -> String
|
|
Packit |
cc4c63 |
unicodeToLatin1
|
|
Packit |
cc4c63 |
= escape isXmlLatin1Char (intToCharRef . fromEnum)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- substitute selected characters
|
|
Packit |
cc4c63 |
-- The @check@ function returns 'True' whenever a character needs to substitution
|
|
Packit |
cc4c63 |
-- The function @esc@ computes a substitute.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
escape :: (Unicode -> Bool) -> (Unicode -> String) -> UString -> String
|
|
Packit |
cc4c63 |
escape check esc =
|
|
Packit |
cc4c63 |
concatMap (\uc -> if check uc then [uc] else esc uc)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- removes all non ascii chars, may be used to transform
|
|
Packit |
cc4c63 |
-- a document into a pure ascii representation by removing
|
|
Packit |
cc4c63 |
-- all non ascii chars from tag and attibute names
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- see also : 'unicodeRemoveNoneLatin1', 'unicodeToXmlEntity'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeRemoveNoneAscii :: UString -> String
|
|
Packit |
cc4c63 |
unicodeRemoveNoneAscii
|
|
Packit |
cc4c63 |
= filter isXml1ByteChar
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- removes all non latin1 chars, may be used to transform
|
|
Packit |
cc4c63 |
-- a document into a pure ascii representation by removing
|
|
Packit |
cc4c63 |
-- all non ascii chars from tag and attibute names
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- see also : 'unicodeRemoveNoneAscii', 'unicodeToLatin1'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeRemoveNoneLatin1 :: UString -> String
|
|
Packit |
cc4c63 |
unicodeRemoveNoneLatin1
|
|
Packit |
cc4c63 |
= filter isXmlLatin1Char
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- convert an Unicode into a XML character reference.
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- see also : 'intToCharRefHex'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
intToCharRef :: Int -> String
|
|
Packit |
cc4c63 |
intToCharRef i
|
|
Packit |
cc4c63 |
= "&#" ++ show i ++ ";"
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- convert an Unicode into a XML hexadecimal character reference.
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- see also: 'intToCharRef'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
intToCharRefHex :: Int -> String
|
|
Packit |
cc4c63 |
intToCharRefHex i
|
|
Packit |
cc4c63 |
= "&#x" ++ h2 ++ ";"
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
h1 = intToHexString i
|
|
Packit |
cc4c63 |
h2 = if length h1 `mod` 2 == 1
|
|
Packit |
cc4c63 |
then '0': h1
|
|
Packit |
cc4c63 |
else h1
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
intToHexString :: Int -> String
|
|
Packit |
cc4c63 |
intToHexString i
|
|
Packit |
cc4c63 |
| i == 0
|
|
Packit |
cc4c63 |
= "0"
|
|
Packit |
cc4c63 |
| i > 0
|
|
Packit |
cc4c63 |
= intToStr i
|
|
Packit |
cc4c63 |
| otherwise
|
|
Packit |
cc4c63 |
= error ("intToHexString: negative argument " ++ show i)
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
intToStr 0 = ""
|
|
Packit |
cc4c63 |
intToStr i' = intToStr (i' `div` 16) ++ [fourBitsToChar (i' `mod` 16)]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
fourBitsToChar :: Int -> Char
|
|
Packit |
cc4c63 |
fourBitsToChar i = "0123456789ABCDEF" !! i
|
|
Packit |
cc4c63 |
{-# INLINE fourBitsToChar #-}
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- | White Space (XML Standard 2.3) and
|
|
Packit |
cc4c63 |
-- end of line handling (2.11)
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
-- \#x0D and \#x0D\#x0A are mapped to \#x0A
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
normalizeNL :: String -> String
|
|
Packit |
cc4c63 |
normalizeNL ('\r' : '\n' : rest) = '\n' : normalizeNL rest
|
|
Packit |
cc4c63 |
normalizeNL ('\r' : rest) = '\n' : normalizeNL rest
|
|
Packit |
cc4c63 |
normalizeNL (c : rest) = c : normalizeNL rest
|
|
Packit |
cc4c63 |
normalizeNL [] = []
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the table of supported character encoding schemes and the associated
|
|
Packit |
cc4c63 |
-- conversion functions into Unicode:q
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
{-
|
|
Packit |
cc4c63 |
This table could be derived from decodingTableEither,
|
|
Packit |
cc4c63 |
but this way it is certainly more efficient.
|
|
Packit |
cc4c63 |
-}
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
decodingTable :: [(String, DecodingFct)]
|
|
Packit |
cc4c63 |
decodingTable
|
|
Packit |
cc4c63 |
= [ (utf8, utf8ToUnicode )
|
|
Packit |
cc4c63 |
, (isoLatin1, liftDecFct latin1ToUnicode )
|
|
Packit |
cc4c63 |
, (usAscii, decodeAscii )
|
|
Packit |
cc4c63 |
, (ucs2, liftDecFct ucs2ToUnicode )
|
|
Packit |
cc4c63 |
, (utf16, liftDecFct ucs2ToUnicode )
|
|
Packit |
cc4c63 |
, (utf16be, liftDecFct utf16beToUnicode )
|
|
Packit |
cc4c63 |
, (utf16le, liftDecFct utf16leToUnicode )
|
|
Packit |
cc4c63 |
, (iso8859_2, liftDecFct (latinToUnicode iso_8859_2) )
|
|
Packit |
cc4c63 |
, (iso8859_3, liftDecFct (latinToUnicode iso_8859_3) )
|
|
Packit |
cc4c63 |
, (iso8859_4, liftDecFct (latinToUnicode iso_8859_4) )
|
|
Packit |
cc4c63 |
, (iso8859_5, liftDecFct (latinToUnicode iso_8859_5) )
|
|
Packit |
cc4c63 |
, (iso8859_6, liftDecFct (latinToUnicode iso_8859_6) )
|
|
Packit |
cc4c63 |
, (iso8859_7, liftDecFct (latinToUnicode iso_8859_7) )
|
|
Packit |
cc4c63 |
, (iso8859_8, liftDecFct (latinToUnicode iso_8859_8) )
|
|
Packit |
cc4c63 |
, (iso8859_9, liftDecFct (latinToUnicode iso_8859_9) )
|
|
Packit |
cc4c63 |
, (iso8859_10, liftDecFct (latinToUnicode iso_8859_10) )
|
|
Packit |
cc4c63 |
, (iso8859_11, liftDecFct (latinToUnicode iso_8859_11) )
|
|
Packit |
cc4c63 |
, (iso8859_13, liftDecFct (latinToUnicode iso_8859_13) )
|
|
Packit |
cc4c63 |
, (iso8859_14, liftDecFct (latinToUnicode iso_8859_14) )
|
|
Packit |
cc4c63 |
, (iso8859_15, liftDecFct (latinToUnicode iso_8859_15) )
|
|
Packit |
cc4c63 |
, (iso8859_16, liftDecFct (latinToUnicode iso_8859_16) )
|
|
Packit |
cc4c63 |
, (unicodeString, liftDecFct id )
|
|
Packit |
cc4c63 |
, ("", liftDecFct id ) -- default
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
liftDecFct df = \ s -> (df s, [])
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the lookup function for selecting the decoding function
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
getDecodingFct :: String -> Maybe DecodingFct
|
|
Packit |
cc4c63 |
getDecodingFct enc
|
|
Packit |
cc4c63 |
= lookup (map toUpper enc) decodingTable
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- Similar to 'decodingTable' but it embeds errors
|
|
Packit |
cc4c63 |
-- in the string of decoded characters.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
decodingTableEmbedErrors :: [(String, DecodingFctEmbedErrors)]
|
|
Packit |
cc4c63 |
decodingTableEmbedErrors
|
|
Packit |
cc4c63 |
= [ (utf8, utf8ToUnicodeEmbedErrors )
|
|
Packit |
cc4c63 |
, (isoLatin1, liftDecFct latin1ToUnicode )
|
|
Packit |
cc4c63 |
, (usAscii, decodeAsciiEmbedErrors )
|
|
Packit |
cc4c63 |
, (ucs2, liftDecFct ucs2ToUnicode )
|
|
Packit |
cc4c63 |
, (utf16, liftDecFct ucs2ToUnicode )
|
|
Packit |
cc4c63 |
, (utf16be, liftDecFct utf16beToUnicode )
|
|
Packit |
cc4c63 |
, (utf16le, liftDecFct utf16leToUnicode )
|
|
Packit |
cc4c63 |
, (iso8859_2, liftDecFct (latinToUnicode iso_8859_2) )
|
|
Packit |
cc4c63 |
, (iso8859_3, liftDecFct (latinToUnicode iso_8859_3) )
|
|
Packit |
cc4c63 |
, (iso8859_4, liftDecFct (latinToUnicode iso_8859_4) )
|
|
Packit |
cc4c63 |
, (iso8859_5, liftDecFct (latinToUnicode iso_8859_5) )
|
|
Packit |
cc4c63 |
, (iso8859_6, liftDecFct (latinToUnicode iso_8859_6) )
|
|
Packit |
cc4c63 |
, (iso8859_7, liftDecFct (latinToUnicode iso_8859_7) )
|
|
Packit |
cc4c63 |
, (iso8859_8, liftDecFct (latinToUnicode iso_8859_8) )
|
|
Packit |
cc4c63 |
, (iso8859_9, liftDecFct (latinToUnicode iso_8859_9) )
|
|
Packit |
cc4c63 |
, (iso8859_10, liftDecFct (latinToUnicode iso_8859_10) )
|
|
Packit |
cc4c63 |
, (iso8859_11, liftDecFct (latinToUnicode iso_8859_11) )
|
|
Packit |
cc4c63 |
, (iso8859_13, liftDecFct (latinToUnicode iso_8859_13) )
|
|
Packit |
cc4c63 |
, (iso8859_14, liftDecFct (latinToUnicode iso_8859_14) )
|
|
Packit |
cc4c63 |
, (iso8859_15, liftDecFct (latinToUnicode iso_8859_15) )
|
|
Packit |
cc4c63 |
, (iso8859_16, liftDecFct (latinToUnicode iso_8859_16) )
|
|
Packit |
cc4c63 |
, (unicodeString, liftDecFct id )
|
|
Packit |
cc4c63 |
, ("", liftDecFct id ) -- default
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
liftDecFct df = map Right . df
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the lookup function for selecting the decoding function
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
getDecodingFctEmbedErrors :: String -> Maybe DecodingFctEmbedErrors
|
|
Packit |
cc4c63 |
getDecodingFctEmbedErrors enc
|
|
Packit |
cc4c63 |
= lookup (map toUpper enc) decodingTableEmbedErrors
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the table of supported output encoding schemes and the associated
|
|
Packit |
cc4c63 |
-- conversion functions from Unicode
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
outputEncodingTable :: [(String, (UString -> String))]
|
|
Packit |
cc4c63 |
outputEncodingTable
|
|
Packit |
cc4c63 |
= [ (utf8, unicodeToUtf8 )
|
|
Packit |
cc4c63 |
, (isoLatin1, unicodeToLatin1 )
|
|
Packit |
cc4c63 |
, (usAscii, unicodeToXmlEntity )
|
|
Packit |
cc4c63 |
, (unicodeString, id )
|
|
Packit |
cc4c63 |
, ("", unicodeToUtf8 ) -- default
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the lookup function for selecting the encoding function
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
getOutputEncodingFct :: String -> Maybe (String -> UString)
|
|
Packit |
cc4c63 |
getOutputEncodingFct enc
|
|
Packit |
cc4c63 |
= lookup (map toUpper enc) outputEncodingTable
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
--
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding :: String -> String
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\xFF':'\xFE':'\x00':'\x00':_) = "UCS-4LE" -- with byte order mark
|
|
Packit |
cc4c63 |
guessEncoding ('\xFF':'\xFE':_) = "UTF-16LE" -- with byte order mark
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\xFE':'\xFF':'\x00':'\x00':_) = "UCS-4-3421" -- with byte order mark
|
|
Packit |
cc4c63 |
guessEncoding ('\xFE':'\xFF':_) = "UTF-16BE" -- with byte order mark
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\xEF':'\xBB':'\xBF':_) = utf8 -- with byte order mark
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\x00':'\x00':'\xFE':'\xFF':_) = "UCS-4BE" -- with byte order mark
|
|
Packit |
cc4c63 |
guessEncoding ('\x00':'\x00':'\xFF':'\xFE':_) = "UCS-4-2143" -- with byte order mark
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\x00':'\x00':'\x00':'\x3C':_) = "UCS-4BE" -- "<" of "
|
|
Packit |
cc4c63 |
guessEncoding ('\x3C':'\x00':'\x00':'\x00':_) = "UCS-4LE" -- "<" of "
|
|
Packit |
cc4c63 |
guessEncoding ('\x00':'\x00':'\x3C':'\x00':_) = "UCS-4-2143" -- "<" of "
|
|
Packit |
cc4c63 |
guessEncoding ('\x00':'\x3C':'\x00':'\x00':_) = "UCS-4-3412" -- "<" of "
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\x00':'\x3C':'\x00':'\x3F':_) = "UTF-16BE" -- "
|
|
Packit |
cc4c63 |
guessEncoding ('\x3C':'\x00':'\x3F':'\x00':_) = "UTF-16LE" -- "
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding ('\x4C':'\x6F':'\xA7':'\x94':_) = "EBCDIC" -- "
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
guessEncoding _ = "" -- no guess
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
swap :: (a,b) -> (b,a)
|
|
Packit |
cc4c63 |
swap (x,y) = (y,x)
|
|
Packit |
cc4c63 |
{-# INLINE swap #-}
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
partitionEither :: [Either a b] -> ([a], [b])
|
|
Packit |
cc4c63 |
partitionEither =
|
|
Packit |
cc4c63 |
foldr (\x ~(ls,rs) -> either (\l -> (l:ls,rs)) (\r -> (ls,r:rs)) x) ([],[])
|
|
Packit |
cc4c63 |
{-# INLINE partitionEither #-}
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- output encoding for bytestrings
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the table of supported output encoding schemes and the associated
|
|
Packit |
cc4c63 |
-- conversion functions from Unicode
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
type StringFct = String -> String
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
outputEncodingTable' :: [(String, (Char -> StringFct))]
|
|
Packit |
cc4c63 |
outputEncodingTable'
|
|
Packit |
cc4c63 |
= [ (utf8, unicodeCharToUtf8' )
|
|
Packit |
cc4c63 |
, (isoLatin1, unicodeCharToLatin1' )
|
|
Packit |
cc4c63 |
, (usAscii, unicodeCharToXmlEntity' )
|
|
Packit |
cc4c63 |
, ("", unicodeCharToUtf8' ) -- default
|
|
Packit |
cc4c63 |
]
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- the lookup function for selecting the encoding function
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
getOutputEncodingFct' :: String -> Maybe (Char -> StringFct)
|
|
Packit |
cc4c63 |
getOutputEncodingFct' enc
|
|
Packit |
cc4c63 |
= lookup (map toUpper enc) outputEncodingTable'
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- conversion from Unicode (Char) to a UTF8 encoded string.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeCharToUtf8' :: Char -> StringFct
|
|
Packit |
cc4c63 |
unicodeCharToUtf8' c
|
|
Packit |
cc4c63 |
| i >= 0 && i <= 0x0000007F -- 1 byte UTF8 (7 bits)
|
|
Packit |
cc4c63 |
= (c :)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
| i >= 0x00000080 && i <= 0x000007FF -- 2 byte UTF8 (5 + 6 bits)
|
|
Packit |
cc4c63 |
= ((toEnum (0xC0 + i `div` 0x40) ) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + i `mod` 0x40)) :)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
| i >= 0x00000800 && i <= 0x0000FFFF -- 3 byte UTF8 (4 + 6 + 6 bits)
|
|
Packit |
cc4c63 |
= ((toEnum (0xE0 + i `div` 0x1000) ) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x40) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + i `mod` 0x40)) :)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
| i >= 0x00010000 && i <= 0x001FFFFF -- 4 byte UTF8 (3 + 6 + 6 + 6 bits) -- extension to encode 21 bit values
|
|
Packit |
cc4c63 |
= ((toEnum (0xF0 + i `div` 0x40000) ) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x1000) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x40) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + i `mod` 0x40)) :)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
| i >= 0x00200000 && i <= 0x03FFFFFF -- 5 byte UTF8 (2 + 6 + 6 + 6 + 6 bits) -- extension to encode 26 bit values
|
|
Packit |
cc4c63 |
= ((toEnum (0xF8 + i `div` 0x1000000) ) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x40000) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x1000) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x40) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + i `mod` 0x40)) :)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
| i >= 0x04000000 && i <= 0x7FFFFFFF -- 6 byte UTF8 (1 + 6 + 6 + 6 + 6 + 6 bits) -- extension to encode 31 bit values
|
|
Packit |
cc4c63 |
= ((toEnum (0xFC + i `div` 0x40000000) ) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x1000000) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x40000) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x1000) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + (i `div` 0x40) `mod` 0x40)) :) .
|
|
Packit |
cc4c63 |
((toEnum (0x80 + i `mod` 0x40)) :)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
| otherwise -- other values not supported
|
|
Packit |
cc4c63 |
= error ("unicodeCharToUtf8: illegal integer argument " ++ show i)
|
|
Packit |
cc4c63 |
where
|
|
Packit |
cc4c63 |
i = fromEnum c
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- substitute all Unicode characters, that are not legal 1-byte
|
|
Packit |
cc4c63 |
-- UTF-8 XML characters by a character reference.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeCharToXmlEntity' :: Char -> StringFct
|
|
Packit |
cc4c63 |
unicodeCharToXmlEntity' c
|
|
Packit |
cc4c63 |
| isXml1ByteChar c = (c :)
|
|
Packit |
cc4c63 |
| otherwise = ((intToCharRef . fromEnum $ c) ++)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- |
|
|
Packit |
cc4c63 |
-- substitute all Unicode characters, that are not legal latin1
|
|
Packit |
cc4c63 |
-- UTF-8 XML characters by a character reference.
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
unicodeCharToLatin1' :: Char -> StringFct
|
|
Packit |
cc4c63 |
unicodeCharToLatin1' c
|
|
Packit |
cc4c63 |
| isXmlLatin1Char c = (c :)
|
|
Packit |
cc4c63 |
| otherwise = ((intToCharRef . fromEnum $ c) ++)
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
-- ------------------------------------------------------------
|
|
Packit |
cc4c63 |
|
|
Packit |
cc4c63 |
|