Blame src/Text/Pandoc/BCP47.hs

Packit Service d2f85f
{-
Packit Service d2f85f
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
Packit Service d2f85f
Packit Service d2f85f
This program is free software; you can redistribute it and/or modify
Packit Service d2f85f
it under the terms of the GNU General Public License as published by
Packit Service d2f85f
the Free Software Foundation; either version 2 of the License, or
Packit Service d2f85f
(at your option) any later version.
Packit Service d2f85f
Packit Service d2f85f
This program is distributed in the hope that it will be useful,
Packit Service d2f85f
but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service d2f85f
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service d2f85f
GNU General Public License for more details.
Packit Service d2f85f
Packit Service d2f85f
You should have received a copy of the GNU General Public License
Packit Service d2f85f
along with this program; if not, write to the Free Software
Packit Service d2f85f
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
Packit Service d2f85f
-}
Packit Service d2f85f
Packit Service d2f85f
{- |
Packit Service d2f85f
   Module      : Text.Pandoc.BCP47
Packit Service d2f85f
   Copyright   : Copyright (C) 2017 John MacFarlane
Packit Service d2f85f
   License     : GNU GPL, version 2 or above
Packit Service d2f85f
Packit Service d2f85f
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
Packit Service d2f85f
   Stability   : alpha
Packit Service d2f85f
   Portability : portable
Packit Service d2f85f
Packit Service d2f85f
Functions for parsing and rendering BCP47 language identifiers.
Packit Service d2f85f
-}
Packit Service d2f85f
module Text.Pandoc.BCP47 (
Packit Service d2f85f
                       getLang
Packit Service d2f85f
                     , parseBCP47
Packit Service d2f85f
                     , Lang(..)
Packit Service d2f85f
                     , renderLang
Packit Service d2f85f
                     )
Packit Service d2f85f
where
Packit Service d2f85f
import Control.Monad (guard)
Packit Service d2f85f
import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower,
Packit Service d2f85f
                  toUpper)
Packit Service d2f85f
import Data.List (intercalate)
Packit Service d2f85f
import Text.Pandoc.Definition
Packit Service d2f85f
import Text.Pandoc.Options
Packit Service d2f85f
import qualified Text.Parsec as P
Packit Service d2f85f
Packit Service d2f85f
-- | Represents BCP 47 language/country code.
Packit Service d2f85f
data Lang = Lang{ langLanguage :: String
Packit Service d2f85f
                , langScript   :: String
Packit Service d2f85f
                , langRegion   :: String
Packit Service d2f85f
                , langVariants :: [String] }
Packit Service d2f85f
                deriving (Eq, Ord, Show)
Packit Service d2f85f
Packit Service d2f85f
-- | Render a Lang as BCP 47.
Packit Service d2f85f
renderLang :: Lang -> String
Packit Service d2f85f
renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
Packit Service d2f85f
                    ([langScript lang, langRegion lang] ++ langVariants lang))
Packit Service d2f85f
Packit Service d2f85f
-- | Get the contents of the `lang` metadata field or variable.
Packit Service d2f85f
getLang :: WriterOptions -> Meta -> Maybe String
Packit Service d2f85f
getLang opts meta =
Packit Service d2f85f
  case lookup "lang" (writerVariables opts) of
Packit Service d2f85f
        Just s -> Just s
Packit Service d2f85f
        _      ->
Packit Service d2f85f
          case lookupMeta "lang" meta of
Packit Service d2f85f
               Just (MetaInlines [Str s]) -> Just s
Packit Service d2f85f
               Just (MetaString s)        -> Just s
Packit Service d2f85f
               _                          -> Nothing
Packit Service d2f85f
Packit Service d2f85f
-- | Parse a BCP 47 string as a Lang.  Currently we parse
Packit Service d2f85f
-- extensions and private-use fields as "variants," even
Packit Service d2f85f
-- though officially they aren't.
Packit Service d2f85f
parseBCP47 :: String -> Either String Lang
Packit Service d2f85f
parseBCP47 lang =
Packit Service d2f85f
  case P.parse bcp47 "lang" lang of
Packit Service d2f85f
       Right r -> Right r
Packit Service d2f85f
       Left e  -> Left $ show e
Packit Service d2f85f
  where bcp47 = do
Packit Service d2f85f
          language <- pLanguage
Packit Service d2f85f
          script <- P.option "" pScript
Packit Service d2f85f
          region <- P.option "" pRegion
Packit Service d2f85f
          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
Packit Service d2f85f
          P.eof
Packit Service d2f85f
          return Lang{   langLanguage = language
Packit Service d2f85f
                       , langScript = script
Packit Service d2f85f
                       , langRegion = region
Packit Service d2f85f
                       , langVariants = variants }
Packit Service d2f85f
        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
Packit Service d2f85f
        pLanguage = do
Packit Service d2f85f
          cs <- P.many1 asciiLetter
Packit Service d2f85f
          let lcs = length cs
Packit Service d2f85f
          guard $ lcs == 2 || lcs == 3
Packit Service d2f85f
          return $ map toLower cs
Packit Service d2f85f
        pScript = P.try $ do
Packit Service d2f85f
          P.char '-'
Packit Service d2f85f
          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
Packit Service d2f85f
          xs <- P.count 3
Packit Service d2f85f
                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
Packit Service d2f85f
          return $ map toLower (x:xs)
Packit Service d2f85f
        pRegion = P.try $ do
Packit Service d2f85f
          P.char '-'
Packit Service d2f85f
          cs <- P.many1 asciiLetter
Packit Service d2f85f
          let lcs = length cs
Packit Service d2f85f
          guard $ lcs == 2 || lcs == 3
Packit Service d2f85f
          return $ map toUpper cs
Packit Service d2f85f
        pVariant = P.try $ do
Packit Service d2f85f
          P.char '-'
Packit Service d2f85f
          ds <- P.option "" (P.count 1 P.digit)
Packit Service d2f85f
          cs <- P.many1 asciiLetter
Packit Service d2f85f
          let var = ds ++ cs
Packit Service d2f85f
          guard $ if null ds
Packit Service d2f85f
                     then length var >= 5 && length var <= 8
Packit Service d2f85f
                     else length var == 4
Packit Service d2f85f
          return $ map toLower var
Packit Service d2f85f
        pExtension = P.try $ do
Packit Service d2f85f
          P.char '-'
Packit Service d2f85f
          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
Packit Service d2f85f
          guard $ length cs >= 2 && length cs <= 8
Packit Service d2f85f
          return $ map toLower cs
Packit Service d2f85f
        pPrivateUse = P.try $ do
Packit Service d2f85f
          P.char '-'
Packit Service d2f85f
          P.char 'x'
Packit Service d2f85f
          P.char '-'
Packit Service d2f85f
          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
Packit Service d2f85f
          guard $ not (null cs) && length cs <= 8
Packit Service d2f85f
          let var = "x-" ++ cs
Packit Service d2f85f
          return $ map toLower var