|
Packit |
5b08af |
{-# LANGUAGE ExplicitForAll #-}
|
|
Packit |
5b08af |
{-# LANGUAGE Rank2Types #-}
|
|
Packit |
5b08af |
{-# LANGUAGE ScopedTypeVariables #-}
|
|
Packit |
5b08af |
{-# LANGUAGE ViewPatterns #-}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
module Main
|
|
Packit |
5b08af |
where
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Control.Arrow ( (***), second )
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Data.Char ( toLower, toUpper )
|
|
Packit |
5b08af |
import Data.List ( isPrefixOf )
|
|
Packit |
5b08af |
import qualified Data.ByteString.Char8 as B
|
|
Packit |
5b08af |
import qualified Data.ByteString.Lazy.Char8 as BL
|
|
Packit |
5b08af |
import Data.String (IsString (..))
|
|
Packit |
5b08af |
import qualified Data.Text as T
|
|
Packit |
5b08af |
import qualified Data.Text.Lazy as TL
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import System.Exit (ExitCode (..), exitWith)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Test.HUnit
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
import Text.Parsec
|
|
Packit |
5b08af |
import Text.Regex.XMLSchema.Generic
|
|
Packit |
5b08af |
import Text.Regex.XMLSchema.Generic.StringLike
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
newtype Test' a = Test' {unTest' :: Test}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
type BS = B.ByteString
|
|
Packit |
5b08af |
type BL = BL.ByteString
|
|
Packit |
5b08af |
type Text = T.Text
|
|
Packit |
5b08af |
type TextL = TL.Text
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
-- some little helpers for building r.e.s
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
star :: String -> String
|
|
Packit |
5b08af |
star = (++ "*") . pars
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
plus :: String -> String
|
|
Packit |
5b08af |
plus = (++ "+") . pars
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
opt :: String -> String
|
|
Packit |
5b08af |
opt = (++ "?") . pars
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dot :: String -> String
|
|
Packit |
5b08af |
dot = (++ "\\.")
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
pars :: String -> String
|
|
Packit |
5b08af |
pars = ("(" ++) . (++ ")")
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
orr :: String -> String -> String
|
|
Packit |
5b08af |
orr x y = pars $ pars x ++ "|" ++ pars y
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
xor :: String -> String -> String
|
|
Packit |
5b08af |
xor x y = pars $ pars x ++ "{|}" ++ pars y
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
nocase :: String -> String
|
|
Packit |
5b08af |
nocase (x:xs) = '[' : toUpper x : toLower x : ']' : xs
|
|
Packit |
5b08af |
nocase [] = error "nocase with empty list"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
alt :: [String] -> String
|
|
Packit |
5b08af |
alt = pars . foldr1 orr
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
altNC :: [String] -> String
|
|
Packit |
5b08af |
altNC = pars . alt . map nocase
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
subex :: String -> String -> String
|
|
Packit |
5b08af |
subex n e = pars $ "{" ++ n ++ "}" ++ pars e
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
ws :: String
|
|
Packit |
5b08af |
ws = "\\s"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
ws0 :: String
|
|
Packit |
5b08af |
ws0 = star ws
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
ws1 :: String
|
|
Packit |
5b08af |
ws1 = plus ws
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
s0 :: String -> String -> String
|
|
Packit |
5b08af |
s0 x y = x ++ ws0 ++ y
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- the date and time r.e.s
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
day :: String
|
|
Packit |
5b08af |
day = "(0?[1-9]|[12][0-9]|3[01])"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
month :: String
|
|
Packit |
5b08af |
month = "(0?[1-9]|1[0-2])"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
year2 :: String
|
|
Packit |
5b08af |
year2 = "[0-5][0-9]"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
year4 :: String
|
|
Packit |
5b08af |
year4 = "20" ++ year2
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
year :: String
|
|
Packit |
5b08af |
year = year4 `orr` year2
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
year' :: String
|
|
Packit |
5b08af |
year' = "'" ++ year2
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dayD :: String
|
|
Packit |
5b08af |
dayD = dot day
|
|
Packit |
5b08af |
monthD :: String
|
|
Packit |
5b08af |
monthD = dot month
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dayMonthYear :: String
|
|
Packit |
5b08af |
dayMonthYear = dayD `s0` monthD `s0` year
|
|
Packit |
5b08af |
dayMonth :: String
|
|
Packit |
5b08af |
dayMonth = dayD `s0` monthD
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dayOfWeekL :: String
|
|
Packit |
5b08af |
dayOfWeekL = altNC
|
|
Packit |
5b08af |
[ "montag"
|
|
Packit |
5b08af |
, "dienstag"
|
|
Packit |
5b08af |
, "mittwoch"
|
|
Packit |
5b08af |
, "donnerstag"
|
|
Packit |
5b08af |
, "freitag"
|
|
Packit |
5b08af |
, "samstag"
|
|
Packit |
5b08af |
, "sonnabend"
|
|
Packit |
5b08af |
, "sonntag"
|
|
Packit |
5b08af |
]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dayOfWeekA :: String
|
|
Packit |
5b08af |
dayOfWeekA = alt . map dot $
|
|
Packit |
5b08af |
[ "Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dayOfWeek :: String
|
|
Packit |
5b08af |
dayOfWeek = dayOfWeekL `orr` dayOfWeekA
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
monthL :: String
|
|
Packit |
5b08af |
monthL = altNC
|
|
Packit |
5b08af |
[ "januar"
|
|
Packit |
5b08af |
, "februar"
|
|
Packit |
5b08af |
, "märz"
|
|
Packit |
5b08af |
, "april"
|
|
Packit |
5b08af |
, "mai"
|
|
Packit |
5b08af |
, "juni"
|
|
Packit |
5b08af |
, "juli"
|
|
Packit |
5b08af |
, "august"
|
|
Packit |
5b08af |
, "september"
|
|
Packit |
5b08af |
, "oktober"
|
|
Packit |
5b08af |
, "november"
|
|
Packit |
5b08af |
, "dezember"
|
|
Packit |
5b08af |
]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
monthA :: String
|
|
Packit |
5b08af |
monthA = altNC . map dot $ map snd monthAbr
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
monthAbr :: [(Integer, String)]
|
|
Packit |
5b08af |
monthAbr = (9, "sept") :
|
|
Packit |
5b08af |
zip [1..12]
|
|
Packit |
5b08af |
[ "jan", "feb", "mär", "apr", "mai", "jun", "jul", "aug", "sep", "okt", "nov", "dez"]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
monthN :: String
|
|
Packit |
5b08af |
monthN = pars $ monthL `orr` monthA
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
hour :: String
|
|
Packit |
5b08af |
hour = pars "([0-1]?[0-9])|(2[0-4])"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
minute :: String
|
|
Packit |
5b08af |
minute = pars "(0?[0-9])|([1-5][0-9])"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
uhr :: String
|
|
Packit |
5b08af |
uhr = ws0 ++ nocase "uhr"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
hourMin :: String
|
|
Packit |
5b08af |
hourMin = hour ++ ":" ++ minute ++ opt uhr
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
wsyear :: String
|
|
Packit |
5b08af |
wsyear = year ++ "/[0-9]{2}"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
wsem :: String
|
|
Packit |
5b08af |
wsem = ("Wi?Se?" `orr` nocase "Wintersemester") ++ ws0 ++ wsyear
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
ssem :: String
|
|
Packit |
5b08af |
ssem = ("So?Se?" `orr` nocase "Sommersemester") ++ ws0 ++ year
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
sem :: String
|
|
Packit |
5b08af |
sem = wsem `orr` ssem
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
num :: String
|
|
Packit |
5b08af |
num = "\\d+"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- the token types
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
tokenRE :: String
|
|
Packit |
5b08af |
tokenRE = foldr1 xor $
|
|
Packit |
5b08af |
map (uncurry subex) $
|
|
Packit |
5b08af |
[ ( "ddmmyyyy", dayMonthYear )
|
|
Packit |
5b08af |
, ( "ddMonthyyyy", dayD `s0` monthN `s0` (year `orr` year') )
|
|
Packit |
5b08af |
, ( "ddmm", dayMonth)
|
|
Packit |
5b08af |
, ( "ddMonth", dayD `s0` monthN )
|
|
Packit |
5b08af |
, ( "yyyymmdd", year ++ "[-/]" ++ month ++ "[-/]" ++ day )
|
|
Packit |
5b08af |
, ( "yyyy", year4 `orr` ("'" ++ year2) )
|
|
Packit |
5b08af |
, ( "month", monthN )
|
|
Packit |
5b08af |
, ( "weekday", dayOfWeek )
|
|
Packit |
5b08af |
, ( "HHMM", hourMin ++ opt uhr )
|
|
Packit |
5b08af |
, ( "HH", hour ++ uhr )
|
|
Packit |
5b08af |
, ( "wsem", wsem)
|
|
Packit |
5b08af |
, ( "ssem", ssem)
|
|
Packit |
5b08af |
, ( "word", "[\\w\\d]+")
|
|
Packit |
5b08af |
, ( "del", "[^\\w\\d]+")
|
|
Packit |
5b08af |
]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
type Token = (String, String)
|
|
Packit |
5b08af |
type TokenStream = [Token]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
type DateParser a = Parsec [(String, String)] () a
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
type StringFct = String -> String -- for fast concatenation
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- must be extended for weekday or semester, if neccessay
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
data DateVal = DT { _year :: ! Int
|
|
Packit |
5b08af |
, _month :: ! Int
|
|
Packit |
5b08af |
, _day :: ! Int
|
|
Packit |
5b08af |
, _hour :: ! Int
|
|
Packit |
5b08af |
, _min :: ! Int
|
|
Packit |
5b08af |
}
|
|
Packit |
5b08af |
deriving (Eq, Show)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
data DateParse = DP { _pre :: StringFct
|
|
Packit |
5b08af |
, _rep :: StringFct
|
|
Packit |
5b08af |
, _dat :: ! DateVal
|
|
Packit |
5b08af |
}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- just a helper for result output
|
|
Packit |
5b08af |
data DateRep = DR { _p :: String
|
|
Packit |
5b08af |
, _r :: String
|
|
Packit |
5b08af |
, _d :: ! DateVal
|
|
Packit |
5b08af |
}
|
|
Packit |
5b08af |
deriving (Eq, Show)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
emptyText :: StringFct
|
|
Packit |
5b08af |
emptyText = id
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
mkText :: String -> StringFct
|
|
Packit |
5b08af |
mkText = (++)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
concText :: StringFct -> StringFct -> StringFct
|
|
Packit |
5b08af |
concText = (.)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
textToString :: StringFct -> String
|
|
Packit |
5b08af |
textToString = ($ [])
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
emptyDateVal :: DateVal
|
|
Packit |
5b08af |
emptyDateVal = DT { _year = -1
|
|
Packit |
5b08af |
, _month = -1
|
|
Packit |
5b08af |
, _day = -1
|
|
Packit |
5b08af |
, _hour = -1
|
|
Packit |
5b08af |
, _min = -1
|
|
Packit |
5b08af |
}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
emptyDateParse :: DateParse
|
|
Packit |
5b08af |
emptyDateParse = DP { _pre = emptyText
|
|
Packit |
5b08af |
, _rep = emptyText
|
|
Packit |
5b08af |
, _dat = emptyDateVal
|
|
Packit |
5b08af |
}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
appPre :: String -> DateParse -> DateParse
|
|
Packit |
5b08af |
appPre s d = d { _pre = _pre d `concText` mkText s }
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
appRep :: String -> DateParse -> DateParse
|
|
Packit |
5b08af |
appRep s d = d { _rep = _rep d `concText` mkText s }
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
setDay :: Int -> Int -> Int -> DateParse -> DateParse
|
|
Packit |
5b08af |
setDay j m t d = d { _dat = setDateVal j m t (-1) (-1) (_dat d) }
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
setHour :: Int -> Int -> DateParse -> DateParse
|
|
Packit |
5b08af |
setHour h m d = d { _dat = setDateVal (-1) (-1) (-1) h m (_dat d) }
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
setDateVal :: Int -> Int -> Int -> Int -> Int -> DateVal -> DateVal
|
|
Packit |
5b08af |
setDateVal j m t s i (DT j' m' t' s' i' )
|
|
Packit |
5b08af |
= DT j'' m'' t'' s'' i''
|
|
Packit |
5b08af |
where
|
|
Packit |
5b08af |
j'' | j < 0 = j' -- year not there
|
|
Packit |
5b08af |
| j < 100 = j + 2000 -- 2 digit year
|
|
Packit |
5b08af |
| otherwise = j -- 4 digit year
|
|
Packit |
5b08af |
m'' = m `max` m'
|
|
Packit |
5b08af |
t'' = t `max` t'
|
|
Packit |
5b08af |
s'' = s `max` s'
|
|
Packit |
5b08af |
i'' = i `max` i'
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
datePToDateRep :: DateParse -> DateRep
|
|
Packit |
5b08af |
datePToDateRep dp
|
|
Packit |
5b08af |
= DR { _p = textToString $ _pre dp
|
|
Packit |
5b08af |
, _r = textToString $ _rep dp
|
|
Packit |
5b08af |
, _d = _dat dp
|
|
Packit |
5b08af |
}
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- a simple helper for showing the results
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dateSearch' :: TokenStream -> [DateRep]
|
|
Packit |
5b08af |
dateSearch' = map datePToDateRep .
|
|
Packit |
5b08af |
dateSearch
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- look for a sequence of date specs, the last entry in the list
|
|
Packit |
5b08af |
-- does not contain a valid date, but just the context behind the last real date
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dateSearch :: TokenStream -> [DateParse]
|
|
Packit |
5b08af |
dateSearch = either (const []) id .
|
|
Packit |
5b08af |
parse (many (dateParser emptyDateParse)) ""
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- all date parsers thread a state the subparsers to accumulate
|
|
Packit |
5b08af |
-- the parts of a date, the context, the external representation and
|
|
Packit |
5b08af |
-- the pure data, year, month, day, ...
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dateParser :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
dateParser d = ( do
|
|
Packit |
5b08af |
s <- fillTok
|
|
Packit |
5b08af |
dateParser0 (appPre s d)
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
parseDate d -- here is the hook for the real date parser
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
s <- textTok -- the default case: if parseDate fails
|
|
Packit |
5b08af |
dateParser0 (appPre s d) -- the token is handled like a normal word
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dateParser0 :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
dateParser0 d = dateParser d <|> return d
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
parseDate :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseDate d = parseDate0 d
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
try
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
d1 <- parseWeekDay d
|
|
Packit |
5b08af |
lookAheadN 3 parseDate0 d1 -- Freitag, den 13.
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse a date optionally followed by a time
|
|
Packit |
5b08af |
parseDate0 :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseDate0 d = ( do
|
|
Packit |
5b08af |
d1 <- parseDay d
|
|
Packit |
5b08af |
option d1 (parseFollowingHour d1)
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse a simple token for a day
|
|
Packit |
5b08af |
parseDay :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseDay d = ( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "ddmmyyyy" d
|
|
Packit |
5b08af |
let [t, m, j] = tokenize num s
|
|
Packit |
5b08af |
return $ setDay (read j) (read m) (read t) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "ddMonthyyyy" d
|
|
Packit |
5b08af |
let s' = sed ((++ ".") . monthToM) monthN s
|
|
Packit |
5b08af |
let [t, m, j] = tokenize num s'
|
|
Packit |
5b08af |
return $ setDay (read j) (read m) (read t) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "ddmm" d
|
|
Packit |
5b08af |
let [t, m] = tokenize num s
|
|
Packit |
5b08af |
return $ setDay (-1) (read m) (read t) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "ddMonth" d
|
|
Packit |
5b08af |
let s' = sed ((++ ".") . monthToM) monthN s
|
|
Packit |
5b08af |
let [t, m] = tokenize num s'
|
|
Packit |
5b08af |
return $ setDay (-1) (read m) (read t) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "yyyymmdd" d
|
|
Packit |
5b08af |
let [j, m, t] = tokenize num s
|
|
Packit |
5b08af |
return $ setDay (read j) (read m) (read t) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
parseYear :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseYear d = ( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "yyyy" d
|
|
Packit |
5b08af |
let [j] = tokenize num s
|
|
Packit |
5b08af |
return $ setDay (read j) (-1) (-1) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse a weekday and add it to the external rep.
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
parseWeekDay :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseWeekDay d = ( do
|
|
Packit |
5b08af |
(_s, d') <- parseDateTok "weekday" d
|
|
Packit |
5b08af |
return d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse a following hour spec, 5 fill tokens, words or delimiters are possible
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
parseFollowingHour :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseFollowingHour
|
|
Packit |
5b08af |
= try . -- backtracking becomes neccessary
|
|
Packit |
5b08af |
lookAheadN 5 parseHour -- max 2 words and 3 delimiters
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse the simple time formats
|
|
Packit |
5b08af |
parseHour :: DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
parseHour d = ( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "HHMM" d
|
|
Packit |
5b08af |
let [h, m] = tokenize num s
|
|
Packit |
5b08af |
return $ setHour (read h) (read m) d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
<|>
|
|
Packit |
5b08af |
( do
|
|
Packit |
5b08af |
(s, d') <- parseDateTok "HH" d
|
|
Packit |
5b08af |
let [h] = tokenize num s
|
|
Packit |
5b08af |
return $ setHour (read h) 0 d'
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
--
|
|
Packit |
5b08af |
-- auxiliary parser combinators
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse a token of a given type and add the text to the external rep.
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
parseDateTok :: String -> DateParse -> DateParser (String, DateParse)
|
|
Packit |
5b08af |
parseDateTok tty d
|
|
Packit |
5b08af |
= dateTok (isTokType (== tty)) d
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dateTok :: DateParser String -> DateParse -> DateParser (String, DateParse)
|
|
Packit |
5b08af |
dateTok t d = ( do
|
|
Packit |
5b08af |
s <- t
|
|
Packit |
5b08af |
return (s, appRep s d)
|
|
Packit |
5b08af |
)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- try to apply a parser, but first skip a given # of fill tokens
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
lookAheadN :: Int -> (DateParse -> DateParser DateParse) -> DateParse -> DateParser DateParse
|
|
Packit |
5b08af |
lookAheadN n p d
|
|
Packit |
5b08af |
| n <= 0 = p d
|
|
Packit |
5b08af |
| otherwise = do
|
|
Packit |
5b08af |
(_, d1) <- dateTok fillTok d
|
|
Packit |
5b08af |
( lookAheadN (n - 1) p d1 <|> p d1 )
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
--
|
|
Packit |
5b08af |
-- basic token parsers
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- the interface to the primitive parsec token parser
|
|
Packit |
5b08af |
tok :: (Token -> Bool) -> DateParser Token
|
|
Packit |
5b08af |
tok prd = tokenPrim showTok nextPos testTok
|
|
Packit |
5b08af |
where
|
|
Packit |
5b08af |
showTok = show . fst
|
|
Packit |
5b08af |
nextPos pos _tok _ts = incSourceColumn pos 1
|
|
Packit |
5b08af |
testTok tk = if prd tk then Just tk else Nothing
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- check for specific token type and in case of success return the text value
|
|
Packit |
5b08af |
isTokType :: (String -> Bool) -> DateParser String
|
|
Packit |
5b08af |
isTokType isT = tok (isT . fst) >>= return . snd
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- parse an arbitrary token and return the text value
|
|
Packit |
5b08af |
textTok :: DateParser String
|
|
Packit |
5b08af |
textTok = isTokType (const True)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- a word
|
|
Packit |
5b08af |
wordTok :: DateParser String
|
|
Packit |
5b08af |
wordTok = isTokType (== "word")
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- a delimiter, whitespace is normalized, sequences are reduced to a single space char
|
|
Packit |
5b08af |
delTok :: DateParser String
|
|
Packit |
5b08af |
delTok = isTokType (== "del")
|
|
Packit |
5b08af |
>>=
|
|
Packit |
5b08af |
return . sed (const " ") ws1
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- tokens that don't contain date info
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
fillTok :: DateParser String
|
|
Packit |
5b08af |
fillTok = delTok <|> wordTok
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- semester tokens, not yet interpreted
|
|
Packit |
5b08af |
semTok' :: String -> DateParser (String, Int, Bool)
|
|
Packit |
5b08af |
semTok' sem' = do v <- isTokType (== sem')
|
|
Packit |
5b08af |
return (v, read . head . tokenizeExt year $ v, sem' == "ssem")
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
semTok :: DateParser (String, Int, Bool)
|
|
Packit |
5b08af |
semTok = semTok' "ssem" <|> semTok' "wsem"
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- conversion from month names to 1..12
|
|
Packit |
5b08af |
monthToM :: String -> String
|
|
Packit |
5b08af |
monthToM m
|
|
Packit |
5b08af |
= show .
|
|
Packit |
5b08af |
(\ l -> if null l then 99 else head l) .
|
|
Packit |
5b08af |
map fst .
|
|
Packit |
5b08af |
filter ((== True) . snd) .
|
|
Packit |
5b08af |
map (second (`isPrefixOf` map toLower m)) $
|
|
Packit |
5b08af |
monthAbr
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
ts :: String
|
|
Packit |
5b08af |
ts = "Am Sonntag, dem 17. Februar '03 findet um 9 Uhr ein wichtiger Termin für das Sommersemester 2000 statt. "
|
|
Packit |
5b08af |
++ "Dieser wird allerdings auf Montag verschoben. Und zwar auf den ersten Montag im Wintersemester 11/12, 12:30. "
|
|
Packit |
5b08af |
++ "Ein wichtiger Termin findet im SoSe 2011 statt. Im Jahr '12 gibt es Termine, aber auch in WS 2010/11. "
|
|
Packit |
5b08af |
++ "Ein weiterer Termin ist am 2.4.11 um 12 Uhr. Oder war es doch Di. der 3.4.? Egal. "
|
|
Packit |
5b08af |
++ "Ein weiterer wichtiger Termin findet am 2001-3-4 statt bzw. generell zwischen 01/3/4 - 01/6/4 um 13 Uhr. "
|
|
Packit |
5b08af |
++ "Am kommenden Mittwoch findet Changemanagement in HS5 statt. Dies gilt dann auch für den 7. Juni "
|
|
Packit |
5b08af |
++ "des Jahres 2011. Noch ein wichtiger Termin findet um 16:15 Uhr am Do., 1.2.03 statt. "
|
|
Packit |
5b08af |
++ "Freitag, der 13. Juli ist kein Glückstag"
|
|
Packit |
5b08af |
++ "und Freitag, der 13. Juli um 11:55 Uhr ist es zu spät."
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
rrr :: [String]
|
|
Packit |
5b08af |
rrr = map _r . dateSearch' . tokenizeSubex tokenRE $ ts
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
ddd :: [DateVal]
|
|
Packit |
5b08af |
ddd = map _d . dateSearch' . tokenizeSubex tokenRE $ ts
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
aaa :: [DateRep]
|
|
Packit |
5b08af |
aaa = dateSearch' . tokenizeSubex tokenRE $ ts
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
tt :: String -> [(String, String)]
|
|
Packit |
5b08af |
tt = tokenizeSubex tokenRE
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
dd :: String -> [DateVal]
|
|
Packit |
5b08af |
dd = map _d . dateSearch' . tt
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
rr :: String -> [String]
|
|
Packit |
5b08af |
rr = map _r . dateSearch' . tt
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
pp :: String -> [String]
|
|
Packit |
5b08af |
pp = map _p . dateSearch' . tt
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
testDate :: forall a . StringLike a => Test' a
|
|
Packit |
5b08af |
testDate
|
|
Packit |
5b08af |
= Test' $
|
|
Packit |
5b08af |
TestLabel "date and time extraction from free text" $
|
|
Packit |
5b08af |
TestList $
|
|
Packit |
5b08af |
zipWith parseT toks exx
|
|
Packit |
5b08af |
where
|
|
Packit |
5b08af |
parseT res ok
|
|
Packit |
5b08af |
= TestCase $
|
|
Packit |
5b08af |
assertEqual (show res ++ " == " ++ show ok) res ok
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
toks :: [(a, a)]
|
|
Packit |
5b08af |
toks = tokenizeSubex (fromString tokenRE) (fromString ts)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
exx :: [(a, a)]
|
|
Packit |
5b08af |
exx = map (fromString *** fromString) $
|
|
Packit |
5b08af |
[("word","Am"),("del"," "),("weekday","Sonntag"),("del",", "),("word","dem"),("del"," ")
|
|
Packit |
5b08af |
,("ddMonthyyyy","17. Februar '03"),("del"," "),("word","findet"),("del"," "),("word","um")
|
|
Packit |
5b08af |
,("del"," "),("HH","9 Uhr"),("del"," "),("word","ein"),("del"," "),("word","wichtiger")
|
|
Packit |
5b08af |
,("del"," "),("word","Termin"),("del"," "),("word","f\252r"),("del"," "),("word","das")
|
|
Packit |
5b08af |
,("del"," "),("ssem","Sommersemester 2000"),("del"," "),("word","statt"),("del",". ")
|
|
Packit |
5b08af |
,("word","Dieser"),("del"," "),("word","wird"),("del"," "),("word","allerdings")
|
|
Packit |
5b08af |
,("del"," "),("word","auf"),("del"," "),("weekday","Montag"),("del"," ")
|
|
Packit |
5b08af |
,("word","verschoben"),("del",". "),("word","Und"),("del"," "),("word","zwar")
|
|
Packit |
5b08af |
,("del"," "),("word","auf"),("del"," "),("word","den"),("del"," "),("word","ersten")
|
|
Packit |
5b08af |
,("del"," "),("weekday","Montag"),("del"," "),("word","im"),("del"," ")
|
|
Packit |
5b08af |
,("wsem","Wintersemester 11/12"),("del",", "),("HHMM","12:30"),("del",". ")
|
|
Packit |
5b08af |
,("word","Ein"),("del"," "),("word","wichtiger"),("del"," "),("word","Termin")
|
|
Packit |
5b08af |
,("del"," "),("word","findet"),("del"," "),("word","im"),("del"," "),("ssem","SoSe 2011")
|
|
Packit |
5b08af |
,("del"," "),("word","statt"),("del",". "),("word","Im"),("del"," "),("word","Jahr")
|
|
Packit |
5b08af |
,("del"," '"),("word","12"),("del"," "),("word","gibt"),("del"," "),("word","es")
|
|
Packit |
5b08af |
,("del"," "),("word","Termine"),("del",", "),("word","aber"),("del"," "),("word","auch")
|
|
Packit |
5b08af |
,("del"," "),("word","in"),("del"," "),("wsem","WS 2010/11"),("del",". "),("word","Ein")
|
|
Packit |
5b08af |
,("del"," "),("word","weiterer"),("del"," "),("word","Termin"),("del"," "),("word","ist")
|
|
Packit |
5b08af |
,("del"," "),("word","am"),("del"," "),("ddmmyyyy","2.4.11"),("del"," "),("word","um")
|
|
Packit |
5b08af |
,("del"," "),("HH","12 Uhr"),("del",". "),("word","Oder"),("del"," "),("word","war")
|
|
Packit |
5b08af |
,("del"," "),("word","es"),("del"," "),("word","doch"),("del"," "),("weekday","Di.")
|
|
Packit |
5b08af |
,("del"," "),("word","der"),("del"," "),("ddmm","3.4."),("del","? "),("word","Egal")
|
|
Packit |
5b08af |
,("del",". "),("word","Ein"),("del"," "),("word","weiterer"),("del"," ")
|
|
Packit |
5b08af |
,("word","wichtiger"),("del"," "),("word","Termin"),("del"," "),("word","findet")
|
|
Packit |
5b08af |
,("del"," "),("word","am"),("del"," "),("yyyymmdd","2001-3-4"),("del"," ")
|
|
Packit |
5b08af |
,("word","statt"),("del"," "),("word","bzw"),("del",". "),("word","generell")
|
|
Packit |
5b08af |
,("del"," "),("word","zwischen"),("del"," "),("yyyymmdd","01/3/4"),("del"," - ")
|
|
Packit |
5b08af |
,("yyyymmdd","01/6/4"),("del"," "),("word","um"),("del"," "),("HH","13 Uhr")
|
|
Packit |
5b08af |
,("del",". "),("word","Am"),("del"," "),("word","kommenden"),("del"," ")
|
|
Packit |
5b08af |
,("weekday","Mittwoch"),("del"," "),("word","findet"),("del"," ")
|
|
Packit |
5b08af |
,("word","Changemanagement"),("del"," "),("word","in"),("del"," "),("word","HS5")
|
|
Packit |
5b08af |
,("del"," "),("word","statt"),("del",". "),("word","Dies"),("del"," "),("word","gilt")
|
|
Packit |
5b08af |
,("del"," "),("word","dann"),("del"," "),("word","auch"),("del"," "),("word","f\252r")
|
|
Packit |
5b08af |
,("del"," "),("word","den"),("del"," "),("ddMonth","7. Juni"),("del"," "),("word","des")
|
|
Packit |
5b08af |
,("del"," "),("word","Jahres"),("del"," "),("yyyy","2011"),("del",". "),("word","Noch")
|
|
Packit |
5b08af |
,("del"," "),("word","ein"),("del"," "),("word","wichtiger"),("del"," "),("word","Termin")
|
|
Packit |
5b08af |
,("del"," "),("word","findet"),("del"," "),("word","um"),("del"," "),("HHMM","16:15 Uhr")
|
|
Packit |
5b08af |
,("del"," "),("word","am"),("del"," "),("weekday","Do."),("del",", "),("ddmmyyyy","1.2.03")
|
|
Packit |
5b08af |
,("del"," "),("word","statt"),("del",". "),("weekday","Freitag"),("del",", ")
|
|
Packit |
5b08af |
,("word","der"),("del"," "),("ddMonth","13. Juli"),("del"," "),("word","ist"),("del"," ")
|
|
Packit |
5b08af |
,("word","kein"),("del"," "),("word","Gl\252ckstagund"),("del"," "),("weekday","Freitag")
|
|
Packit |
5b08af |
,("del",", "),("word","der"),("del"," "),("ddMonth","13. Juli"),("del"," "),("word","um")
|
|
Packit |
5b08af |
,("del"," "),("HHMM","11:55 Uhr"),("del"," "),("word","ist"),("del"," "),("word","es")
|
|
Packit |
5b08af |
,("del"," "),("word","zu"),("del"," "),("word","sp\228t"),("del",".")
|
|
Packit |
5b08af |
]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
genericTest :: (forall a . StringLike a => Test' a) -> Test
|
|
Packit |
5b08af |
genericTest t
|
|
Packit |
5b08af |
= TestList $
|
|
Packit |
5b08af |
[ TestLabel "Test with 'String'" $ unTest' (t :: Test' String)
|
|
Packit |
5b08af |
, TestLabel "Test with 'Text'" $ unTest' (t :: Test' Text)
|
|
Packit |
5b08af |
, TestLabel "Test with 'Text.Lazy'" $ unTest' (t :: Test' TextL)
|
|
Packit |
5b08af |
, TestLabel "Test with 'ByteString'" $ unTest' (t :: Test' BS)
|
|
Packit |
5b08af |
, TestLabel "Test with 'ByteString.Lazy'" $ unTest' (t :: Test' BL)
|
|
Packit |
5b08af |
]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
allTests :: Test
|
|
Packit |
5b08af |
allTests
|
|
Packit |
5b08af |
= TestList
|
|
Packit |
5b08af |
[ genericTest testDate ]
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
main :: IO ()
|
|
Packit |
5b08af |
main
|
|
Packit |
5b08af |
= do
|
|
Packit |
5b08af |
c <- runTestTT allTests
|
|
Packit |
5b08af |
putStrLn $ show c
|
|
Packit |
5b08af |
let errs = errors c
|
|
Packit |
5b08af |
fails = failures c
|
|
Packit |
5b08af |
exitWith (codeGet errs fails)
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
codeGet :: Int -> Int -> ExitCode
|
|
Packit |
5b08af |
codeGet errs fails
|
|
Packit |
5b08af |
| fails > 0 = ExitFailure 2
|
|
Packit |
5b08af |
| errs > 0 = ExitFailure 1
|
|
Packit |
5b08af |
| otherwise = ExitSuccess
|
|
Packit |
5b08af |
|
|
Packit |
5b08af |
-- ------------------------------------------------------------
|