From 247f4e5b4ad3dbf1e95ec12a15fa0d7377e36be1 Mon Sep 17 00:00:00 2001
From: Packit <packit>
Date: Sep 25 2020 16:06:22 +0000
Subject: ghc-tagsoup-0.14.2 base


---
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 0000000..cb3156d
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1,86 @@
+Changelog for TagSoup
+
+0.14.2
+    #66, make sure positions are correct for lone & characters
+0.14.1
+    #63, add maybeAttrib
+0.14
+    #14, eliminate Text.HTML.Download
+0.13.10
+    #51, improve the Haddock documentation
+    #52, fix some > 16bit HTML entities
+0.13.9
+    #50, fix a space leak
+    #36, fix the demo examples
+    #35, make IsString a superclass of StringLike
+    #33, make flattenTree O(n) instead of O(n^2)
+0.13.8
+    #30, add parse/render functions directly to the Tree module
+0.13.7
+    #32, make sure upper case &#X works in lookupEntity
+0.13.6
+    #28, some named entities require a trailing semicolon (e.g. mid)
+0.13.5
+    #26, rename the test program to test-tagsoup
+0.13.4
+    #24, add isTagComment function
+    Update the copyright year
+0.13.3
+    Work on GHC 7.9
+0.13.2
+    Remove all package upper bounds
+    Allow QuickCheck-2.6
+0.13.1
+    #562, treat <script> specially as per HTML5
+0.13
+    #616, extend to all HTML5 entities
+    Optimise lookupNamedEntity
+    Replace escapeXMLChar with escapeXML
+    Change all Entity functions to return String, not Int or Char
+0.12.7
+    Allow deepseq-1.3
+    Allow QuickCheck-2.5
+    Support bytestring-0.10 (NFData instances added)
+0.12.6
+    #515, don't crash on malformed characters (use ? instead)
+0.12.5
+    Add optRawTag to Render, to ensure script's don't get escaped
+0.12.4
+    #487, fix the behaviour of ~== for TagComment and others
+0.12.3
+    GHC 7.2 compatibility
+0.12.2
+    Add StringLike instances for Text
+0.12.1
+    Add parseOptionsEntities and improve documentation
+0.12
+    Upgrade to QuickCheck 2.4.*
+    Export toTagRep
+    Make the -download flag off by default
+    Eliminate HTTP dependency
+    Eliminate mtl dependency
+0.11.1
+    Support --flags=-download to eliminate the network dependency
+0.11
+    #326, <a "foo"> is no longer treated as an attribute
+    Add Eq/Ord instances to Tree
+    Don't mark Text.HTML.TagSoup.Tree as preliminary
+    #325, \r should be treated as whitespace
+0.10.1
+    #322, don't change ' to &apos; in render (do what the docs say)
+0.10
+    Improve the cabal file, make the test program off by default
+    Expose Text.HTML.TagSoup.Match again (was hidden accidentally)
+0.9
+    #294, let optEntityData see if there was a ';' (CHANGES API)
+    Numeric/hex entities in attributes were misparsed
+    #149, add escapeHTML function
+0.8
+    Parser now based on HTML5 specification
+    Tag is now parameterised by the string type
+0.6
+    Addition of Text.HTML.TagSoup.Tree and Text.HTML.TagSoup.Render
+    Text.HTML.TagSoup.Parser.Options renamed to ParseOptions
+    Text.HTML.TagSoup.Parser.options renamed to parseOptions
+0.4
+    Changelog started
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..39ee0e3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+Copyright Neil Mitchell 2006-2017.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of Neil Mitchell nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..364211d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,262 @@
+# TagSoup [![Hackage version](https://img.shields.io/hackage/v/tagsoup.svg?label=Hackage)](https://hackage.haskell.org/package/tagsoup) [![Stackage version](https://www.stackage.org/package/tagsoup/badge/lts?label=Stackage)](https://www.stackage.org/package/tagsoup) [![Linux Build Status](https://img.shields.io/travis/ndmitchell/tagsoup.svg?label=Linux%20build)](https://travis-ci.org/ndmitchell/tagsoup) [![Windows Build Status](https://img.shields.io/appveyor/ci/ndmitchell/tagsoup.svg?label=Windows%20build)](https://ci.appveyor.com/project/ndmitchell/tagsoup)
+
+TagSoup is a library for parsing HTML/XML. It supports the HTML 5 specification, and can be used to parse either well-formed XML, or unstructured and malformed HTML from the web. The library also provides useful functions to extract information from an HTML document, making it ideal for screen-scraping.
+
+The library provides a basic data type for a list of unstructured tags, a parser to convert HTML into this tag type, and useful functions and combinators for finding and extracting information. This document gives two particular examples of scraping information from the web, while a few more may be found in the [Sample](https://github.com/ndmitchell/tagsoup/blob/master/TagSoup/Sample.hs) file from the source repository. The examples we give are:
+
+* Obtaining the last modified date of the Haskell wiki
+* Obtaining a list of Simon Peyton Jones' latest papers
+* A brief overview of some other examples
+
+The initial version of this library was written in JavaScript and has been used for various commercial projects involving screen scraping. In the examples general hints on screen scraping are included, learnt from bitter experience. It should be noted that if you depend on data which someone else may change at any given time, you may be in for a shock!
+
+This library was written without knowledge of the Java version of [TagSoup](http://home.ccil.org/~cowan/XML/tagsoup/). They have made a very different design decision: to ensure default attributes are present and to properly nest parsed tags. We do not do this - tags are merely a list devoid of nesting information.
+
+
+#### Acknowledgements
+
+Thanks to Mike Dodds for persuading me to write this up as a library. Thanks to many people for debugging and code contributions, including: Gleb Alexeev, Ketil Malde, Conrad Parker, Henning Thielemann, Dino Morelli, Emily Mitchell, Gwern Branwen.
+
+
+## Potential Bugs
+
+There are two things that may go wrong with these examples:
+
+* _The Websites being scraped may change._ There is nothing I can do about this, but if you suspect this is the case let me know, and I'll update the examples and tutorials. I have already done so several times, it's only a few minutes work.
+* _The `openURL` method may not work._ This happens quite regularly, and depending on your server, proxies and direction of the wind, it may not work. The solution is to use `wget` to download the page locally, then use `readFile` instead. Hopefully a decent Haskell HTTP library will emerge, and that can be used instead.
+
+
+## Last modified date of Haskell wiki
+
+Our goal is to develop a program that displays the date that the wiki at
+[`wiki.haskell.org`](http://wiki.haskell.org/Haskell) was last modified. This
+example covers all the basics in designing a basic web-scraping application.
+
+### Finding the Page
+
+We first need to find where the information is displayed and in what format.
+Taking a look at the [front web page](http://wiki.haskell.org/Haskell), when
+not logged in, we see:
+
+```html
+<ul id="f-list">
+  <li id="lastmod"> This page was last modified on 9 September 2013, at 22:38.</li>
+  <li id="copyright">Recent content is available under <a href="/HaskellWiki:Copyrights" title="HaskellWiki:Copyrights">a simple permissive license</a>.</li>
+  <li id="privacy"><a href="/HaskellWiki:Privacy_policy" title="HaskellWiki:Privacy policy">Privacy policy</a></li>
+  <li id="about"><a href="/HaskellWiki:About" title="HaskellWiki:About">About HaskellWiki</a></li>
+  <li id="disclaimer"><a href="/HaskellWiki:General_disclaimer" title="HaskellWiki:General disclaimer">Disclaimers</a></li>
+</ul>
+```
+
+So, we see that the last modified date is available. This leads us to rule 1:
+
+**Rule 1:** Scrape from what the page returns, not what a browser renders, or what view-source gives.
+
+Some web servers will serve different content depending on the user agent, some browsers will have scripting modify their displayed HTML, some pages will display differently depending on your cookies. Before you can start to figure out how to start scraping, first decide what the input to your program will be. There are two ways to get the page as it will appear to your program.
+
+#### Using the HTTP package
+
+We can write a simple HTTP downloader using the [HTTP package](http://hackage.haskell.org/package/HTTP):
+
+```haskell
+module Main where
+
+import Network.HTTP
+
+openURL :: String -> IO String
+openURL x = getResponseBody =<< simpleHTTP (getRequest x)
+
+main :: IO ()
+main = do
+    src <- openURL "http://wiki.haskell.org/Haskell"
+    writeFile "temp.htm" src
+```
+
+Now open `temp.htm`, find the fragment of HTML containing the last modified date, and examine it.
+
+#### Using the `tagsoup` Program
+
+TagSoup installs both as a library and a program. The program contains all the
+examples mentioned on this page, along with a few other useful functions. In
+order to download a URL to a file:
+
+```bash
+$ tagsoup grab http://wiki.haskell.org/Haskell > temp.htm
+```
+
+### Finding the Information
+
+Now we examine both the fragment that contains our snippet of information, and
+the wider page. What does the fragment have that nothing else has? What
+algorithm would we use to obtain that particular element? How can we still
+return the element as the content changes? What if the design changes? But
+wait, before going any further:
+
+**Rule 2:** Do not be robust to design changes, do not even consider the possibility when writing the code.
+
+If the user changes their website, they will do so in unpredictable ways. They may move the page, they may put the information somewhere else, they may remove the information entirely. If you want something robust talk to the site owner, or buy the data from someone. If you try and think about design changes, you will complicate your design, and it still won't work. It is better to write an extraction method quickly, and happily rewrite it when things change.
+
+So now, let's consider the fragment from above. It is useful to find a tag
+which is unique just above your snippet - something with a nice `id` or `class`
+attribute - something which is unlikely to occur multiple times. In the above
+example, an `id` with value  `lastmod` seems perfect.
+
+```haskell
+module Main where
+
+import Data.Char
+import Network.HTTP
+import Text.HTML.TagSoup
+
+openURL :: String -> IO String
+openURL x = getResponseBody =<< simpleHTTP (getRequest x)
+
+haskellLastModifiedDateTime :: IO ()
+haskellLastModifiedDateTime = do
+    src <- openURL "http://wiki.haskell.org/Haskell"
+    let lastModifiedDateTime = fromFooter $ parseTags src
+    putStrLn $ "wiki.haskell.org was last modified on " ++ lastModifiedDateTime
+    where fromFooter = unwords . drop 6 . words . innerText . take 2 . dropWhile (~/= "<li id=lastmod>")
+
+main :: IO ()
+main = haskellLastModifiedDateTime
+```
+
+Now we start writing the code! The first thing to do is open the required URL, then we parse the code into a list of `Tag`s with `parseTags`. The `fromFooter` function does the interesting thing, and can be read right to left:
+
+* First we throw away everything (`dropWhile`) until we get to an `li` tag
+  containing `id=lastmod`. The `(~==)` and `(~/=)` operators are different from
+standard equality and inequality since they allow additional attributes to be
+present. We write `"<li id=lastmod>"` as syntactic sugar for `TagOpen "li"
+[("id","lastmod")]`. If we just wanted any open tag with the given `id`
+attribute we could have written `(~== TagOpen "" [("id","lastmod")])` and this
+would have matched.  Any empty strings in the second element of the match are
+considered as wildcards.
+* Next we take two elements: the `<li>` tag and the text node immediately
+  following.
+* We call the `innerText` function to get all the text values from inside,
+  which will just be the text node following the `lastmod`.
+* We split the string into a series of words and drop the first six, i.e. the
+  words `This`, `page`, `was`, `last`, `modified` and `on`
+* We reassemble the remaining words into the resulting string `9 September
+  2013, at 22:38.`
+
+This code may seem slightly messy, and indeed it is - often that is the nature of extracting information from a tag soup.
+
+**Rule 3:** TagSoup is for extracting information where structure has been lost, use more structured information if it is available.
+
+
+## Simon's Papers
+
+Our next very important task is to extract a list of all Simon Peyton Jones' recent research papers off his [home page](http://research.microsoft.com/en-us/people/simonpj/). The largest change to the previous example is that now we desire a list of papers, rather than just a single result.
+
+As before we first start by writing a simple program that downloads the appropriate page, and look for common patterns. This time we want to look for all patterns which occur every time a paper is mentioned, but nowhere else. The other difference from last time is that previously we grabbed an automatically generated piece of information - this time the information is entered in a more freeform way by a human.
+
+First we spot that the page helpfully has named anchors, there is a current work anchor, and after that is one for Haskell. We can extract all the information between them with a simple `take`/`drop` pair:
+
+```haskell
+takeWhile (~/= "<a name=haskell>") $
+drop 5 $ dropWhile (~/= "<a name=current>") tags
+```
+
+This code drops until you get to the "current" section, then takes until you get to the "haskell" section, ensuring we only look at the important bit of the page. Next we want to find all hyperlinks within this section:
+
+```haskell
+map f $ sections (~== "<A>") $ ...
+```
+
+Remember that the function to select all tags with name "A" could have been written as `(~== TagOpen "A" [])`, or alternatively `isTagOpenName "A"`. Afterwards we map each item with an `f` function. This function needs to take the tags starting just after the link, and find the text inside the link.
+
+```haskell
+f = dequote . unwords . words . fromTagText . head . filter isTagText
+```
+
+Here the complexity of interfacing to human written markup comes through. Some of the links are in italic, some are not - the `filter` drops every tag that is not a text node, so that `head` picks out the first pure text node. The `unwords . words` deletes all multiple spaces, replaces tabs and newlines with spaces and trims the front and back - a neat trick when dealing with text which has spacing at the source code but not when displayed. The final thing to take account of is that some papers are given with quotes around the name, some are not - `dequote` will remove the quotes if they exist.
+
+For completeness, we now present the entire example:
+
+```haskell
+module Main where
+
+import Network.HTTP
+import Text.HTML.TagSoup
+
+openURL :: String -> IO String
+openURL x = getResponseBody =<< simpleHTTP (getRequest x)
+
+spjPapers :: IO ()
+spjPapers = do
+        tags <- parseTags <$> openURL "http://research.microsoft.com/en-us/people/simonpj/"
+        let links = map f $ sections (~== "<A>") $
+                    takeWhile (~/= "<a name=haskell>") $
+                    drop 5 $ dropWhile (~/= "<a name=current>") tags
+        putStr $ unlines links
+    where
+        f :: [Tag String] -> String
+        f = dequote . unwords . words . fromTagText . head . filter isTagText
+
+        dequote ('\"':xs) | last xs == '\"' = init xs
+        dequote x = x
+
+main :: IO ()
+main = spjPapers
+```
+
+## Other Examples
+
+Several more examples are given in the Sample file, including obtaining the (short) list of papers from my site, getting the current time and a basic XML validator. All can be invoked using the `tagsoup` executable program. All use very much the same style as presented here - writing screen scrapers follows a standard pattern. We present the code from two for enjoyment only.
+
+### My Papers
+
+```haskell
+module Main where
+
+import Network.HTTP
+import Text.HTML.TagSoup
+
+openURL :: String -> IO String
+openURL x = getResponseBody =<< simpleHTTP (getRequest x)
+
+ndmPapers :: IO ()
+ndmPapers = do
+        tags <- parseTags <$> openURL "http://community.haskell.org/~ndm/downloads/"
+        let papers = map f $ sections (~== "<li class=paper>") tags
+        putStr $ unlines papers
+    where
+        f :: [Tag String] -> String
+        f xs = fromTagText (xs !! 2)
+
+main :: IO ()
+main = ndmPapers
+```
+
+### UK Time
+
+```haskell
+module Main where
+
+import Network.HTTP
+import Text.HTML.TagSoup
+
+openURL :: String -> IO String
+openURL x = getResponseBody =<< simpleHTTP (getRequest x)
+
+currentTime :: IO ()
+currentTime = do
+    tags <- parseTags <$> openURL "http://www.timeanddate.com/worldclock/uk/london"
+    let time = fromTagText (dropWhile (~/= "<span id=ct>") tags !! 1)
+    putStrLn time
+
+main :: IO ()
+main = currentTime
+```
+        
+## Related Projects
+
+* [TagSoup for Java](http://tagsoup.info/) - an independently written malformed HTML parser for Java. Including [links to other](http://tagsoup.info/#other) HTML parsers.
+* [HXT: Haskell XML Toolbox](http://www.fh-wedel.de/~si/HXmlToolbox/) - a more comprehensive XML parser, giving the option of using TagSoup as a lexer.
+* [Other Related Work](http://www.fh-wedel.de/~si/HXmlToolbox/#rel) - as described on the HXT pages.
+* [Using TagSoup with Parsec](http://therning.org/magnus/posts/2008-08-08-367-tagsoup-meet-parsec.html) - a nice combination of Haskell libraries.
+* [tagsoup-parsec](http://hackage.haskell.org/package/tagsoup-parsec) - a library for easily using TagSoup as a token type in Parsec.
+* [tagsoup-megaparsec](http://hackage.haskell.org/package/tagsoup-megaparsec) - a library for easily using TagSoup as a token type in Megaparsec.
+* [WraXML](http://hackage.haskell.org/packages/archive/wraxml/latest/doc/html/Text-XML-WraXML-Tree-TagSoup.html) - construct a lazy tree from TagSoup lexemes.
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/src/Text/HTML/TagSoup.hs b/src/Text/HTML/TagSoup.hs
new file mode 100644
index 0000000..d034181
--- /dev/null
+++ b/src/Text/HTML/TagSoup.hs
@@ -0,0 +1,117 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, PatternGuards #-}
+
+{-|
+    This module is for working with HTML/XML. It deals with both well-formed XML and
+    malformed HTML from the web. It features:
+
+    * A lazy parser, based on the HTML 5 specification - see 'parseTags'.
+
+    * A renderer that can write out HTML/XML - see 'renderTags'.
+
+    * Utilities for extracting information from a document - see '~==', 'sections' and 'partitions'.
+
+    The standard practice is to parse a 'String' to @[@'Tag' 'String'@]@ using 'parseTags',
+    then operate upon it to extract the necessary information.
+-}
+
+module Text.HTML.TagSoup(
+    -- * Data structures and parsing
+    Tag(..), Row, Column, Attribute,
+    module Text.HTML.TagSoup.Parser,
+    module Text.HTML.TagSoup.Render,
+    canonicalizeTags,
+
+    -- Note: the "#tag-identification#" creates an anchor that's linked to from Match.hs
+    -- * #tag-identification# Tag identification
+    isTagOpen, isTagClose, isTagText, isTagWarning, isTagPosition,
+    isTagOpenName, isTagCloseName, isTagComment,
+
+    -- * Extraction
+    fromTagText, fromAttrib,
+    maybeTagText, maybeTagWarning,
+    innerText,
+
+    -- * Utility
+    sections, partitions,
+    
+    -- * Combinators
+    TagRep(..), (~==),(~/=)
+    ) where
+
+import Text.HTML.TagSoup.Type
+import Text.HTML.TagSoup.Parser
+import Text.HTML.TagSoup.Render
+import Data.Char
+import Data.List (groupBy, tails)
+import Text.StringLike
+
+
+-- | Normalise the case of tags: names and attribute keys of ordinary tags are
+--   lower-cased, while the name following the @!@ of a @!@ tag is upper-cased.
+canonicalizeTags :: StringLike str => [Tag str] -> [Tag str]
+canonicalizeTags = map canon
+    where
+        canon (TagOpen name attrs) = case uncons name of
+            Just ('!',rest) -> TagOpen (cons '!' (recase toUpper rest)) attrs
+            _ -> TagOpen (recase toLower name) [(recase toLower key, val) | (key,val) <- attrs]
+        canon (TagClose name) = TagClose (recase toLower name)
+        canon other = other
+
+        recase conv = fromString . map conv . toString
+
+
+-- | Things usable as the pattern of a match: a 'Tag' itself, or a 'String' holding exactly one tag.
+class TagRep a where
+    -- | Turn the value into a 'Tag' over whichever string type is required.
+    toTagRep :: StringLike str => a -> Tag str
+
+instance StringLike str => TagRep (Tag str) where toTagRep tag = fmap castString tag
+
+instance TagRep String where
+    toTagRep str
+        | [tag] <- parseTags str = toTagRep tag
+        | otherwise = error $ "When using a TagRep it must be exactly one tag, you gave: " ++ str
+
+
+
+-- | Performs an inexact match of a tag (left) against a pattern (right).
+-- A blank string in the pattern is a wildcard that matches anything.
+-- For example:
+--
+-- > (TagText "test" ~== TagText ""    ) == True
+-- > (TagText "test" ~== TagText "test") == True
+-- > (TagText "test" ~== TagText "soup") == False
+--
+-- For 'TagOpen' the tag may carry attributes that the pattern does not mention.
+(~==) :: (StringLike str, TagRep t) => Tag str -> t -> Bool
+(~==) a b = f a (toTagRep b)
+    where
+        f (TagText y) (TagText x) = strNull x || x == y
+        f (TagClose y) (TagClose x) = strNull x || x == y
+        f (TagOpen y ys) (TagOpen x xs) = (strNull x || x == y) && all g xs
+            where
+                g (name,val) | strNull name = val  `elem` map snd ys
+                             | strNull val  = name `elem` map fst ys
+                g nameval = nameval `elem` ys
+        f (TagComment y) (TagComment x) = strNull x || x == y -- x is the pattern's text, as in the TagText case
+        f (TagWarning y) (TagWarning x) = strNull x || x == y
+        f (TagPosition x1 x2) (TagPosition y1 y2) = x1 == y1 && x2 == y2
+        f _ _ = False
+
+-- | The negation of '~==': holds exactly when the tag does not match the pattern.
+(~/=) :: (StringLike str, TagRep t) => Tag str -> t -> Bool
+tag ~/= pat = not (tag ~== pat)
+
+
+
+-- | Returns all suffixes of the list whose first element
+--   satisfies the predicate, longest suffix first.
+sections :: (a -> Bool) -> [a] -> [[a]]
+sections p xs = [suffix | suffix@(x:_) <- tails xs, p x]
+
+-- | Like 'sections', but cuts the list into disjoint chunks: each chunk begins
+--   at a matching element, and anything before the first match is dropped.
+partitions :: (a -> Bool) -> [a] -> [[a]]
+partitions p = groupBy continues . dropWhile (not . p)
+    where
+        continues _ next = not (p next)
diff --git a/src/Text/HTML/TagSoup/Entity.hs b/src/Text/HTML/TagSoup/Entity.hs
new file mode 100644
index 0000000..56fdd77
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Entity.hs
@@ -0,0 +1,2320 @@
+-- | This module converts between HTML/XML entities (i.e. @&amp;@) and
+--   the characters they represent.
+module Text.HTML.TagSoup.Entity(
+    lookupEntity, lookupNamedEntity, lookupNumericEntity,
+    escapeXML,
+    xmlEntities, htmlEntities
+    ) where
+
+import Data.Char (chr, ord)
+import qualified Data.IntMap as IntMap
+import Data.Ix
+import qualified Data.Map as Map
+import Numeric (readHex)
+
+
+-- | Look up an entity: when the name begins with @#@ the rest is passed to
+--   'lookupNumericEntity', otherwise the whole name goes to 'lookupNamedEntity'.
+lookupEntity :: String -> Maybe String
+lookupEntity (c:code) | c == '#' = lookupNumericEntity code
+lookupEntity name = lookupNamedEntity name
+
+
+-- | Resolve a numeric entity (leading @\'#\'@ already removed); an @x@ or @X@ prefix means hexadecimal.
+--
+-- > lookupNumericEntity "65" == Just "A"
+-- > lookupNumericEntity "x41" == Just "A"
+-- > lookupNumericEntity "x4E" == Just "N"
+-- > lookupNumericEntity "x4e" == Just "N"
+-- > lookupNumericEntity "X4e" == Just "N"
+-- > lookupNumericEntity "Haskell" == Nothing
+-- > lookupNumericEntity "" == Nothing
+-- > lookupNumericEntity "89439085908539082" == Nothing
+lookupNumericEntity :: String -> Maybe String
+lookupNumericEntity str = case str of
+        -- entity = '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
+        c:hex | c `elem` "xX" -> decode hexDigits readHex hex
+        _ -> decode decDigits reads str
+    where
+        decDigits = [('0','9')]
+        hexDigits = decDigits ++ [('a','f'),('A','F')]
+
+        -- Parse a non-empty run of permitted digits and turn it into a one-character string.
+        decode :: [(Char,Char)] -> ReadS Integer -> String -> Maybe String
+        decode ranges reader digits
+            | null digits || not (all permitted digits) = Nothing
+            | otherwise = case reader digits of
+                [(n,"")] | inRange codePoints n -> Just [chr (fromInteger n)]
+                _ -> Nothing
+            where
+                permitted ch = any (`inRange` ch) ranges
+                -- the value is read as an 'Integer', so huge inputs fail this range check
+                codePoints = (toInteger (ord minBound), toInteger (ord maxBound))
+
+
+-- | Look up a named entity in a map built from 'htmlEntities'.
+--
+-- > lookupNamedEntity "amp" == Just "&"
+-- > lookupNamedEntity "haskell" == Nothing
+lookupNamedEntity :: String -> Maybe String
+lookupNamedEntity = flip Map.lookup table -- kept argument-free so 'table' can be shared between calls
+    where table = Map.fromList htmlEntities
+
+
+-- | Escape a string for XML: each character listed in 'xmlEntities' becomes its @&name;@ form.
+--
+-- > escapeXML "hello world" == "hello world"
+-- > escapeXML "hello & world" == "hello &amp; world"
+escapeXML :: String -> String
+escapeXML = concatMap (\c -> IntMap.findWithDefault [c] (ord c) escapes)
+    where escapes = IntMap.fromList [(ord raw, '&' : name ++ ";") | (name,[raw]) <- xmlEntities]
+
+
+-- | The XML entity names paired with the single-character strings they stand for.
+--   @apos@ is deliberately left out, as Internet Explorer does not know about it.
+xmlEntities :: [(String, String)]
+xmlEntities = map entity
+    [("quot", '"')
+    ,("amp" , '&')
+    -- ,("apos", '\'')    -- Internet Explorer does not know that
+    ,("lt"  , '<')
+    ,("gt"  , '>')
+    ] where entity (name, c) = (name, [c])
+
+-- | A table mapping HTML entity names to resolved strings. Most resolved strings are a single character long,
+--   but some (e.g. @"ngeqq"@) are two characters long. The list is taken from
+--   <http://www.w3.org/TR/html5/syntax.html#named-character-references>.
+htmlEntities :: [(String, String)]
+htmlEntities = let (*) = (,) in
+    ["Aacute" * "\x00C1"
+    ,"aacute" * "\x00E1"
+    ,"Aacute;" * "\x00C1"
+    ,"aacute;" * "\x00E1"
+    ,"Abreve;" * "\x0102"
+    ,"abreve;" * "\x0103"
+    ,"ac;" * "\x223E"
+    ,"acd;" * "\x223F"
+    ,"acE;" * "\x223E\x0333"
+    ,"Acirc" * "\x00C2"
+    ,"acirc" * "\x00E2"
+    ,"Acirc;" * "\x00C2"
+    ,"acirc;" * "\x00E2"
+    ,"acute" * "\x00B4"
+    ,"acute;" * "\x00B4"
+    ,"Acy;" * "\x0410"
+    ,"acy;" * "\x0430"
+    ,"AElig" * "\x00C6"
+    ,"aelig" * "\x00E6"
+    ,"AElig;" * "\x00C6"
+    ,"aelig;" * "\x00E6"
+    ,"af;" * "\x2061"
+    ,"Afr;" * "\x1D504"
+    ,"afr;" * "\x1D51E"
+    ,"Agrave" * "\x00C0"
+    ,"agrave" * "\x00E0"
+    ,"Agrave;" * "\x00C0"
+    ,"agrave;" * "\x00E0"
+    ,"alefsym;" * "\x2135"
+    ,"aleph;" * "\x2135"
+    ,"Alpha;" * "\x0391"
+    ,"alpha;" * "\x03B1"
+    ,"Amacr;" * "\x0100"
+    ,"amacr;" * "\x0101"
+    ,"amalg;" * "\x2A3F"
+    ,"AMP" * "\x0026"
+    ,"amp" * "\x0026"
+    ,"AMP;" * "\x0026"
+    ,"amp;" * "\x0026"
+    ,"and;" * "\x2227"
+    ,"And;" * "\x2A53"
+    ,"andand;" * "\x2A55"
+    ,"andd;" * "\x2A5C"
+    ,"andslope;" * "\x2A58"
+    ,"andv;" * "\x2A5A"
+    ,"ang;" * "\x2220"
+    ,"ange;" * "\x29A4"
+    ,"angle;" * "\x2220"
+    ,"angmsd;" * "\x2221"
+    ,"angmsdaa;" * "\x29A8"
+    ,"angmsdab;" * "\x29A9"
+    ,"angmsdac;" * "\x29AA"
+    ,"angmsdad;" * "\x29AB"
+    ,"angmsdae;" * "\x29AC"
+    ,"angmsdaf;" * "\x29AD"
+    ,"angmsdag;" * "\x29AE"
+    ,"angmsdah;" * "\x29AF"
+    ,"angrt;" * "\x221F"
+    ,"angrtvb;" * "\x22BE"
+    ,"angrtvbd;" * "\x299D"
+    ,"angsph;" * "\x2222"
+    ,"angst;" * "\x00C5"
+    ,"angzarr;" * "\x237C"
+    ,"Aogon;" * "\x0104"
+    ,"aogon;" * "\x0105"
+    ,"Aopf;" * "\x1D538"
+    ,"aopf;" * "\x1D552"
+    ,"ap;" * "\x2248"
+    ,"apacir;" * "\x2A6F"
+    ,"ape;" * "\x224A"
+    ,"apE;" * "\x2A70"
+    ,"apid;" * "\x224B"
+    ,"apos;" * "\x0027"
+    ,"ApplyFunction;" * "\x2061"
+    ,"approx;" * "\x2248"
+    ,"approxeq;" * "\x224A"
+    ,"Aring" * "\x00C5"
+    ,"aring" * "\x00E5"
+    ,"Aring;" * "\x00C5"
+    ,"aring;" * "\x00E5"
+    ,"Ascr;" * "\x1D49C"
+    ,"ascr;" * "\x1D4B6"
+    ,"Assign;" * "\x2254"
+    ,"ast;" * "\x002A"
+    ,"asymp;" * "\x2248"
+    ,"asympeq;" * "\x224D"
+    ,"Atilde" * "\x00C3"
+    ,"atilde" * "\x00E3"
+    ,"Atilde;" * "\x00C3"
+    ,"atilde;" * "\x00E3"
+    ,"Auml" * "\x00C4"
+    ,"auml" * "\x00E4"
+    ,"Auml;" * "\x00C4"
+    ,"auml;" * "\x00E4"
+    ,"awconint;" * "\x2233"
+    ,"awint;" * "\x2A11"
+    ,"backcong;" * "\x224C"
+    ,"backepsilon;" * "\x03F6"
+    ,"backprime;" * "\x2035"
+    ,"backsim;" * "\x223D"
+    ,"backsimeq;" * "\x22CD"
+    ,"Backslash;" * "\x2216"
+    ,"Barv;" * "\x2AE7"
+    ,"barvee;" * "\x22BD"
+    ,"barwed;" * "\x2305"
+    ,"Barwed;" * "\x2306"
+    ,"barwedge;" * "\x2305"
+    ,"bbrk;" * "\x23B5"
+    ,"bbrktbrk;" * "\x23B6"
+    ,"bcong;" * "\x224C"
+    ,"Bcy;" * "\x0411"
+    ,"bcy;" * "\x0431"
+    ,"bdquo;" * "\x201E"
+    ,"becaus;" * "\x2235"
+    ,"Because;" * "\x2235"
+    ,"because;" * "\x2235"
+    ,"bemptyv;" * "\x29B0"
+    ,"bepsi;" * "\x03F6"
+    ,"bernou;" * "\x212C"
+    ,"Bernoullis;" * "\x212C"
+    ,"Beta;" * "\x0392"
+    ,"beta;" * "\x03B2"
+    ,"beth;" * "\x2136"
+    ,"between;" * "\x226C"
+    ,"Bfr;" * "\x1D505"
+    ,"bfr;" * "\x1D51F"
+    ,"bigcap;" * "\x22C2"
+    ,"bigcirc;" * "\x25EF"
+    ,"bigcup;" * "\x22C3"
+    ,"bigodot;" * "\x2A00"
+    ,"bigoplus;" * "\x2A01"
+    ,"bigotimes;" * "\x2A02"
+    ,"bigsqcup;" * "\x2A06"
+    ,"bigstar;" * "\x2605"
+    ,"bigtriangledown;" * "\x25BD"
+    ,"bigtriangleup;" * "\x25B3"
+    ,"biguplus;" * "\x2A04"
+    ,"bigvee;" * "\x22C1"
+    ,"bigwedge;" * "\x22C0"
+    ,"bkarow;" * "\x290D"
+    ,"blacklozenge;" * "\x29EB"
+    ,"blacksquare;" * "\x25AA"
+    ,"blacktriangle;" * "\x25B4"
+    ,"blacktriangledown;" * "\x25BE"
+    ,"blacktriangleleft;" * "\x25C2"
+    ,"blacktriangleright;" * "\x25B8"
+    ,"blank;" * "\x2423"
+    ,"blk12;" * "\x2592"
+    ,"blk14;" * "\x2591"
+    ,"blk34;" * "\x2593"
+    ,"block;" * "\x2588"
+    ,"bne;" * "\x003D\x20E5"
+    ,"bnequiv;" * "\x2261\x20E5"
+    ,"bnot;" * "\x2310"
+    ,"bNot;" * "\x2AED"
+    ,"Bopf;" * "\x1D539"
+    ,"bopf;" * "\x1D553"
+    ,"bot;" * "\x22A5"
+    ,"bottom;" * "\x22A5"
+    ,"bowtie;" * "\x22C8"
+    ,"boxbox;" * "\x29C9"
+    ,"boxdl;" * "\x2510"
+    ,"boxdL;" * "\x2555"
+    ,"boxDl;" * "\x2556"
+    ,"boxDL;" * "\x2557"
+    ,"boxdr;" * "\x250C"
+    ,"boxdR;" * "\x2552"
+    ,"boxDr;" * "\x2553"
+    ,"boxDR;" * "\x2554"
+    ,"boxh;" * "\x2500"
+    ,"boxH;" * "\x2550"
+    ,"boxhd;" * "\x252C"
+    ,"boxHd;" * "\x2564"
+    ,"boxhD;" * "\x2565"
+    ,"boxHD;" * "\x2566"
+    ,"boxhu;" * "\x2534"
+    ,"boxHu;" * "\x2567"
+    ,"boxhU;" * "\x2568"
+    ,"boxHU;" * "\x2569"
+    ,"boxminus;" * "\x229F"
+    ,"boxplus;" * "\x229E"
+    ,"boxtimes;" * "\x22A0"
+    ,"boxul;" * "\x2518"
+    ,"boxuL;" * "\x255B"
+    ,"boxUl;" * "\x255C"
+    ,"boxUL;" * "\x255D"
+    ,"boxur;" * "\x2514"
+    ,"boxuR;" * "\x2558"
+    ,"boxUr;" * "\x2559"
+    ,"boxUR;" * "\x255A"
+    ,"boxv;" * "\x2502"
+    ,"boxV;" * "\x2551"
+    ,"boxvh;" * "\x253C"
+    ,"boxvH;" * "\x256A"
+    ,"boxVh;" * "\x256B"
+    ,"boxVH;" * "\x256C"
+    ,"boxvl;" * "\x2524"
+    ,"boxvL;" * "\x2561"
+    ,"boxVl;" * "\x2562"
+    ,"boxVL;" * "\x2563"
+    ,"boxvr;" * "\x251C"
+    ,"boxvR;" * "\x255E"
+    ,"boxVr;" * "\x255F"
+    ,"boxVR;" * "\x2560"
+    ,"bprime;" * "\x2035"
+    ,"Breve;" * "\x02D8"
+    ,"breve;" * "\x02D8"
+    ,"brvbar" * "\x00A6"
+    ,"brvbar;" * "\x00A6"
+    ,"Bscr;" * "\x212C"
+    ,"bscr;" * "\x1D4B7"
+    ,"bsemi;" * "\x204F"
+    ,"bsim;" * "\x223D"
+    ,"bsime;" * "\x22CD"
+    ,"bsol;" * "\x005C"
+    ,"bsolb;" * "\x29C5"
+    ,"bsolhsub;" * "\x27C8"
+    ,"bull;" * "\x2022"
+    ,"bullet;" * "\x2022"
+    ,"bump;" * "\x224E"
+    ,"bumpe;" * "\x224F"
+    ,"bumpE;" * "\x2AAE"
+    ,"Bumpeq;" * "\x224E"
+    ,"bumpeq;" * "\x224F"
+    ,"Cacute;" * "\x0106"
+    ,"cacute;" * "\x0107"
+    ,"cap;" * "\x2229"
+    ,"Cap;" * "\x22D2"
+    ,"capand;" * "\x2A44"
+    ,"capbrcup;" * "\x2A49"
+    ,"capcap;" * "\x2A4B"
+    ,"capcup;" * "\x2A47"
+    ,"capdot;" * "\x2A40"
+    ,"CapitalDifferentialD;" * "\x2145"
+    ,"caps;" * "\x2229\xFE00"
+    ,"caret;" * "\x2041"
+    ,"caron;" * "\x02C7"
+    ,"Cayleys;" * "\x212D"
+    ,"ccaps;" * "\x2A4D"
+    ,"Ccaron;" * "\x010C"
+    ,"ccaron;" * "\x010D"
+    ,"Ccedil" * "\x00C7"
+    ,"ccedil" * "\x00E7"
+    ,"Ccedil;" * "\x00C7"
+    ,"ccedil;" * "\x00E7"
+    ,"Ccirc;" * "\x0108"
+    ,"ccirc;" * "\x0109"
+    ,"Cconint;" * "\x2230"
+    ,"ccups;" * "\x2A4C"
+    ,"ccupssm;" * "\x2A50"
+    ,"Cdot;" * "\x010A"
+    ,"cdot;" * "\x010B"
+    ,"cedil" * "\x00B8"
+    ,"cedil;" * "\x00B8"
+    ,"Cedilla;" * "\x00B8"
+    ,"cemptyv;" * "\x29B2"
+    ,"cent" * "\x00A2"
+    ,"cent;" * "\x00A2"
+    ,"CenterDot;" * "\x00B7"
+    ,"centerdot;" * "\x00B7"
+    ,"Cfr;" * "\x212D"
+    ,"cfr;" * "\x1D520"
+    ,"CHcy;" * "\x0427"
+    ,"chcy;" * "\x0447"
+    ,"check;" * "\x2713"
+    ,"checkmark;" * "\x2713"
+    ,"Chi;" * "\x03A7"
+    ,"chi;" * "\x03C7"
+    ,"cir;" * "\x25CB"
+    ,"circ;" * "\x02C6"
+    ,"circeq;" * "\x2257"
+    ,"circlearrowleft;" * "\x21BA"
+    ,"circlearrowright;" * "\x21BB"
+    ,"circledast;" * "\x229B"
+    ,"circledcirc;" * "\x229A"
+    ,"circleddash;" * "\x229D"
+    ,"CircleDot;" * "\x2299"
+    ,"circledR;" * "\x00AE"
+    ,"circledS;" * "\x24C8"
+    ,"CircleMinus;" * "\x2296"
+    ,"CirclePlus;" * "\x2295"
+    ,"CircleTimes;" * "\x2297"
+    ,"cire;" * "\x2257"
+    ,"cirE;" * "\x29C3"
+    ,"cirfnint;" * "\x2A10"
+    ,"cirmid;" * "\x2AEF"
+    ,"cirscir;" * "\x29C2"
+    ,"ClockwiseContourIntegral;" * "\x2232"
+    ,"CloseCurlyDoubleQuote;" * "\x201D"
+    ,"CloseCurlyQuote;" * "\x2019"
+    ,"clubs;" * "\x2663"
+    ,"clubsuit;" * "\x2663"
+    ,"colon;" * "\x003A"
+    ,"Colon;" * "\x2237"
+    ,"colone;" * "\x2254"
+    ,"Colone;" * "\x2A74"
+    ,"coloneq;" * "\x2254"
+    ,"comma;" * "\x002C"
+    ,"commat;" * "\x0040"
+    ,"comp;" * "\x2201"
+    ,"compfn;" * "\x2218"
+    ,"complement;" * "\x2201"
+    ,"complexes;" * "\x2102"
+    ,"cong;" * "\x2245"
+    ,"congdot;" * "\x2A6D"
+    ,"Congruent;" * "\x2261"
+    ,"conint;" * "\x222E"
+    ,"Conint;" * "\x222F"
+    ,"ContourIntegral;" * "\x222E"
+    ,"Copf;" * "\x2102"
+    ,"copf;" * "\x1D554"
+    ,"coprod;" * "\x2210"
+    ,"Coproduct;" * "\x2210"
+    ,"COPY" * "\x00A9"
+    ,"copy" * "\x00A9"
+    ,"COPY;" * "\x00A9"
+    ,"copy;" * "\x00A9"
+    ,"copysr;" * "\x2117"
+    ,"CounterClockwiseContourIntegral;" * "\x2233"
+    ,"crarr;" * "\x21B5"
+    ,"cross;" * "\x2717"
+    ,"Cross;" * "\x2A2F"
+    ,"Cscr;" * "\x1D49E"
+    ,"cscr;" * "\x1D4B8"
+    ,"csub;" * "\x2ACF"
+    ,"csube;" * "\x2AD1"
+    ,"csup;" * "\x2AD0"
+    ,"csupe;" * "\x2AD2"
+    ,"ctdot;" * "\x22EF"
+    ,"cudarrl;" * "\x2938"
+    ,"cudarrr;" * "\x2935"
+    ,"cuepr;" * "\x22DE"
+    ,"cuesc;" * "\x22DF"
+    ,"cularr;" * "\x21B6"
+    ,"cularrp;" * "\x293D"
+    ,"cup;" * "\x222A"
+    ,"Cup;" * "\x22D3"
+    ,"cupbrcap;" * "\x2A48"
+    ,"CupCap;" * "\x224D"
+    ,"cupcap;" * "\x2A46"
+    ,"cupcup;" * "\x2A4A"
+    ,"cupdot;" * "\x228D"
+    ,"cupor;" * "\x2A45"
+    ,"cups;" * "\x222A\xFE00"
+    ,"curarr;" * "\x21B7"
+    ,"curarrm;" * "\x293C"
+    ,"curlyeqprec;" * "\x22DE"
+    ,"curlyeqsucc;" * "\x22DF"
+    ,"curlyvee;" * "\x22CE"
+    ,"curlywedge;" * "\x22CF"
+    ,"curren" * "\x00A4"
+    ,"curren;" * "\x00A4"
+    ,"curvearrowleft;" * "\x21B6"
+    ,"curvearrowright;" * "\x21B7"
+    ,"cuvee;" * "\x22CE"
+    ,"cuwed;" * "\x22CF"
+    ,"cwconint;" * "\x2232"
+    ,"cwint;" * "\x2231"
+    ,"cylcty;" * "\x232D"
+    ,"dagger;" * "\x2020"
+    ,"Dagger;" * "\x2021"
+    ,"daleth;" * "\x2138"
+    ,"darr;" * "\x2193"
+    ,"Darr;" * "\x21A1"
+    ,"dArr;" * "\x21D3"
+    ,"dash;" * "\x2010"
+    ,"dashv;" * "\x22A3"
+    ,"Dashv;" * "\x2AE4"
+    ,"dbkarow;" * "\x290F"
+    ,"dblac;" * "\x02DD"
+    ,"Dcaron;" * "\x010E"
+    ,"dcaron;" * "\x010F"
+    ,"Dcy;" * "\x0414"
+    ,"dcy;" * "\x0434"
+    ,"DD;" * "\x2145"
+    ,"dd;" * "\x2146"
+    ,"ddagger;" * "\x2021"
+    ,"ddarr;" * "\x21CA"
+    ,"DDotrahd;" * "\x2911"
+    ,"ddotseq;" * "\x2A77"
+    ,"deg" * "\x00B0"
+    ,"deg;" * "\x00B0"
+    ,"Del;" * "\x2207"
+    ,"Delta;" * "\x0394"
+    ,"delta;" * "\x03B4"
+    ,"demptyv;" * "\x29B1"
+    ,"dfisht;" * "\x297F"
+    ,"Dfr;" * "\x1D507"
+    ,"dfr;" * "\x1D521"
+    ,"dHar;" * "\x2965"
+    ,"dharl;" * "\x21C3"
+    ,"dharr;" * "\x21C2"
+    ,"DiacriticalAcute;" * "\x00B4"
+    ,"DiacriticalDot;" * "\x02D9"
+    ,"DiacriticalDoubleAcute;" * "\x02DD"
+    ,"DiacriticalGrave;" * "\x0060"
+    ,"DiacriticalTilde;" * "\x02DC"
+    ,"diam;" * "\x22C4"
+    ,"Diamond;" * "\x22C4"
+    ,"diamond;" * "\x22C4"
+    ,"diamondsuit;" * "\x2666"
+    ,"diams;" * "\x2666"
+    ,"die;" * "\x00A8"
+    ,"DifferentialD;" * "\x2146"
+    ,"digamma;" * "\x03DD"
+    ,"disin;" * "\x22F2"
+    ,"div;" * "\x00F7"
+    ,"divide" * "\x00F7"
+    ,"divide;" * "\x00F7"
+    ,"divideontimes;" * "\x22C7"
+    ,"divonx;" * "\x22C7"
+    ,"DJcy;" * "\x0402"
+    ,"djcy;" * "\x0452"
+    ,"dlcorn;" * "\x231E"
+    ,"dlcrop;" * "\x230D"
+    ,"dollar;" * "\x0024"
+    ,"Dopf;" * "\x1D53B"
+    ,"dopf;" * "\x1D555"
+    ,"Dot;" * "\x00A8"
+    ,"dot;" * "\x02D9"
+    ,"DotDot;" * "\x20DC"
+    ,"doteq;" * "\x2250"
+    ,"doteqdot;" * "\x2251"
+    ,"DotEqual;" * "\x2250"
+    ,"dotminus;" * "\x2238"
+    ,"dotplus;" * "\x2214"
+    ,"dotsquare;" * "\x22A1"
+    ,"doublebarwedge;" * "\x2306"
+    ,"DoubleContourIntegral;" * "\x222F"
+    ,"DoubleDot;" * "\x00A8"
+    ,"DoubleDownArrow;" * "\x21D3"
+    ,"DoubleLeftArrow;" * "\x21D0"
+    ,"DoubleLeftRightArrow;" * "\x21D4"
+    ,"DoubleLeftTee;" * "\x2AE4"
+    ,"DoubleLongLeftArrow;" * "\x27F8"
+    ,"DoubleLongLeftRightArrow;" * "\x27FA"
+    ,"DoubleLongRightArrow;" * "\x27F9"
+    ,"DoubleRightArrow;" * "\x21D2"
+    ,"DoubleRightTee;" * "\x22A8"
+    ,"DoubleUpArrow;" * "\x21D1"
+    ,"DoubleUpDownArrow;" * "\x21D5"
+    ,"DoubleVerticalBar;" * "\x2225"
+    ,"DownArrow;" * "\x2193"
+    ,"downarrow;" * "\x2193"
+    ,"Downarrow;" * "\x21D3"
+    ,"DownArrowBar;" * "\x2913"
+    ,"DownArrowUpArrow;" * "\x21F5"
+    ,"DownBreve;" * "\x0311"
+    ,"downdownarrows;" * "\x21CA"
+    ,"downharpoonleft;" * "\x21C3"
+    ,"downharpoonright;" * "\x21C2"
+    ,"DownLeftRightVector;" * "\x2950"
+    ,"DownLeftTeeVector;" * "\x295E"
+    ,"DownLeftVector;" * "\x21BD"
+    ,"DownLeftVectorBar;" * "\x2956"
+    ,"DownRightTeeVector;" * "\x295F"
+    ,"DownRightVector;" * "\x21C1"
+    ,"DownRightVectorBar;" * "\x2957"
+    ,"DownTee;" * "\x22A4"
+    ,"DownTeeArrow;" * "\x21A7"
+    ,"drbkarow;" * "\x2910"
+    ,"drcorn;" * "\x231F"
+    ,"drcrop;" * "\x230C"
+    ,"Dscr;" * "\x1D49F"
+    ,"dscr;" * "\x1D4B9"
+    ,"DScy;" * "\x0405"
+    ,"dscy;" * "\x0455"
+    ,"dsol;" * "\x29F6"
+    ,"Dstrok;" * "\x0110"
+    ,"dstrok;" * "\x0111"
+    ,"dtdot;" * "\x22F1"
+    ,"dtri;" * "\x25BF"
+    ,"dtrif;" * "\x25BE"
+    ,"duarr;" * "\x21F5"
+    ,"duhar;" * "\x296F"
+    ,"dwangle;" * "\x29A6"
+    ,"DZcy;" * "\x040F"
+    ,"dzcy;" * "\x045F"
+    ,"dzigrarr;" * "\x27FF"
+    ,"Eacute" * "\x00C9"
+    ,"eacute" * "\x00E9"
+    ,"Eacute;" * "\x00C9"
+    ,"eacute;" * "\x00E9"
+    ,"easter;" * "\x2A6E"
+    ,"Ecaron;" * "\x011A"
+    ,"ecaron;" * "\x011B"
+    ,"ecir;" * "\x2256"
+    ,"Ecirc" * "\x00CA"
+    ,"ecirc" * "\x00EA"
+    ,"Ecirc;" * "\x00CA"
+    ,"ecirc;" * "\x00EA"
+    ,"ecolon;" * "\x2255"
+    ,"Ecy;" * "\x042D"
+    ,"ecy;" * "\x044D"
+    ,"eDDot;" * "\x2A77"
+    ,"Edot;" * "\x0116"
+    ,"edot;" * "\x0117"
+    ,"eDot;" * "\x2251"
+    ,"ee;" * "\x2147"
+    ,"efDot;" * "\x2252"
+    ,"Efr;" * "\x1D508"
+    ,"efr;" * "\x1D522"
+    ,"eg;" * "\x2A9A"
+    ,"Egrave" * "\x00C8"
+    ,"egrave" * "\x00E8"
+    ,"Egrave;" * "\x00C8"
+    ,"egrave;" * "\x00E8"
+    ,"egs;" * "\x2A96"
+    ,"egsdot;" * "\x2A98"
+    ,"el;" * "\x2A99"
+    ,"Element;" * "\x2208"
+    ,"elinters;" * "\x23E7"
+    ,"ell;" * "\x2113"
+    ,"els;" * "\x2A95"
+    ,"elsdot;" * "\x2A97"
+    ,"Emacr;" * "\x0112"
+    ,"emacr;" * "\x0113"
+    ,"empty;" * "\x2205"
+    ,"emptyset;" * "\x2205"
+    ,"EmptySmallSquare;" * "\x25FB"
+    ,"emptyv;" * "\x2205"
+    ,"EmptyVerySmallSquare;" * "\x25AB"
+    ,"emsp13;" * "\x2004"
+    ,"emsp14;" * "\x2005"
+    ,"emsp;" * "\x2003"
+    ,"ENG;" * "\x014A"
+    ,"eng;" * "\x014B"
+    ,"ensp;" * "\x2002"
+    ,"Eogon;" * "\x0118"
+    ,"eogon;" * "\x0119"
+    ,"Eopf;" * "\x1D53C"
+    ,"eopf;" * "\x1D556"
+    ,"epar;" * "\x22D5"
+    ,"eparsl;" * "\x29E3"
+    ,"eplus;" * "\x2A71"
+    ,"epsi;" * "\x03B5"
+    ,"Epsilon;" * "\x0395"
+    ,"epsilon;" * "\x03B5"
+    ,"epsiv;" * "\x03F5"
+    ,"eqcirc;" * "\x2256"
+    ,"eqcolon;" * "\x2255"
+    ,"eqsim;" * "\x2242"
+    ,"eqslantgtr;" * "\x2A96"
+    ,"eqslantless;" * "\x2A95"
+    ,"Equal;" * "\x2A75"
+    ,"equals;" * "\x003D"
+    ,"EqualTilde;" * "\x2242"
+    ,"equest;" * "\x225F"
+    ,"Equilibrium;" * "\x21CC"
+    ,"equiv;" * "\x2261"
+    ,"equivDD;" * "\x2A78"
+    ,"eqvparsl;" * "\x29E5"
+    ,"erarr;" * "\x2971"
+    ,"erDot;" * "\x2253"
+    ,"escr;" * "\x212F"
+    ,"Escr;" * "\x2130"
+    ,"esdot;" * "\x2250"
+    ,"esim;" * "\x2242"
+    ,"Esim;" * "\x2A73"
+    ,"Eta;" * "\x0397"
+    ,"eta;" * "\x03B7"
+    ,"ETH" * "\x00D0"
+    ,"eth" * "\x00F0"
+    ,"ETH;" * "\x00D0"
+    ,"eth;" * "\x00F0"
+    ,"Euml" * "\x00CB"
+    ,"euml" * "\x00EB"
+    ,"Euml;" * "\x00CB"
+    ,"euml;" * "\x00EB"
+    ,"euro;" * "\x20AC"
+    ,"excl;" * "\x0021"
+    ,"exist;" * "\x2203"
+    ,"Exists;" * "\x2203"
+    ,"expectation;" * "\x2130"
+    ,"ExponentialE;" * "\x2147"
+    ,"exponentiale;" * "\x2147"
+    ,"fallingdotseq;" * "\x2252"
+    ,"Fcy;" * "\x0424"
+    ,"fcy;" * "\x0444"
+    ,"female;" * "\x2640"
+    ,"ffilig;" * "\xFB03"
+    ,"fflig;" * "\xFB00"
+    ,"ffllig;" * "\xFB04"
+    ,"Ffr;" * "\x1D509"
+    ,"ffr;" * "\x1D523"
+    ,"filig;" * "\xFB01"
+    ,"FilledSmallSquare;" * "\x25FC"
+    ,"FilledVerySmallSquare;" * "\x25AA"
+    ,"fjlig;" * "\x0066\x006A"
+    ,"flat;" * "\x266D"
+    ,"fllig;" * "\xFB02"
+    ,"fltns;" * "\x25B1"
+    ,"fnof;" * "\x0192"
+    ,"Fopf;" * "\x1D53D"
+    ,"fopf;" * "\x1D557"
+    ,"ForAll;" * "\x2200"
+    ,"forall;" * "\x2200"
+    ,"fork;" * "\x22D4"
+    ,"forkv;" * "\x2AD9"
+    ,"Fouriertrf;" * "\x2131"
+    ,"fpartint;" * "\x2A0D"
+    ,"frac12" * "\x00BD"
+    ,"frac12;" * "\x00BD"
+    ,"frac13;" * "\x2153"
+    ,"frac14" * "\x00BC"
+    ,"frac14;" * "\x00BC"
+    ,"frac15;" * "\x2155"
+    ,"frac16;" * "\x2159"
+    ,"frac18;" * "\x215B"
+    ,"frac23;" * "\x2154"
+    ,"frac25;" * "\x2156"
+    ,"frac34" * "\x00BE"
+    ,"frac34;" * "\x00BE"
+    ,"frac35;" * "\x2157"
+    ,"frac38;" * "\x215C"
+    ,"frac45;" * "\x2158"
+    ,"frac56;" * "\x215A"
+    ,"frac58;" * "\x215D"
+    ,"frac78;" * "\x215E"
+    ,"frasl;" * "\x2044"
+    ,"frown;" * "\x2322"
+    ,"Fscr;" * "\x2131"
+    ,"fscr;" * "\x1D4BB"
+    ,"gacute;" * "\x01F5"
+    ,"Gamma;" * "\x0393"
+    ,"gamma;" * "\x03B3"
+    ,"Gammad;" * "\x03DC"
+    ,"gammad;" * "\x03DD"
+    ,"gap;" * "\x2A86"
+    ,"Gbreve;" * "\x011E"
+    ,"gbreve;" * "\x011F"
+    ,"Gcedil;" * "\x0122"
+    ,"Gcirc;" * "\x011C"
+    ,"gcirc;" * "\x011D"
+    ,"Gcy;" * "\x0413"
+    ,"gcy;" * "\x0433"
+    ,"Gdot;" * "\x0120"
+    ,"gdot;" * "\x0121"
+    ,"ge;" * "\x2265"
+    ,"gE;" * "\x2267"
+    ,"gel;" * "\x22DB"
+    ,"gEl;" * "\x2A8C"
+    ,"geq;" * "\x2265"
+    ,"geqq;" * "\x2267"
+    ,"geqslant;" * "\x2A7E"
+    ,"ges;" * "\x2A7E"
+    ,"gescc;" * "\x2AA9"
+    ,"gesdot;" * "\x2A80"
+    ,"gesdoto;" * "\x2A82"
+    ,"gesdotol;" * "\x2A84"
+    ,"gesl;" * "\x22DB\xFE00"
+    ,"gesles;" * "\x2A94"
+    ,"Gfr;" * "\x1D50A"
+    ,"gfr;" * "\x1D524"
+    ,"gg;" * "\x226B"
+    ,"Gg;" * "\x22D9"
+    ,"ggg;" * "\x22D9"
+    ,"gimel;" * "\x2137"
+    ,"GJcy;" * "\x0403"
+    ,"gjcy;" * "\x0453"
+    ,"gl;" * "\x2277"
+    ,"gla;" * "\x2AA5"
+    ,"glE;" * "\x2A92"
+    ,"glj;" * "\x2AA4"
+    ,"gnap;" * "\x2A8A"
+    ,"gnapprox;" * "\x2A8A"
+    ,"gnE;" * "\x2269"
+    ,"gne;" * "\x2A88"
+    ,"gneq;" * "\x2A88"
+    ,"gneqq;" * "\x2269"
+    ,"gnsim;" * "\x22E7"
+    ,"Gopf;" * "\x1D53E"
+    ,"gopf;" * "\x1D558"
+    ,"grave;" * "\x0060"
+    ,"GreaterEqual;" * "\x2265"
+    ,"GreaterEqualLess;" * "\x22DB"
+    ,"GreaterFullEqual;" * "\x2267"
+    ,"GreaterGreater;" * "\x2AA2"
+    ,"GreaterLess;" * "\x2277"
+    ,"GreaterSlantEqual;" * "\x2A7E"
+    ,"GreaterTilde;" * "\x2273"
+    ,"gscr;" * "\x210A"
+    ,"Gscr;" * "\x1D4A2"
+    ,"gsim;" * "\x2273"
+    ,"gsime;" * "\x2A8E"
+    ,"gsiml;" * "\x2A90"
+    ,"GT" * "\x003E"
+    ,"gt" * "\x003E"
+    ,"GT;" * "\x003E"
+    ,"gt;" * "\x003E"
+    ,"Gt;" * "\x226B"
+    ,"gtcc;" * "\x2AA7"
+    ,"gtcir;" * "\x2A7A"
+    ,"gtdot;" * "\x22D7"
+    ,"gtlPar;" * "\x2995"
+    ,"gtquest;" * "\x2A7C"
+    ,"gtrapprox;" * "\x2A86"
+    ,"gtrarr;" * "\x2978"
+    ,"gtrdot;" * "\x22D7"
+    ,"gtreqless;" * "\x22DB"
+    ,"gtreqqless;" * "\x2A8C"
+    ,"gtrless;" * "\x2277"
+    ,"gtrsim;" * "\x2273"
+    ,"gvertneqq;" * "\x2269\xFE00"
+    ,"gvnE;" * "\x2269\xFE00"
+    ,"Hacek;" * "\x02C7"
+    ,"hairsp;" * "\x200A"
+    ,"half;" * "\x00BD"
+    ,"hamilt;" * "\x210B"
+    ,"HARDcy;" * "\x042A"
+    ,"hardcy;" * "\x044A"
+    ,"harr;" * "\x2194"
+    ,"hArr;" * "\x21D4"
+    ,"harrcir;" * "\x2948"
+    ,"harrw;" * "\x21AD"
+    ,"Hat;" * "\x005E"
+    ,"hbar;" * "\x210F"
+    ,"Hcirc;" * "\x0124"
+    ,"hcirc;" * "\x0125"
+    ,"hearts;" * "\x2665"
+    ,"heartsuit;" * "\x2665"
+    ,"hellip;" * "\x2026"
+    ,"hercon;" * "\x22B9"
+    ,"Hfr;" * "\x210C"
+    ,"hfr;" * "\x1D525"
+    ,"HilbertSpace;" * "\x210B"
+    ,"hksearow;" * "\x2925"
+    ,"hkswarow;" * "\x2926"
+    ,"hoarr;" * "\x21FF"
+    ,"homtht;" * "\x223B"
+    ,"hookleftarrow;" * "\x21A9"
+    ,"hookrightarrow;" * "\x21AA"
+    ,"Hopf;" * "\x210D"
+    ,"hopf;" * "\x1D559"
+    ,"horbar;" * "\x2015"
+    ,"HorizontalLine;" * "\x2500"
+    ,"Hscr;" * "\x210B"
+    ,"hscr;" * "\x1D4BD"
+    ,"hslash;" * "\x210F"
+    ,"Hstrok;" * "\x0126"
+    ,"hstrok;" * "\x0127"
+    ,"HumpDownHump;" * "\x224E"
+    ,"HumpEqual;" * "\x224F"
+    ,"hybull;" * "\x2043"
+    ,"hyphen;" * "\x2010"
+    ,"Iacute" * "\x00CD"
+    ,"iacute" * "\x00ED"
+    ,"Iacute;" * "\x00CD"
+    ,"iacute;" * "\x00ED"
+    ,"ic;" * "\x2063"
+    ,"Icirc" * "\x00CE"
+    ,"icirc" * "\x00EE"
+    ,"Icirc;" * "\x00CE"
+    ,"icirc;" * "\x00EE"
+    ,"Icy;" * "\x0418"
+    ,"icy;" * "\x0438"
+    ,"Idot;" * "\x0130"
+    ,"IEcy;" * "\x0415"
+    ,"iecy;" * "\x0435"
+    ,"iexcl" * "\x00A1"
+    ,"iexcl;" * "\x00A1"
+    ,"iff;" * "\x21D4"
+    ,"Ifr;" * "\x2111"
+    ,"ifr;" * "\x1D526"
+    ,"Igrave" * "\x00CC"
+    ,"igrave" * "\x00EC"
+    ,"Igrave;" * "\x00CC"
+    ,"igrave;" * "\x00EC"
+    ,"ii;" * "\x2148"
+    ,"iiiint;" * "\x2A0C"
+    ,"iiint;" * "\x222D"
+    ,"iinfin;" * "\x29DC"
+    ,"iiota;" * "\x2129"
+    ,"IJlig;" * "\x0132"
+    ,"ijlig;" * "\x0133"
+    ,"Im;" * "\x2111"
+    ,"Imacr;" * "\x012A"
+    ,"imacr;" * "\x012B"
+    ,"image;" * "\x2111"
+    ,"ImaginaryI;" * "\x2148"
+    ,"imagline;" * "\x2110"
+    ,"imagpart;" * "\x2111"
+    ,"imath;" * "\x0131"
+    ,"imof;" * "\x22B7"
+    ,"imped;" * "\x01B5"
+    ,"Implies;" * "\x21D2"
+    ,"in;" * "\x2208"
+    ,"incare;" * "\x2105"
+    ,"infin;" * "\x221E"
+    ,"infintie;" * "\x29DD"
+    ,"inodot;" * "\x0131"
+    ,"int;" * "\x222B"
+    ,"Int;" * "\x222C"
+    ,"intcal;" * "\x22BA"
+    ,"integers;" * "\x2124"
+    ,"Integral;" * "\x222B"
+    ,"intercal;" * "\x22BA"
+    ,"Intersection;" * "\x22C2"
+    ,"intlarhk;" * "\x2A17"
+    ,"intprod;" * "\x2A3C"
+    ,"InvisibleComma;" * "\x2063"
+    ,"InvisibleTimes;" * "\x2062"
+    ,"IOcy;" * "\x0401"
+    ,"iocy;" * "\x0451"
+    ,"Iogon;" * "\x012E"
+    ,"iogon;" * "\x012F"
+    ,"Iopf;" * "\x1D540"
+    ,"iopf;" * "\x1D55A"
+    ,"Iota;" * "\x0399"
+    ,"iota;" * "\x03B9"
+    ,"iprod;" * "\x2A3C"
+    ,"iquest" * "\x00BF"
+    ,"iquest;" * "\x00BF"
+    ,"Iscr;" * "\x2110"
+    ,"iscr;" * "\x1D4BE"
+    ,"isin;" * "\x2208"
+    ,"isindot;" * "\x22F5"
+    ,"isinE;" * "\x22F9"
+    ,"isins;" * "\x22F4"
+    ,"isinsv;" * "\x22F3"
+    ,"isinv;" * "\x2208"
+    ,"it;" * "\x2062"
+    ,"Itilde;" * "\x0128"
+    ,"itilde;" * "\x0129"
+    ,"Iukcy;" * "\x0406"
+    ,"iukcy;" * "\x0456"
+    ,"Iuml" * "\x00CF"
+    ,"iuml" * "\x00EF"
+    ,"Iuml;" * "\x00CF"
+    ,"iuml;" * "\x00EF"
+    ,"Jcirc;" * "\x0134"
+    ,"jcirc;" * "\x0135"
+    ,"Jcy;" * "\x0419"
+    ,"jcy;" * "\x0439"
+    ,"Jfr;" * "\x1D50D"
+    ,"jfr;" * "\x1D527"
+    ,"jmath;" * "\x0237"
+    ,"Jopf;" * "\x1D541"
+    ,"jopf;" * "\x1D55B"
+    ,"Jscr;" * "\x1D4A5"
+    ,"jscr;" * "\x1D4BF"
+    ,"Jsercy;" * "\x0408"
+    ,"jsercy;" * "\x0458"
+    ,"Jukcy;" * "\x0404"
+    ,"jukcy;" * "\x0454"
+    ,"Kappa;" * "\x039A"
+    ,"kappa;" * "\x03BA"
+    ,"kappav;" * "\x03F0"
+    ,"Kcedil;" * "\x0136"
+    ,"kcedil;" * "\x0137"
+    ,"Kcy;" * "\x041A"
+    ,"kcy;" * "\x043A"
+    ,"Kfr;" * "\x1D50E"
+    ,"kfr;" * "\x1D528"
+    ,"kgreen;" * "\x0138"
+    ,"KHcy;" * "\x0425"
+    ,"khcy;" * "\x0445"
+    ,"KJcy;" * "\x040C"
+    ,"kjcy;" * "\x045C"
+    ,"Kopf;" * "\x1D542"
+    ,"kopf;" * "\x1D55C"
+    ,"Kscr;" * "\x1D4A6"
+    ,"kscr;" * "\x1D4C0"
+    ,"lAarr;" * "\x21DA"
+    ,"Lacute;" * "\x0139"
+    ,"lacute;" * "\x013A"
+    ,"laemptyv;" * "\x29B4"
+    ,"lagran;" * "\x2112"
+    ,"Lambda;" * "\x039B"
+    ,"lambda;" * "\x03BB"
+    ,"lang;" * "\x27E8"
+    ,"Lang;" * "\x27EA"
+    ,"langd;" * "\x2991"
+    ,"langle;" * "\x27E8"
+    ,"lap;" * "\x2A85"
+    ,"Laplacetrf;" * "\x2112"
+    ,"laquo" * "\x00AB"
+    ,"laquo;" * "\x00AB"
+    ,"larr;" * "\x2190"
+    ,"Larr;" * "\x219E"
+    ,"lArr;" * "\x21D0"
+    ,"larrb;" * "\x21E4"
+    ,"larrbfs;" * "\x291F"
+    ,"larrfs;" * "\x291D"
+    ,"larrhk;" * "\x21A9"
+    ,"larrlp;" * "\x21AB"
+    ,"larrpl;" * "\x2939"
+    ,"larrsim;" * "\x2973"
+    ,"larrtl;" * "\x21A2"
+    ,"lat;" * "\x2AAB"
+    ,"latail;" * "\x2919"
+    ,"lAtail;" * "\x291B"
+    ,"late;" * "\x2AAD"
+    ,"lates;" * "\x2AAD\xFE00"
+    ,"lbarr;" * "\x290C"
+    ,"lBarr;" * "\x290E"
+    ,"lbbrk;" * "\x2772"
+    ,"lbrace;" * "\x007B"
+    ,"lbrack;" * "\x005B"
+    ,"lbrke;" * "\x298B"
+    ,"lbrksld;" * "\x298F"
+    ,"lbrkslu;" * "\x298D"
+    ,"Lcaron;" * "\x013D"
+    ,"lcaron;" * "\x013E"
+    ,"Lcedil;" * "\x013B"
+    ,"lcedil;" * "\x013C"
+    ,"lceil;" * "\x2308"
+    ,"lcub;" * "\x007B"
+    ,"Lcy;" * "\x041B"
+    ,"lcy;" * "\x043B"
+    ,"ldca;" * "\x2936"
+    ,"ldquo;" * "\x201C"
+    ,"ldquor;" * "\x201E"
+    ,"ldrdhar;" * "\x2967"
+    ,"ldrushar;" * "\x294B"
+    ,"ldsh;" * "\x21B2"
+    ,"le;" * "\x2264"
+    ,"lE;" * "\x2266"
+    ,"LeftAngleBracket;" * "\x27E8"
+    ,"LeftArrow;" * "\x2190"
+    ,"leftarrow;" * "\x2190"
+    ,"Leftarrow;" * "\x21D0"
+    ,"LeftArrowBar;" * "\x21E4"
+    ,"LeftArrowRightArrow;" * "\x21C6"
+    ,"leftarrowtail;" * "\x21A2"
+    ,"LeftCeiling;" * "\x2308"
+    ,"LeftDoubleBracket;" * "\x27E6"
+    ,"LeftDownTeeVector;" * "\x2961"
+    ,"LeftDownVector;" * "\x21C3"
+    ,"LeftDownVectorBar;" * "\x2959"
+    ,"LeftFloor;" * "\x230A"
+    ,"leftharpoondown;" * "\x21BD"
+    ,"leftharpoonup;" * "\x21BC"
+    ,"leftleftarrows;" * "\x21C7"
+    ,"LeftRightArrow;" * "\x2194"
+    ,"leftrightarrow;" * "\x2194"
+    ,"Leftrightarrow;" * "\x21D4"
+    ,"leftrightarrows;" * "\x21C6"
+    ,"leftrightharpoons;" * "\x21CB"
+    ,"leftrightsquigarrow;" * "\x21AD"
+    ,"LeftRightVector;" * "\x294E"
+    ,"LeftTee;" * "\x22A3"
+    ,"LeftTeeArrow;" * "\x21A4"
+    ,"LeftTeeVector;" * "\x295A"
+    ,"leftthreetimes;" * "\x22CB"
+    ,"LeftTriangle;" * "\x22B2"
+    ,"LeftTriangleBar;" * "\x29CF"
+    ,"LeftTriangleEqual;" * "\x22B4"
+    ,"LeftUpDownVector;" * "\x2951"
+    ,"LeftUpTeeVector;" * "\x2960"
+    ,"LeftUpVector;" * "\x21BF"
+    ,"LeftUpVectorBar;" * "\x2958"
+    ,"LeftVector;" * "\x21BC"
+    ,"LeftVectorBar;" * "\x2952"
+    ,"leg;" * "\x22DA"
+    ,"lEg;" * "\x2A8B"
+    ,"leq;" * "\x2264"
+    ,"leqq;" * "\x2266"
+    ,"leqslant;" * "\x2A7D"
+    ,"les;" * "\x2A7D"
+    ,"lescc;" * "\x2AA8"
+    ,"lesdot;" * "\x2A7F"
+    ,"lesdoto;" * "\x2A81"
+    ,"lesdotor;" * "\x2A83"
+    ,"lesg;" * "\x22DA\xFE00"
+    ,"lesges;" * "\x2A93"
+    ,"lessapprox;" * "\x2A85"
+    ,"lessdot;" * "\x22D6"
+    ,"lesseqgtr;" * "\x22DA"
+    ,"lesseqqgtr;" * "\x2A8B"
+    ,"LessEqualGreater;" * "\x22DA"
+    ,"LessFullEqual;" * "\x2266"
+    ,"LessGreater;" * "\x2276"
+    ,"lessgtr;" * "\x2276"
+    ,"LessLess;" * "\x2AA1"
+    ,"lesssim;" * "\x2272"
+    ,"LessSlantEqual;" * "\x2A7D"
+    ,"LessTilde;" * "\x2272"
+    ,"lfisht;" * "\x297C"
+    ,"lfloor;" * "\x230A"
+    ,"Lfr;" * "\x1D50F"
+    ,"lfr;" * "\x1D529"
+    ,"lg;" * "\x2276"
+    ,"lgE;" * "\x2A91"
+    ,"lHar;" * "\x2962"
+    ,"lhard;" * "\x21BD"
+    ,"lharu;" * "\x21BC"
+    ,"lharul;" * "\x296A"
+    ,"lhblk;" * "\x2584"
+    ,"LJcy;" * "\x0409"
+    ,"ljcy;" * "\x0459"
+    ,"ll;" * "\x226A"
+    ,"Ll;" * "\x22D8"
+    ,"llarr;" * "\x21C7"
+    ,"llcorner;" * "\x231E"
+    ,"Lleftarrow;" * "\x21DA"
+    ,"llhard;" * "\x296B"
+    ,"lltri;" * "\x25FA"
+    ,"Lmidot;" * "\x013F"
+    ,"lmidot;" * "\x0140"
+    ,"lmoust;" * "\x23B0"
+    ,"lmoustache;" * "\x23B0"
+    ,"lnap;" * "\x2A89"
+    ,"lnapprox;" * "\x2A89"
+    ,"lnE;" * "\x2268"
+    ,"lne;" * "\x2A87"
+    ,"lneq;" * "\x2A87"
+    ,"lneqq;" * "\x2268"
+    ,"lnsim;" * "\x22E6"
+    ,"loang;" * "\x27EC"
+    ,"loarr;" * "\x21FD"
+    ,"lobrk;" * "\x27E6"
+    ,"LongLeftArrow;" * "\x27F5"
+    ,"longleftarrow;" * "\x27F5"
+    ,"Longleftarrow;" * "\x27F8"
+    ,"LongLeftRightArrow;" * "\x27F7"
+    ,"longleftrightarrow;" * "\x27F7"
+    ,"Longleftrightarrow;" * "\x27FA"
+    ,"longmapsto;" * "\x27FC"
+    ,"LongRightArrow;" * "\x27F6"
+    ,"longrightarrow;" * "\x27F6"
+    ,"Longrightarrow;" * "\x27F9"
+    ,"looparrowleft;" * "\x21AB"
+    ,"looparrowright;" * "\x21AC"
+    ,"lopar;" * "\x2985"
+    ,"Lopf;" * "\x1D543"
+    ,"lopf;" * "\x1D55D"
+    ,"loplus;" * "\x2A2D"
+    ,"lotimes;" * "\x2A34"
+    ,"lowast;" * "\x2217"
+    ,"lowbar;" * "\x005F"
+    ,"LowerLeftArrow;" * "\x2199"
+    ,"LowerRightArrow;" * "\x2198"
+    ,"loz;" * "\x25CA"
+    ,"lozenge;" * "\x25CA"
+    ,"lozf;" * "\x29EB"
+    ,"lpar;" * "\x0028"
+    ,"lparlt;" * "\x2993"
+    ,"lrarr;" * "\x21C6"
+    ,"lrcorner;" * "\x231F"
+    ,"lrhar;" * "\x21CB"
+    ,"lrhard;" * "\x296D"
+    ,"lrm;" * "\x200E"
+    ,"lrtri;" * "\x22BF"
+    ,"lsaquo;" * "\x2039"
+    ,"Lscr;" * "\x2112"
+    ,"lscr;" * "\x1D4C1"
+    ,"Lsh;" * "\x21B0"
+    ,"lsh;" * "\x21B0"
+    ,"lsim;" * "\x2272"
+    ,"lsime;" * "\x2A8D"
+    ,"lsimg;" * "\x2A8F"
+    ,"lsqb;" * "\x005B"
+    ,"lsquo;" * "\x2018"
+    ,"lsquor;" * "\x201A"
+    ,"Lstrok;" * "\x0141"
+    ,"lstrok;" * "\x0142"
+    ,"LT" * "\x003C"
+    ,"lt" * "\x003C"
+    ,"LT;" * "\x003C"
+    ,"lt;" * "\x003C"
+    ,"Lt;" * "\x226A"
+    ,"ltcc;" * "\x2AA6"
+    ,"ltcir;" * "\x2A79"
+    ,"ltdot;" * "\x22D6"
+    ,"lthree;" * "\x22CB"
+    ,"ltimes;" * "\x22C9"
+    ,"ltlarr;" * "\x2976"
+    ,"ltquest;" * "\x2A7B"
+    ,"ltri;" * "\x25C3"
+    ,"ltrie;" * "\x22B4"
+    ,"ltrif;" * "\x25C2"
+    ,"ltrPar;" * "\x2996"
+    ,"lurdshar;" * "\x294A"
+    ,"luruhar;" * "\x2966"
+    ,"lvertneqq;" * "\x2268\xFE00"
+    ,"lvnE;" * "\x2268\xFE00"
+    ,"macr" * "\x00AF"
+    ,"macr;" * "\x00AF"
+    ,"male;" * "\x2642"
+    ,"malt;" * "\x2720"
+    ,"maltese;" * "\x2720"
+    ,"map;" * "\x21A6"
+    ,"Map;" * "\x2905"
+    ,"mapsto;" * "\x21A6"
+    ,"mapstodown;" * "\x21A7"
+    ,"mapstoleft;" * "\x21A4"
+    ,"mapstoup;" * "\x21A5"
+    ,"marker;" * "\x25AE"
+    ,"mcomma;" * "\x2A29"
+    ,"Mcy;" * "\x041C"
+    ,"mcy;" * "\x043C"
+    ,"mdash;" * "\x2014"
+    ,"mDDot;" * "\x223A"
+    ,"measuredangle;" * "\x2221"
+    ,"MediumSpace;" * "\x205F"
+    ,"Mellintrf;" * "\x2133"
+    ,"Mfr;" * "\x1D510"
+    ,"mfr;" * "\x1D52A"
+    ,"mho;" * "\x2127"
+    ,"micro" * "\x00B5"
+    ,"micro;" * "\x00B5"
+    ,"mid;" * "\x2223"
+    ,"midast;" * "\x002A"
+    ,"midcir;" * "\x2AF0"
+    ,"middot" * "\x00B7"
+    ,"middot;" * "\x00B7"
+    ,"minus;" * "\x2212"
+    ,"minusb;" * "\x229F"
+    ,"minusd;" * "\x2238"
+    ,"minusdu;" * "\x2A2A"
+    ,"MinusPlus;" * "\x2213"
+    ,"mlcp;" * "\x2ADB"
+    ,"mldr;" * "\x2026"
+    ,"mnplus;" * "\x2213"
+    ,"models;" * "\x22A7"
+    ,"Mopf;" * "\x1D544"
+    ,"mopf;" * "\x1D55E"
+    ,"mp;" * "\x2213"
+    ,"Mscr;" * "\x2133"
+    ,"mscr;" * "\x1D4C2"
+    ,"mstpos;" * "\x223E"
+    ,"Mu;" * "\x039C"
+    ,"mu;" * "\x03BC"
+    ,"multimap;" * "\x22B8"
+    ,"mumap;" * "\x22B8"
+    ,"nabla;" * "\x2207"
+    ,"Nacute;" * "\x0143"
+    ,"nacute;" * "\x0144"
+    ,"nang;" * "\x2220\x20D2"
+    ,"nap;" * "\x2249"
+    ,"napE;" * "\x2A70\x0338"
+    ,"napid;" * "\x224B\x0338"
+    ,"napos;" * "\x0149"
+    ,"napprox;" * "\x2249"
+    ,"natur;" * "\x266E"
+    ,"natural;" * "\x266E"
+    ,"naturals;" * "\x2115"
+    ,"nbsp" * "\x00A0"
+    ,"nbsp;" * "\x00A0"
+    ,"nbump;" * "\x224E\x0338"
+    ,"nbumpe;" * "\x224F\x0338"
+    ,"ncap;" * "\x2A43"
+    ,"Ncaron;" * "\x0147"
+    ,"ncaron;" * "\x0148"
+    ,"Ncedil;" * "\x0145"
+    ,"ncedil;" * "\x0146"
+    ,"ncong;" * "\x2247"
+    ,"ncongdot;" * "\x2A6D\x0338"
+    ,"ncup;" * "\x2A42"
+    ,"Ncy;" * "\x041D"
+    ,"ncy;" * "\x043D"
+    ,"ndash;" * "\x2013"
+    ,"ne;" * "\x2260"
+    ,"nearhk;" * "\x2924"
+    ,"nearr;" * "\x2197"
+    ,"neArr;" * "\x21D7"
+    ,"nearrow;" * "\x2197"
+    ,"nedot;" * "\x2250\x0338"
+    ,"NegativeMediumSpace;" * "\x200B"
+    ,"NegativeThickSpace;" * "\x200B"
+    ,"NegativeThinSpace;" * "\x200B"
+    ,"NegativeVeryThinSpace;" * "\x200B"
+    ,"nequiv;" * "\x2262"
+    ,"nesear;" * "\x2928"
+    ,"nesim;" * "\x2242\x0338"
+    ,"NestedGreaterGreater;" * "\x226B"
+    ,"NestedLessLess;" * "\x226A"
+    ,"NewLine;" * "\x000A"
+    ,"nexist;" * "\x2204"
+    ,"nexists;" * "\x2204"
+    ,"Nfr;" * "\x1D511"
+    ,"nfr;" * "\x1D52B"
+    ,"ngE;" * "\x2267\x0338"
+    ,"nge;" * "\x2271"
+    ,"ngeq;" * "\x2271"
+    ,"ngeqq;" * "\x2267\x0338"
+    ,"ngeqslant;" * "\x2A7E\x0338"
+    ,"nges;" * "\x2A7E\x0338"
+    ,"nGg;" * "\x22D9\x0338"
+    ,"ngsim;" * "\x2275"
+    ,"nGt;" * "\x226B\x20D2"
+    ,"ngt;" * "\x226F"
+    ,"ngtr;" * "\x226F"
+    ,"nGtv;" * "\x226B\x0338"
+    ,"nharr;" * "\x21AE"
+    ,"nhArr;" * "\x21CE"
+    ,"nhpar;" * "\x2AF2"
+    ,"ni;" * "\x220B"
+    ,"nis;" * "\x22FC"
+    ,"nisd;" * "\x22FA"
+    ,"niv;" * "\x220B"
+    ,"NJcy;" * "\x040A"
+    ,"njcy;" * "\x045A"
+    ,"nlarr;" * "\x219A"
+    ,"nlArr;" * "\x21CD"
+    ,"nldr;" * "\x2025"
+    ,"nlE;" * "\x2266\x0338"
+    ,"nle;" * "\x2270"
+    ,"nleftarrow;" * "\x219A"
+    ,"nLeftarrow;" * "\x21CD"
+    ,"nleftrightarrow;" * "\x21AE"
+    ,"nLeftrightarrow;" * "\x21CE"
+    ,"nleq;" * "\x2270"
+    ,"nleqq;" * "\x2266\x0338"
+    ,"nleqslant;" * "\x2A7D\x0338"
+    ,"nles;" * "\x2A7D\x0338"
+    ,"nless;" * "\x226E"
+    ,"nLl;" * "\x22D8\x0338"
+    ,"nlsim;" * "\x2274"
+    ,"nLt;" * "\x226A\x20D2"
+    ,"nlt;" * "\x226E"
+    ,"nltri;" * "\x22EA"
+    ,"nltrie;" * "\x22EC"
+    ,"nLtv;" * "\x226A\x0338"
+    ,"nmid;" * "\x2224"
+    ,"NoBreak;" * "\x2060"
+    ,"NonBreakingSpace;" * "\x00A0"
+    ,"Nopf;" * "\x2115"
+    ,"nopf;" * "\x1D55F"
+    ,"not" * "\x00AC"
+    ,"not;" * "\x00AC"
+    ,"Not;" * "\x2AEC"
+    ,"NotCongruent;" * "\x2262"
+    ,"NotCupCap;" * "\x226D"
+    ,"NotDoubleVerticalBar;" * "\x2226"
+    ,"NotElement;" * "\x2209"
+    ,"NotEqual;" * "\x2260"
+    ,"NotEqualTilde;" * "\x2242\x0338"
+    ,"NotExists;" * "\x2204"
+    ,"NotGreater;" * "\x226F"
+    ,"NotGreaterEqual;" * "\x2271"
+    ,"NotGreaterFullEqual;" * "\x2267\x0338"
+    ,"NotGreaterGreater;" * "\x226B\x0338"
+    ,"NotGreaterLess;" * "\x2279"
+    ,"NotGreaterSlantEqual;" * "\x2A7E\x0338"
+    ,"NotGreaterTilde;" * "\x2275"
+    ,"NotHumpDownHump;" * "\x224E\x0338"
+    ,"NotHumpEqual;" * "\x224F\x0338"
+    ,"notin;" * "\x2209"
+    ,"notindot;" * "\x22F5\x0338"
+    ,"notinE;" * "\x22F9\x0338"
+    ,"notinva;" * "\x2209"
+    ,"notinvb;" * "\x22F7"
+    ,"notinvc;" * "\x22F6"
+    ,"NotLeftTriangle;" * "\x22EA"
+    ,"NotLeftTriangleBar;" * "\x29CF\x0338"
+    ,"NotLeftTriangleEqual;" * "\x22EC"
+    ,"NotLess;" * "\x226E"
+    ,"NotLessEqual;" * "\x2270"
+    ,"NotLessGreater;" * "\x2278"
+    ,"NotLessLess;" * "\x226A\x0338"
+    ,"NotLessSlantEqual;" * "\x2A7D\x0338"
+    ,"NotLessTilde;" * "\x2274"
+    ,"NotNestedGreaterGreater;" * "\x2AA2\x0338"
+    ,"NotNestedLessLess;" * "\x2AA1\x0338"
+    ,"notni;" * "\x220C"
+    ,"notniva;" * "\x220C"
+    ,"notnivb;" * "\x22FE"
+    ,"notnivc;" * "\x22FD"
+    ,"NotPrecedes;" * "\x2280"
+    ,"NotPrecedesEqual;" * "\x2AAF\x0338"
+    ,"NotPrecedesSlantEqual;" * "\x22E0"
+    ,"NotReverseElement;" * "\x220C"
+    ,"NotRightTriangle;" * "\x22EB"
+    ,"NotRightTriangleBar;" * "\x29D0\x0338"
+    ,"NotRightTriangleEqual;" * "\x22ED"
+    ,"NotSquareSubset;" * "\x228F\x0338"
+    ,"NotSquareSubsetEqual;" * "\x22E2"
+    ,"NotSquareSuperset;" * "\x2290\x0338"
+    ,"NotSquareSupersetEqual;" * "\x22E3"
+    ,"NotSubset;" * "\x2282\x20D2"
+    ,"NotSubsetEqual;" * "\x2288"
+    ,"NotSucceeds;" * "\x2281"
+    ,"NotSucceedsEqual;" * "\x2AB0\x0338"
+    ,"NotSucceedsSlantEqual;" * "\x22E1"
+    ,"NotSucceedsTilde;" * "\x227F\x0338"
+    ,"NotSuperset;" * "\x2283\x20D2"
+    ,"NotSupersetEqual;" * "\x2289"
+    ,"NotTilde;" * "\x2241"
+    ,"NotTildeEqual;" * "\x2244"
+    ,"NotTildeFullEqual;" * "\x2247"
+    ,"NotTildeTilde;" * "\x2249"
+    ,"NotVerticalBar;" * "\x2224"
+    ,"npar;" * "\x2226"
+    ,"nparallel;" * "\x2226"
+    ,"nparsl;" * "\x2AFD\x20E5"
+    ,"npart;" * "\x2202\x0338"
+    ,"npolint;" * "\x2A14"
+    ,"npr;" * "\x2280"
+    ,"nprcue;" * "\x22E0"
+    ,"npre;" * "\x2AAF\x0338"
+    ,"nprec;" * "\x2280"
+    ,"npreceq;" * "\x2AAF\x0338"
+    ,"nrarr;" * "\x219B"
+    ,"nrArr;" * "\x21CF"
+    ,"nrarrc;" * "\x2933\x0338"
+    ,"nrarrw;" * "\x219D\x0338"
+    ,"nrightarrow;" * "\x219B"
+    ,"nRightarrow;" * "\x21CF"
+    ,"nrtri;" * "\x22EB"
+    ,"nrtrie;" * "\x22ED"
+    ,"nsc;" * "\x2281"
+    ,"nsccue;" * "\x22E1"
+    ,"nsce;" * "\x2AB0\x0338"
+    ,"Nscr;" * "\x1D4A9"
+    ,"nscr;" * "\x1D4C3"
+    ,"nshortmid;" * "\x2224"
+    ,"nshortparallel;" * "\x2226"
+    ,"nsim;" * "\x2241"
+    ,"nsime;" * "\x2244"
+    ,"nsimeq;" * "\x2244"
+    ,"nsmid;" * "\x2224"
+    ,"nspar;" * "\x2226"
+    ,"nsqsube;" * "\x22E2"
+    ,"nsqsupe;" * "\x22E3"
+    ,"nsub;" * "\x2284"
+    ,"nsube;" * "\x2288"
+    ,"nsubE;" * "\x2AC5\x0338"
+    ,"nsubset;" * "\x2282\x20D2"
+    ,"nsubseteq;" * "\x2288"
+    ,"nsubseteqq;" * "\x2AC5\x0338"
+    ,"nsucc;" * "\x2281"
+    ,"nsucceq;" * "\x2AB0\x0338"
+    ,"nsup;" * "\x2285"
+    ,"nsupe;" * "\x2289"
+    ,"nsupE;" * "\x2AC6\x0338"
+    ,"nsupset;" * "\x2283\x20D2"
+    ,"nsupseteq;" * "\x2289"
+    ,"nsupseteqq;" * "\x2AC6\x0338"
+    ,"ntgl;" * "\x2279"
+    ,"Ntilde" * "\x00D1"
+    ,"ntilde" * "\x00F1"
+    ,"Ntilde;" * "\x00D1"
+    ,"ntilde;" * "\x00F1"
+    ,"ntlg;" * "\x2278"
+    ,"ntriangleleft;" * "\x22EA"
+    ,"ntrianglelefteq;" * "\x22EC"
+    ,"ntriangleright;" * "\x22EB"
+    ,"ntrianglerighteq;" * "\x22ED"
+    ,"Nu;" * "\x039D"
+    ,"nu;" * "\x03BD"
+    ,"num;" * "\x0023"
+    ,"numero;" * "\x2116"
+    ,"numsp;" * "\x2007"
+    ,"nvap;" * "\x224D\x20D2"
+    ,"nvdash;" * "\x22AC"
+    ,"nvDash;" * "\x22AD"
+    ,"nVdash;" * "\x22AE"
+    ,"nVDash;" * "\x22AF"
+    ,"nvge;" * "\x2265\x20D2"
+    ,"nvgt;" * "\x003E\x20D2"
+    ,"nvHarr;" * "\x2904"
+    ,"nvinfin;" * "\x29DE"
+    ,"nvlArr;" * "\x2902"
+    ,"nvle;" * "\x2264\x20D2"
+    ,"nvlt;" * "\x003C\x20D2"
+    ,"nvltrie;" * "\x22B4\x20D2"
+    ,"nvrArr;" * "\x2903"
+    ,"nvrtrie;" * "\x22B5\x20D2"
+    ,"nvsim;" * "\x223C\x20D2"
+    ,"nwarhk;" * "\x2923"
+    ,"nwarr;" * "\x2196"
+    ,"nwArr;" * "\x21D6"
+    ,"nwarrow;" * "\x2196"
+    ,"nwnear;" * "\x2927"
+    ,"Oacute" * "\x00D3"
+    ,"oacute" * "\x00F3"
+    ,"Oacute;" * "\x00D3"
+    ,"oacute;" * "\x00F3"
+    ,"oast;" * "\x229B"
+    ,"ocir;" * "\x229A"
+    ,"Ocirc" * "\x00D4"
+    ,"ocirc" * "\x00F4"
+    ,"Ocirc;" * "\x00D4"
+    ,"ocirc;" * "\x00F4"
+    ,"Ocy;" * "\x041E"
+    ,"ocy;" * "\x043E"
+    ,"odash;" * "\x229D"
+    ,"Odblac;" * "\x0150"
+    ,"odblac;" * "\x0151"
+    ,"odiv;" * "\x2A38"
+    ,"odot;" * "\x2299"
+    ,"odsold;" * "\x29BC"
+    ,"OElig;" * "\x0152"
+    ,"oelig;" * "\x0153"
+    ,"ofcir;" * "\x29BF"
+    ,"Ofr;" * "\x1D512"
+    ,"ofr;" * "\x1D52C"
+    ,"ogon;" * "\x02DB"
+    ,"Ograve" * "\x00D2"
+    ,"ograve" * "\x00F2"
+    ,"Ograve;" * "\x00D2"
+    ,"ograve;" * "\x00F2"
+    ,"ogt;" * "\x29C1"
+    ,"ohbar;" * "\x29B5"
+    ,"ohm;" * "\x03A9"
+    ,"oint;" * "\x222E"
+    ,"olarr;" * "\x21BA"
+    ,"olcir;" * "\x29BE"
+    ,"olcross;" * "\x29BB"
+    ,"oline;" * "\x203E"
+    ,"olt;" * "\x29C0"
+    ,"Omacr;" * "\x014C"
+    ,"omacr;" * "\x014D"
+    ,"Omega;" * "\x03A9"
+    ,"omega;" * "\x03C9"
+    ,"Omicron;" * "\x039F"
+    ,"omicron;" * "\x03BF"
+    ,"omid;" * "\x29B6"
+    ,"ominus;" * "\x2296"
+    ,"Oopf;" * "\x1D546"
+    ,"oopf;" * "\x1D560"
+    ,"opar;" * "\x29B7"
+    ,"OpenCurlyDoubleQuote;" * "\x201C"
+    ,"OpenCurlyQuote;" * "\x2018"
+    ,"operp;" * "\x29B9"
+    ,"oplus;" * "\x2295"
+    ,"or;" * "\x2228"
+    ,"Or;" * "\x2A54"
+    ,"orarr;" * "\x21BB"
+    ,"ord;" * "\x2A5D"
+    ,"order;" * "\x2134"
+    ,"orderof;" * "\x2134"
+    ,"ordf" * "\x00AA"
+    ,"ordf;" * "\x00AA"
+    ,"ordm" * "\x00BA"
+    ,"ordm;" * "\x00BA"
+    ,"origof;" * "\x22B6"
+    ,"oror;" * "\x2A56"
+    ,"orslope;" * "\x2A57"
+    ,"orv;" * "\x2A5B"
+    ,"oS;" * "\x24C8"
+    ,"oscr;" * "\x2134"
+    ,"Oscr;" * "\x1D4AA"
+    ,"Oslash" * "\x00D8"
+    ,"oslash" * "\x00F8"
+    ,"Oslash;" * "\x00D8"
+    ,"oslash;" * "\x00F8"
+    ,"osol;" * "\x2298"
+    ,"Otilde" * "\x00D5"
+    ,"otilde" * "\x00F5"
+    ,"Otilde;" * "\x00D5"
+    ,"otilde;" * "\x00F5"
+    ,"otimes;" * "\x2297"
+    ,"Otimes;" * "\x2A37"
+    ,"otimesas;" * "\x2A36"
+    ,"Ouml" * "\x00D6"
+    ,"ouml" * "\x00F6"
+    ,"Ouml;" * "\x00D6"
+    ,"ouml;" * "\x00F6"
+    ,"ovbar;" * "\x233D"
+    ,"OverBar;" * "\x203E"
+    ,"OverBrace;" * "\x23DE"
+    ,"OverBracket;" * "\x23B4"
+    ,"OverParenthesis;" * "\x23DC"
+    ,"par;" * "\x2225"
+    ,"para" * "\x00B6"
+    ,"para;" * "\x00B6"
+    ,"parallel;" * "\x2225"
+    ,"parsim;" * "\x2AF3"
+    ,"parsl;" * "\x2AFD"
+    ,"part;" * "\x2202"
+    ,"PartialD;" * "\x2202"
+    ,"Pcy;" * "\x041F"
+    ,"pcy;" * "\x043F"
+    ,"percnt;" * "\x0025"
+    ,"period;" * "\x002E"
+    ,"permil;" * "\x2030"
+    ,"perp;" * "\x22A5"
+    ,"pertenk;" * "\x2031"
+    ,"Pfr;" * "\x1D513"
+    ,"pfr;" * "\x1D52D"
+    ,"Phi;" * "\x03A6"
+    ,"phi;" * "\x03C6"
+    ,"phiv;" * "\x03D5"
+    ,"phmmat;" * "\x2133"
+    ,"phone;" * "\x260E"
+    ,"Pi;" * "\x03A0"
+    ,"pi;" * "\x03C0"
+    ,"pitchfork;" * "\x22D4"
+    ,"piv;" * "\x03D6"
+    ,"planck;" * "\x210F"
+    ,"planckh;" * "\x210E"
+    ,"plankv;" * "\x210F"
+    ,"plus;" * "\x002B"
+    ,"plusacir;" * "\x2A23"
+    ,"plusb;" * "\x229E"
+    ,"pluscir;" * "\x2A22"
+    ,"plusdo;" * "\x2214"
+    ,"plusdu;" * "\x2A25"
+    ,"pluse;" * "\x2A72"
+    ,"PlusMinus;" * "\x00B1"
+    ,"plusmn" * "\x00B1"
+    ,"plusmn;" * "\x00B1"
+    ,"plussim;" * "\x2A26"
+    ,"plustwo;" * "\x2A27"
+    ,"pm;" * "\x00B1"
+    ,"Poincareplane;" * "\x210C"
+    ,"pointint;" * "\x2A15"
+    ,"Popf;" * "\x2119"
+    ,"popf;" * "\x1D561"
+    ,"pound" * "\x00A3"
+    ,"pound;" * "\x00A3"
+    ,"pr;" * "\x227A"
+    ,"Pr;" * "\x2ABB"
+    ,"prap;" * "\x2AB7"
+    ,"prcue;" * "\x227C"
+    ,"pre;" * "\x2AAF"
+    ,"prE;" * "\x2AB3"
+    ,"prec;" * "\x227A"
+    ,"precapprox;" * "\x2AB7"
+    ,"preccurlyeq;" * "\x227C"
+    ,"Precedes;" * "\x227A"
+    ,"PrecedesEqual;" * "\x2AAF"
+    ,"PrecedesSlantEqual;" * "\x227C"
+    ,"PrecedesTilde;" * "\x227E"
+    ,"preceq;" * "\x2AAF"
+    ,"precnapprox;" * "\x2AB9"
+    ,"precneqq;" * "\x2AB5"
+    ,"precnsim;" * "\x22E8"
+    ,"precsim;" * "\x227E"
+    ,"prime;" * "\x2032"
+    ,"Prime;" * "\x2033"
+    ,"primes;" * "\x2119"
+    ,"prnap;" * "\x2AB9"
+    ,"prnE;" * "\x2AB5"
+    ,"prnsim;" * "\x22E8"
+    ,"prod;" * "\x220F"
+    ,"Product;" * "\x220F"
+    ,"profalar;" * "\x232E"
+    ,"profline;" * "\x2312"
+    ,"profsurf;" * "\x2313"
+    ,"prop;" * "\x221D"
+    ,"Proportion;" * "\x2237"
+    ,"Proportional;" * "\x221D"
+    ,"propto;" * "\x221D"
+    ,"prsim;" * "\x227E"
+    ,"prurel;" * "\x22B0"
+    ,"Pscr;" * "\x1D4AB"
+    ,"pscr;" * "\x1D4C5"
+    ,"Psi;" * "\x03A8"
+    ,"psi;" * "\x03C8"
+    ,"puncsp;" * "\x2008"
+    ,"Qfr;" * "\x1D514"
+    ,"qfr;" * "\x1D52E"
+    ,"qint;" * "\x2A0C"
+    ,"Qopf;" * "\x211A"
+    ,"qopf;" * "\x1D562"
+    ,"qprime;" * "\x2057"
+    ,"Qscr;" * "\x1D4AC"
+    ,"qscr;" * "\x1D4C6"
+    ,"quaternions;" * "\x210D"
+    ,"quatint;" * "\x2A16"
+    ,"quest;" * "\x003F"
+    ,"questeq;" * "\x225F"
+    ,"QUOT" * "\x0022"
+    ,"quot" * "\x0022"
+    ,"QUOT;" * "\x0022"
+    ,"quot;" * "\x0022"
+    ,"rAarr;" * "\x21DB"
+    ,"race;" * "\x223D\x0331"
+    ,"Racute;" * "\x0154"
+    ,"racute;" * "\x0155"
+    ,"radic;" * "\x221A"
+    ,"raemptyv;" * "\x29B3"
+    ,"rang;" * "\x27E9"
+    ,"Rang;" * "\x27EB"
+    ,"rangd;" * "\x2992"
+    ,"range;" * "\x29A5"
+    ,"rangle;" * "\x27E9"
+    ,"raquo" * "\x00BB"
+    ,"raquo;" * "\x00BB"
+    ,"rarr;" * "\x2192"
+    ,"Rarr;" * "\x21A0"
+    ,"rArr;" * "\x21D2"
+    ,"rarrap;" * "\x2975"
+    ,"rarrb;" * "\x21E5"
+    ,"rarrbfs;" * "\x2920"
+    ,"rarrc;" * "\x2933"
+    ,"rarrfs;" * "\x291E"
+    ,"rarrhk;" * "\x21AA"
+    ,"rarrlp;" * "\x21AC"
+    ,"rarrpl;" * "\x2945"
+    ,"rarrsim;" * "\x2974"
+    ,"rarrtl;" * "\x21A3"
+    ,"Rarrtl;" * "\x2916"
+    ,"rarrw;" * "\x219D"
+    ,"ratail;" * "\x291A"
+    ,"rAtail;" * "\x291C"
+    ,"ratio;" * "\x2236"
+    ,"rationals;" * "\x211A"
+    ,"rbarr;" * "\x290D"
+    ,"rBarr;" * "\x290F"
+    ,"RBarr;" * "\x2910"
+    ,"rbbrk;" * "\x2773"
+    ,"rbrace;" * "\x007D"
+    ,"rbrack;" * "\x005D"
+    ,"rbrke;" * "\x298C"
+    ,"rbrksld;" * "\x298E"
+    ,"rbrkslu;" * "\x2990"
+    ,"Rcaron;" * "\x0158"
+    ,"rcaron;" * "\x0159"
+    ,"Rcedil;" * "\x0156"
+    ,"rcedil;" * "\x0157"
+    ,"rceil;" * "\x2309"
+    ,"rcub;" * "\x007D"
+    ,"Rcy;" * "\x0420"
+    ,"rcy;" * "\x0440"
+    ,"rdca;" * "\x2937"
+    ,"rdldhar;" * "\x2969"
+    ,"rdquo;" * "\x201D"
+    ,"rdquor;" * "\x201D"
+    ,"rdsh;" * "\x21B3"
+    ,"Re;" * "\x211C"
+    ,"real;" * "\x211C"
+    ,"realine;" * "\x211B"
+    ,"realpart;" * "\x211C"
+    ,"reals;" * "\x211D"
+    ,"rect;" * "\x25AD"
+    ,"REG" * "\x00AE"
+    ,"reg" * "\x00AE"
+    ,"REG;" * "\x00AE"
+    ,"reg;" * "\x00AE"
+    ,"ReverseElement;" * "\x220B"
+    ,"ReverseEquilibrium;" * "\x21CB"
+    ,"ReverseUpEquilibrium;" * "\x296F"
+    ,"rfisht;" * "\x297D"
+    ,"rfloor;" * "\x230B"
+    ,"Rfr;" * "\x211C"
+    ,"rfr;" * "\x1D52F"
+    ,"rHar;" * "\x2964"
+    ,"rhard;" * "\x21C1"
+    ,"rharu;" * "\x21C0"
+    ,"rharul;" * "\x296C"
+    ,"Rho;" * "\x03A1"
+    ,"rho;" * "\x03C1"
+    ,"rhov;" * "\x03F1"
+    ,"RightAngleBracket;" * "\x27E9"
+    ,"RightArrow;" * "\x2192"
+    ,"rightarrow;" * "\x2192"
+    ,"Rightarrow;" * "\x21D2"
+    ,"RightArrowBar;" * "\x21E5"
+    ,"RightArrowLeftArrow;" * "\x21C4"
+    ,"rightarrowtail;" * "\x21A3"
+    ,"RightCeiling;" * "\x2309"
+    ,"RightDoubleBracket;" * "\x27E7"
+    ,"RightDownTeeVector;" * "\x295D"
+    ,"RightDownVector;" * "\x21C2"
+    ,"RightDownVectorBar;" * "\x2955"
+    ,"RightFloor;" * "\x230B"
+    ,"rightharpoondown;" * "\x21C1"
+    ,"rightharpoonup;" * "\x21C0"
+    ,"rightleftarrows;" * "\x21C4"
+    ,"rightleftharpoons;" * "\x21CC"
+    ,"rightrightarrows;" * "\x21C9"
+    ,"rightsquigarrow;" * "\x219D"
+    ,"RightTee;" * "\x22A2"
+    ,"RightTeeArrow;" * "\x21A6"
+    ,"RightTeeVector;" * "\x295B"
+    ,"rightthreetimes;" * "\x22CC"
+    ,"RightTriangle;" * "\x22B3"
+    ,"RightTriangleBar;" * "\x29D0"
+    ,"RightTriangleEqual;" * "\x22B5"
+    ,"RightUpDownVector;" * "\x294F"
+    ,"RightUpTeeVector;" * "\x295C"
+    ,"RightUpVector;" * "\x21BE"
+    ,"RightUpVectorBar;" * "\x2954"
+    ,"RightVector;" * "\x21C0"
+    ,"RightVectorBar;" * "\x2953"
+    ,"ring;" * "\x02DA"
+    ,"risingdotseq;" * "\x2253"
+    ,"rlarr;" * "\x21C4"
+    ,"rlhar;" * "\x21CC"
+    ,"rlm;" * "\x200F"
+    ,"rmoust;" * "\x23B1"
+    ,"rmoustache;" * "\x23B1"
+    ,"rnmid;" * "\x2AEE"
+    ,"roang;" * "\x27ED"
+    ,"roarr;" * "\x21FE"
+    ,"robrk;" * "\x27E7"
+    ,"ropar;" * "\x2986"
+    ,"Ropf;" * "\x211D"
+    ,"ropf;" * "\x1D563"
+    ,"roplus;" * "\x2A2E"
+    ,"rotimes;" * "\x2A35"
+    ,"RoundImplies;" * "\x2970"
+    ,"rpar;" * "\x0029"
+    ,"rpargt;" * "\x2994"
+    ,"rppolint;" * "\x2A12"
+    ,"rrarr;" * "\x21C9"
+    ,"Rrightarrow;" * "\x21DB"
+    ,"rsaquo;" * "\x203A"
+    ,"Rscr;" * "\x211B"
+    ,"rscr;" * "\x1D4C7"
+    ,"Rsh;" * "\x21B1"
+    ,"rsh;" * "\x21B1"
+    ,"rsqb;" * "\x005D"
+    ,"rsquo;" * "\x2019"
+    ,"rsquor;" * "\x2019"
+    ,"rthree;" * "\x22CC"
+    ,"rtimes;" * "\x22CA"
+    ,"rtri;" * "\x25B9"
+    ,"rtrie;" * "\x22B5"
+    ,"rtrif;" * "\x25B8"
+    ,"rtriltri;" * "\x29CE"
+    ,"RuleDelayed;" * "\x29F4"
+    ,"ruluhar;" * "\x2968"
+    ,"rx;" * "\x211E"
+    ,"Sacute;" * "\x015A"
+    ,"sacute;" * "\x015B"
+    ,"sbquo;" * "\x201A"
+    ,"sc;" * "\x227B"
+    ,"Sc;" * "\x2ABC"
+    ,"scap;" * "\x2AB8"
+    ,"Scaron;" * "\x0160"
+    ,"scaron;" * "\x0161"
+    ,"sccue;" * "\x227D"
+    ,"sce;" * "\x2AB0"
+    ,"scE;" * "\x2AB4"
+    ,"Scedil;" * "\x015E"
+    ,"scedil;" * "\x015F"
+    ,"Scirc;" * "\x015C"
+    ,"scirc;" * "\x015D"
+    ,"scnap;" * "\x2ABA"
+    ,"scnE;" * "\x2AB6"
+    ,"scnsim;" * "\x22E9"
+    ,"scpolint;" * "\x2A13"
+    ,"scsim;" * "\x227F"
+    ,"Scy;" * "\x0421"
+    ,"scy;" * "\x0441"
+    ,"sdot;" * "\x22C5"
+    ,"sdotb;" * "\x22A1"
+    ,"sdote;" * "\x2A66"
+    ,"searhk;" * "\x2925"
+    ,"searr;" * "\x2198"
+    ,"seArr;" * "\x21D8"
+    ,"searrow;" * "\x2198"
+    ,"sect" * "\x00A7"
+    ,"sect;" * "\x00A7"
+    ,"semi;" * "\x003B"
+    ,"seswar;" * "\x2929"
+    ,"setminus;" * "\x2216"
+    ,"setmn;" * "\x2216"
+    ,"sext;" * "\x2736"
+    ,"Sfr;" * "\x1D516"
+    ,"sfr;" * "\x1D530"
+    ,"sfrown;" * "\x2322"
+    ,"sharp;" * "\x266F"
+    ,"SHCHcy;" * "\x0429"
+    ,"shchcy;" * "\x0449"
+    ,"SHcy;" * "\x0428"
+    ,"shcy;" * "\x0448"
+    ,"ShortDownArrow;" * "\x2193"
+    ,"ShortLeftArrow;" * "\x2190"
+    ,"shortmid;" * "\x2223"
+    ,"shortparallel;" * "\x2225"
+    ,"ShortRightArrow;" * "\x2192"
+    ,"ShortUpArrow;" * "\x2191"
+    ,"shy" * "\x00AD"
+    ,"shy;" * "\x00AD"
+    ,"Sigma;" * "\x03A3"
+    ,"sigma;" * "\x03C3"
+    ,"sigmaf;" * "\x03C2"
+    ,"sigmav;" * "\x03C2"
+    ,"sim;" * "\x223C"
+    ,"simdot;" * "\x2A6A"
+    ,"sime;" * "\x2243"
+    ,"simeq;" * "\x2243"
+    ,"simg;" * "\x2A9E"
+    ,"simgE;" * "\x2AA0"
+    ,"siml;" * "\x2A9D"
+    ,"simlE;" * "\x2A9F"
+    ,"simne;" * "\x2246"
+    ,"simplus;" * "\x2A24"
+    ,"simrarr;" * "\x2972"
+    ,"slarr;" * "\x2190"
+    ,"SmallCircle;" * "\x2218"
+    ,"smallsetminus;" * "\x2216"
+    ,"smashp;" * "\x2A33"
+    ,"smeparsl;" * "\x29E4"
+    ,"smid;" * "\x2223"
+    ,"smile;" * "\x2323"
+    ,"smt;" * "\x2AAA"
+    ,"smte;" * "\x2AAC"
+    ,"smtes;" * "\x2AAC\xFE00"
+    ,"SOFTcy;" * "\x042C"
+    ,"softcy;" * "\x044C"
+    ,"sol;" * "\x002F"
+    ,"solb;" * "\x29C4"
+    ,"solbar;" * "\x233F"
+    ,"Sopf;" * "\x1D54A"
+    ,"sopf;" * "\x1D564"
+    ,"spades;" * "\x2660"
+    ,"spadesuit;" * "\x2660"
+    ,"spar;" * "\x2225"
+    ,"sqcap;" * "\x2293"
+    ,"sqcaps;" * "\x2293\xFE00"
+    ,"sqcup;" * "\x2294"
+    ,"sqcups;" * "\x2294\xFE00"
+    ,"Sqrt;" * "\x221A"
+    ,"sqsub;" * "\x228F"
+    ,"sqsube;" * "\x2291"
+    ,"sqsubset;" * "\x228F"
+    ,"sqsubseteq;" * "\x2291"
+    ,"sqsup;" * "\x2290"
+    ,"sqsupe;" * "\x2292"
+    ,"sqsupset;" * "\x2290"
+    ,"sqsupseteq;" * "\x2292"
+    ,"squ;" * "\x25A1"
+    ,"Square;" * "\x25A1"
+    ,"square;" * "\x25A1"
+    ,"SquareIntersection;" * "\x2293"
+    ,"SquareSubset;" * "\x228F"
+    ,"SquareSubsetEqual;" * "\x2291"
+    ,"SquareSuperset;" * "\x2290"
+    ,"SquareSupersetEqual;" * "\x2292"
+    ,"SquareUnion;" * "\x2294"
+    ,"squarf;" * "\x25AA"
+    ,"squf;" * "\x25AA"
+    ,"srarr;" * "\x2192"
+    ,"Sscr;" * "\x1D4AE"
+    ,"sscr;" * "\x1D4C8"
+    ,"ssetmn;" * "\x2216"
+    ,"ssmile;" * "\x2323"
+    ,"sstarf;" * "\x22C6"
+    ,"Star;" * "\x22C6"
+    ,"star;" * "\x2606"
+    ,"starf;" * "\x2605"
+    ,"straightepsilon;" * "\x03F5"
+    ,"straightphi;" * "\x03D5"
+    ,"strns;" * "\x00AF"
+    ,"sub;" * "\x2282"
+    ,"Sub;" * "\x22D0"
+    ,"subdot;" * "\x2ABD"
+    ,"sube;" * "\x2286"
+    ,"subE;" * "\x2AC5"
+    ,"subedot;" * "\x2AC3"
+    ,"submult;" * "\x2AC1"
+    ,"subne;" * "\x228A"
+    ,"subnE;" * "\x2ACB"
+    ,"subplus;" * "\x2ABF"
+    ,"subrarr;" * "\x2979"
+    ,"subset;" * "\x2282"
+    ,"Subset;" * "\x22D0"
+    ,"subseteq;" * "\x2286"
+    ,"subseteqq;" * "\x2AC5"
+    ,"SubsetEqual;" * "\x2286"
+    ,"subsetneq;" * "\x228A"
+    ,"subsetneqq;" * "\x2ACB"
+    ,"subsim;" * "\x2AC7"
+    ,"subsub;" * "\x2AD5"
+    ,"subsup;" * "\x2AD3"
+    ,"succ;" * "\x227B"
+    ,"succapprox;" * "\x2AB8"
+    ,"succcurlyeq;" * "\x227D"
+    ,"Succeeds;" * "\x227B"
+    ,"SucceedsEqual;" * "\x2AB0"
+    ,"SucceedsSlantEqual;" * "\x227D"
+    ,"SucceedsTilde;" * "\x227F"
+    ,"succeq;" * "\x2AB0"
+    ,"succnapprox;" * "\x2ABA"
+    ,"succneqq;" * "\x2AB6"
+    ,"succnsim;" * "\x22E9"
+    ,"succsim;" * "\x227F"
+    ,"SuchThat;" * "\x220B"
+    ,"Sum;" * "\x2211"
+    ,"sum;" * "\x2211"
+    ,"sung;" * "\x266A"
+    ,"sup1" * "\x00B9"
+    ,"sup1;" * "\x00B9"
+    ,"sup2" * "\x00B2"
+    ,"sup2;" * "\x00B2"
+    ,"sup3" * "\x00B3"
+    ,"sup3;" * "\x00B3"
+    ,"sup;" * "\x2283"
+    ,"Sup;" * "\x22D1"
+    ,"supdot;" * "\x2ABE"
+    ,"supdsub;" * "\x2AD8"
+    ,"supe;" * "\x2287"
+    ,"supE;" * "\x2AC6"
+    ,"supedot;" * "\x2AC4"
+    ,"Superset;" * "\x2283"
+    ,"SupersetEqual;" * "\x2287"
+    ,"suphsol;" * "\x27C9"
+    ,"suphsub;" * "\x2AD7"
+    ,"suplarr;" * "\x297B"
+    ,"supmult;" * "\x2AC2"
+    ,"supne;" * "\x228B"
+    ,"supnE;" * "\x2ACC"
+    ,"supplus;" * "\x2AC0"
+    ,"supset;" * "\x2283"
+    ,"Supset;" * "\x22D1"
+    ,"supseteq;" * "\x2287"
+    ,"supseteqq;" * "\x2AC6"
+    ,"supsetneq;" * "\x228B"
+    ,"supsetneqq;" * "\x2ACC"
+    ,"supsim;" * "\x2AC8"
+    ,"supsub;" * "\x2AD4"
+    ,"supsup;" * "\x2AD6"
+    ,"swarhk;" * "\x2926"
+    ,"swarr;" * "\x2199"
+    ,"swArr;" * "\x21D9"
+    ,"swarrow;" * "\x2199"
+    ,"swnwar;" * "\x292A"
+    ,"szlig" * "\x00DF"
+    ,"szlig;" * "\x00DF"
+    ,"Tab;" * "\x0009"
+    ,"target;" * "\x2316"
+    ,"Tau;" * "\x03A4"
+    ,"tau;" * "\x03C4"
+    ,"tbrk;" * "\x23B4"
+    ,"Tcaron;" * "\x0164"
+    ,"tcaron;" * "\x0165"
+    ,"Tcedil;" * "\x0162"
+    ,"tcedil;" * "\x0163"
+    ,"Tcy;" * "\x0422"
+    ,"tcy;" * "\x0442"
+    ,"tdot;" * "\x20DB"
+    ,"telrec;" * "\x2315"
+    ,"Tfr;" * "\x1D517"
+    ,"tfr;" * "\x1D531"
+    ,"there4;" * "\x2234"
+    ,"Therefore;" * "\x2234"
+    ,"therefore;" * "\x2234"
+    ,"Theta;" * "\x0398"
+    ,"theta;" * "\x03B8"
+    ,"thetasym;" * "\x03D1"
+    ,"thetav;" * "\x03D1"
+    ,"thickapprox;" * "\x2248"
+    ,"thicksim;" * "\x223C"
+    ,"ThickSpace;" * "\x205F\x200A"
+    ,"thinsp;" * "\x2009"
+    ,"ThinSpace;" * "\x2009"
+    ,"thkap;" * "\x2248"
+    ,"thksim;" * "\x223C"
+    ,"THORN" * "\x00DE"
+    ,"thorn" * "\x00FE"
+    ,"THORN;" * "\x00DE"
+    ,"thorn;" * "\x00FE"
+    ,"tilde;" * "\x02DC"
+    ,"Tilde;" * "\x223C"
+    ,"TildeEqual;" * "\x2243"
+    ,"TildeFullEqual;" * "\x2245"
+    ,"TildeTilde;" * "\x2248"
+    ,"times" * "\x00D7"
+    ,"times;" * "\x00D7"
+    ,"timesb;" * "\x22A0"
+    ,"timesbar;" * "\x2A31"
+    ,"timesd;" * "\x2A30"
+    ,"tint;" * "\x222D"
+    ,"toea;" * "\x2928"
+    ,"top;" * "\x22A4"
+    ,"topbot;" * "\x2336"
+    ,"topcir;" * "\x2AF1"
+    ,"Topf;" * "\x1D54B"
+    ,"topf;" * "\x1D565"
+    ,"topfork;" * "\x2ADA"
+    ,"tosa;" * "\x2929"
+    ,"tprime;" * "\x2034"
+    ,"TRADE;" * "\x2122"
+    ,"trade;" * "\x2122"
+    ,"triangle;" * "\x25B5"
+    ,"triangledown;" * "\x25BF"
+    ,"triangleleft;" * "\x25C3"
+    ,"trianglelefteq;" * "\x22B4"
+    ,"triangleq;" * "\x225C"
+    ,"triangleright;" * "\x25B9"
+    ,"trianglerighteq;" * "\x22B5"
+    ,"tridot;" * "\x25EC"
+    ,"trie;" * "\x225C"
+    ,"triminus;" * "\x2A3A"
+    ,"TripleDot;" * "\x20DB"
+    ,"triplus;" * "\x2A39"
+    ,"trisb;" * "\x29CD"
+    ,"tritime;" * "\x2A3B"
+    ,"trpezium;" * "\x23E2"
+    ,"Tscr;" * "\x1D4AF"
+    ,"tscr;" * "\x1D4C9"
+    ,"TScy;" * "\x0426"
+    ,"tscy;" * "\x0446"
+    ,"TSHcy;" * "\x040B"
+    ,"tshcy;" * "\x045B"
+    ,"Tstrok;" * "\x0166"
+    ,"tstrok;" * "\x0167"
+    ,"twixt;" * "\x226C"
+    ,"twoheadleftarrow;" * "\x219E"
+    ,"twoheadrightarrow;" * "\x21A0"
+    ,"Uacute" * "\x00DA"
+    ,"uacute" * "\x00FA"
+    ,"Uacute;" * "\x00DA"
+    ,"uacute;" * "\x00FA"
+    ,"uarr;" * "\x2191"
+    ,"Uarr;" * "\x219F"
+    ,"uArr;" * "\x21D1"
+    ,"Uarrocir;" * "\x2949"
+    ,"Ubrcy;" * "\x040E"
+    ,"ubrcy;" * "\x045E"
+    ,"Ubreve;" * "\x016C"
+    ,"ubreve;" * "\x016D"
+    ,"Ucirc" * "\x00DB"
+    ,"ucirc" * "\x00FB"
+    ,"Ucirc;" * "\x00DB"
+    ,"ucirc;" * "\x00FB"
+    ,"Ucy;" * "\x0423"
+    ,"ucy;" * "\x0443"
+    ,"udarr;" * "\x21C5"
+    ,"Udblac;" * "\x0170"
+    ,"udblac;" * "\x0171"
+    ,"udhar;" * "\x296E"
+    ,"ufisht;" * "\x297E"
+    ,"Ufr;" * "\x1D518"
+    ,"ufr;" * "\x1D532"
+    ,"Ugrave" * "\x00D9"
+    ,"ugrave" * "\x00F9"
+    ,"Ugrave;" * "\x00D9"
+    ,"ugrave;" * "\x00F9"
+    ,"uHar;" * "\x2963"
+    ,"uharl;" * "\x21BF"
+    ,"uharr;" * "\x21BE"
+    ,"uhblk;" * "\x2580"
+    ,"ulcorn;" * "\x231C"
+    ,"ulcorner;" * "\x231C"
+    ,"ulcrop;" * "\x230F"
+    ,"ultri;" * "\x25F8"
+    ,"Umacr;" * "\x016A"
+    ,"umacr;" * "\x016B"
+    ,"uml" * "\x00A8"
+    ,"uml;" * "\x00A8"
+    ,"UnderBar;" * "\x005F"
+    ,"UnderBrace;" * "\x23DF"
+    ,"UnderBracket;" * "\x23B5"
+    ,"UnderParenthesis;" * "\x23DD"
+    ,"Union;" * "\x22C3"
+    ,"UnionPlus;" * "\x228E"
+    ,"Uogon;" * "\x0172"
+    ,"uogon;" * "\x0173"
+    ,"Uopf;" * "\x1D54C"
+    ,"uopf;" * "\x1D566"
+    ,"UpArrow;" * "\x2191"
+    ,"uparrow;" * "\x2191"
+    ,"Uparrow;" * "\x21D1"
+    ,"UpArrowBar;" * "\x2912"
+    ,"UpArrowDownArrow;" * "\x21C5"
+    ,"UpDownArrow;" * "\x2195"
+    ,"updownarrow;" * "\x2195"
+    ,"Updownarrow;" * "\x21D5"
+    ,"UpEquilibrium;" * "\x296E"
+    ,"upharpoonleft;" * "\x21BF"
+    ,"upharpoonright;" * "\x21BE"
+    ,"uplus;" * "\x228E"
+    ,"UpperLeftArrow;" * "\x2196"
+    ,"UpperRightArrow;" * "\x2197"
+    ,"upsi;" * "\x03C5"
+    ,"Upsi;" * "\x03D2"
+    ,"upsih;" * "\x03D2"
+    ,"Upsilon;" * "\x03A5"
+    ,"upsilon;" * "\x03C5"
+    ,"UpTee;" * "\x22A5"
+    ,"UpTeeArrow;" * "\x21A5"
+    ,"upuparrows;" * "\x21C8"
+    ,"urcorn;" * "\x231D"
+    ,"urcorner;" * "\x231D"
+    ,"urcrop;" * "\x230E"
+    ,"Uring;" * "\x016E"
+    ,"uring;" * "\x016F"
+    ,"urtri;" * "\x25F9"
+    ,"Uscr;" * "\x1D4B0"
+    ,"uscr;" * "\x1D4CA"
+    ,"utdot;" * "\x22F0"
+    ,"Utilde;" * "\x0168"
+    ,"utilde;" * "\x0169"
+    ,"utri;" * "\x25B5"
+    ,"utrif;" * "\x25B4"
+    ,"uuarr;" * "\x21C8"
+    ,"Uuml" * "\x00DC"
+    ,"uuml" * "\x00FC"
+    ,"Uuml;" * "\x00DC"
+    ,"uuml;" * "\x00FC"
+    ,"uwangle;" * "\x29A7"
+    ,"vangrt;" * "\x299C"
+    ,"varepsilon;" * "\x03F5"
+    ,"varkappa;" * "\x03F0"
+    ,"varnothing;" * "\x2205"
+    ,"varphi;" * "\x03D5"
+    ,"varpi;" * "\x03D6"
+    ,"varpropto;" * "\x221D"
+    ,"varr;" * "\x2195"
+    ,"vArr;" * "\x21D5"
+    ,"varrho;" * "\x03F1"
+    ,"varsigma;" * "\x03C2"
+    ,"varsubsetneq;" * "\x228A\xFE00"
+    ,"varsubsetneqq;" * "\x2ACB\xFE00"
+    ,"varsupsetneq;" * "\x228B\xFE00"
+    ,"varsupsetneqq;" * "\x2ACC\xFE00"
+    ,"vartheta;" * "\x03D1"
+    ,"vartriangleleft;" * "\x22B2"
+    ,"vartriangleright;" * "\x22B3"
+    ,"vBar;" * "\x2AE8"
+    ,"Vbar;" * "\x2AEB"
+    ,"vBarv;" * "\x2AE9"
+    ,"Vcy;" * "\x0412"
+    ,"vcy;" * "\x0432"
+    ,"vdash;" * "\x22A2"
+    ,"vDash;" * "\x22A8"
+    ,"Vdash;" * "\x22A9"
+    ,"VDash;" * "\x22AB"
+    ,"Vdashl;" * "\x2AE6"
+    ,"vee;" * "\x2228"
+    ,"Vee;" * "\x22C1"
+    ,"veebar;" * "\x22BB"
+    ,"veeeq;" * "\x225A"
+    ,"vellip;" * "\x22EE"
+    ,"verbar;" * "\x007C"
+    ,"Verbar;" * "\x2016"
+    ,"vert;" * "\x007C"
+    ,"Vert;" * "\x2016"
+    ,"VerticalBar;" * "\x2223"
+    ,"VerticalLine;" * "\x007C"
+    ,"VerticalSeparator;" * "\x2758"
+    ,"VerticalTilde;" * "\x2240"
+    ,"VeryThinSpace;" * "\x200A"
+    ,"Vfr;" * "\x1D519"
+    ,"vfr;" * "\x1D533"
+    ,"vltri;" * "\x22B2"
+    ,"vnsub;" * "\x2282\x20D2"
+    ,"vnsup;" * "\x2283\x20D2"
+    ,"Vopf;" * "\x1D54D"
+    ,"vopf;" * "\x1D567"
+    ,"vprop;" * "\x221D"
+    ,"vrtri;" * "\x22B3"
+    ,"Vscr;" * "\x1D4B1"
+    ,"vscr;" * "\x1D4CB"
+    ,"vsubne;" * "\x228A\xFE00"
+    ,"vsubnE;" * "\x2ACB\xFE00"
+    ,"vsupne;" * "\x228B\xFE00"
+    ,"vsupnE;" * "\x2ACC\xFE00"
+    ,"Vvdash;" * "\x22AA"
+    ,"vzigzag;" * "\x299A"
+    ,"Wcirc;" * "\x0174"
+    ,"wcirc;" * "\x0175"
+    ,"wedbar;" * "\x2A5F"
+    ,"wedge;" * "\x2227"
+    ,"Wedge;" * "\x22C0"
+    ,"wedgeq;" * "\x2259"
+    ,"weierp;" * "\x2118"
+    ,"Wfr;" * "\x1D51A"
+    ,"wfr;" * "\x1D534"
+    ,"Wopf;" * "\x1D54E"
+    ,"wopf;" * "\x1D568"
+    ,"wp;" * "\x2118"
+    ,"wr;" * "\x2240"
+    ,"wreath;" * "\x2240"
+    ,"Wscr;" * "\x1D4B2"
+    ,"wscr;" * "\x1D4CC"
+    ,"xcap;" * "\x22C2"
+    ,"xcirc;" * "\x25EF"
+    ,"xcup;" * "\x22C3"
+    ,"xdtri;" * "\x25BD"
+    ,"Xfr;" * "\x1D51B"
+    ,"xfr;" * "\x1D535"
+    ,"xharr;" * "\x27F7"
+    ,"xhArr;" * "\x27FA"
+    ,"Xi;" * "\x039E"
+    ,"xi;" * "\x03BE"
+    ,"xlarr;" * "\x27F5"
+    ,"xlArr;" * "\x27F8"
+    ,"xmap;" * "\x27FC"
+    ,"xnis;" * "\x22FB"
+    ,"xodot;" * "\x2A00"
+    ,"Xopf;" * "\x1D54F"
+    ,"xopf;" * "\x1D569"
+    ,"xoplus;" * "\x2A01"
+    ,"xotime;" * "\x2A02"
+    ,"xrarr;" * "\x27F6"
+    ,"xrArr;" * "\x27F9"
+    ,"Xscr;" * "\x1D4B3"
+    ,"xscr;" * "\x1D4CD"
+    ,"xsqcup;" * "\x2A06"
+    ,"xuplus;" * "\x2A04"
+    ,"xutri;" * "\x25B3"
+    ,"xvee;" * "\x22C1"
+    ,"xwedge;" * "\x22C0"
+    ,"Yacute" * "\x00DD"
+    ,"yacute" * "\x00FD"
+    ,"Yacute;" * "\x00DD"
+    ,"yacute;" * "\x00FD"
+    ,"YAcy;" * "\x042F"
+    ,"yacy;" * "\x044F"
+    ,"Ycirc;" * "\x0176"
+    ,"ycirc;" * "\x0177"
+    ,"Ycy;" * "\x042B"
+    ,"ycy;" * "\x044B"
+    ,"yen" * "\x00A5"
+    ,"yen;" * "\x00A5"
+    ,"Yfr;" * "\x1D51C"
+    ,"yfr;" * "\x1D536"
+    ,"YIcy;" * "\x0407"
+    ,"yicy;" * "\x0457"
+    ,"Yopf;" * "\x1D550"
+    ,"yopf;" * "\x1D56A"
+    ,"Yscr;" * "\x1D4B4"
+    ,"yscr;" * "\x1D4CE"
+    ,"YUcy;" * "\x042E"
+    ,"yucy;" * "\x044E"
+    ,"yuml" * "\x00FF"
+    ,"yuml;" * "\x00FF"
+    ,"Yuml;" * "\x0178"
+    ,"Zacute;" * "\x0179"
+    ,"zacute;" * "\x017A"
+    ,"Zcaron;" * "\x017D"
+    ,"zcaron;" * "\x017E"
+    ,"Zcy;" * "\x0417"
+    ,"zcy;" * "\x0437"
+    ,"Zdot;" * "\x017B"
+    ,"zdot;" * "\x017C"
+    ,"zeetrf;" * "\x2128"
+    ,"ZeroWidthSpace;" * "\x200B"
+    ,"Zeta;" * "\x0396"
+    ,"zeta;" * "\x03B6"
+    ,"Zfr;" * "\x2128"
+    ,"zfr;" * "\x1D537"
+    ,"ZHcy;" * "\x0416"
+    ,"zhcy;" * "\x0436"
+    ,"zigrarr;" * "\x21DD"
+    ,"Zopf;" * "\x2124"
+    ,"zopf;" * "\x1D56B"
+    ,"Zscr;" * "\x1D4B5"
+    ,"zscr;" * "\x1D4CF"
+    ,"zwj;" * "\x200D"
+    ,"zwnj;" * "\x200C"
+    ]
diff --git a/src/Text/HTML/TagSoup/Generated.hs b/src/Text/HTML/TagSoup/Generated.hs
new file mode 100644
index 0000000..b82916d
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Generated.hs
@@ -0,0 +1,2 @@
+module Text.HTML.TagSoup.Generated(parseTagsOptions) where
+import Text.HTML.TagSoup.Manual
diff --git a/src/Text/HTML/TagSoup/Implementation.hs b/src/Text/HTML/TagSoup/Implementation.hs
new file mode 100644
index 0000000..b110850
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Implementation.hs
@@ -0,0 +1,193 @@
+{-# LANGUAGE RecordWildCards, PatternGuards, ScopedTypeVariables #-}
+
+module Text.HTML.TagSoup.Implementation where
+
+import Text.HTML.TagSoup.Type
+import Text.HTML.TagSoup.Options
+import Text.StringLike as Str
+import Numeric (readHex)
+import Data.Char (chr, ord)
+import Data.Ix
+import Control.Exception(assert)
+import Control.Arrow
+
+---------------------------------------------------------------------
+-- BOTTOM LAYER
+
+data Out             -- one element of the flat token stream that 'output' turns into tags
+    = Char Char       -- a literal character (text, or part of the name/value that follows a marker token)
+    | Tag             -- < (start of an open tag; the name follows as Char tokens)
+    | TagShut         -- </ (start of a close tag; the name follows as Char tokens)
+    | AttName         -- start of an attribute name
+    | AttVal          -- start of an attribute value
+    | TagEnd          -- >
+    | TagEndClose     -- />
+    | Comment         -- <!--
+    | CommentEnd      -- -->
+    | EntityName      -- & (named entity; the name follows as Char tokens)
+    | EntityNum       -- &# (decimal character reference; the digits follow as Char tokens)
+    | EntityHex       -- &#x (hexadecimal character reference; the digits follow as Char tokens)
+    | EntityEnd Bool  -- end of an entity: True if it was followed by ;, False if the ; was missing
+    | Warn String     -- a warning message
+    | Pos Position    -- updates the current source position
+      deriving (Show,Eq)
+
+errSeen x = Warn (concat ["Unexpected ", show x]) -- 'Warn' token reading "Unexpected" followed by the shown value
+errWant x = Warn (concat ["Expected ", show x])   -- 'Warn' token reading "Expected" followed by the shown value
+
+data S = S                      -- lexer input state, as built by 'expand'
+    {s :: S                     -- this state itself
+    ,tl :: S                    -- the state after dropping the current character
+    ,hd :: Char                 -- the current character, or '\0' when the input is exhausted
+    ,eof :: Bool                -- True when no input remains
+    ,next :: String -> Maybe S  -- if the remaining input starts with the given string, the state just after it
+    ,pos :: [Out] -> [Out]      -- prepends a 'Pos' token carrying the current position
+    }
+
+-- | Build the lexer state for the remaining input @text@, whose first character is at position @p@.
+expand :: Position -> String -> S
+expand p text = p `seq` here -- the position is forced before the state is returned (presumably to stop thunks piling up)
+    where
+        here = S{s = here, tl = rest, hd = cur, eof = done, next = match p text, pos = (Pos p:)}
+        rest = expand (positionChar p (head text)) (tail text) -- lazy: must not be demanded once the input is empty
+        done = null text
+        cur | done = '\0' -- placeholder character reported at end of input
+            | otherwise = head text
+
+        -- strip the wanted literal from the front of the input, advancing the position over each matched character
+        match q (c:cs) (w:ws) | c == w = match (positionChar q c) cs ws
+        match q left [] = Just $ expand q left
+        match _ _ _ = Nothing
+
+
+infixr & -- right-associative; no level is given, so the default level (9) applies
+-- Things that can be pushed onto the front of a token stream.
+class Outable a where (&) :: a -> [Out] -> [Out]
+instance Outable Char where c & rest = ampChar c rest
+instance Outable Out where o & rest = ampOut o rest
+ampChar = (:) . Char -- wrap the character in a 'Char' token, then cons it
+ampOut = (:)         -- tokens are consed on unchanged
+
+
+state :: String -> S
+state = expand nullPosition -- initial lexer state: the whole input, starting at 'nullPosition'
+
+---------------------------------------------------------------------
+-- TOP LAYER
+
+
+output :: forall str . StringLike str => ParseOptions str -> [Out] -> [Tag str] -- turn the flat token stream into tags, as directed by the parse options
+output ParseOptions{..} x = (if optTagTextMerge then tagTextMerge else id) $ go ((nullPosition,[]),x)
+    where
+        -- main choice loop; the state is ((current position, pending tags, newest first), remaining tokens)
+        go :: ((Position,[Tag str]),[Out]) -> [Tag str]
+        go ((p,ws),xs) | p `seq` False = [] -- never taken: the guard only forces p, otherwise p is a space leak when optTagPosition == False
+        go ((p,ws),xs) | not $ null ws = (if optTagWarning then (reverse ws++) else id) $ go ((p,[]),xs) -- flush the pending tags (dropped unless optTagWarning)
+        go ((p,ws),Pos p2:xs) = go ((p2,ws),xs) -- a Pos token only updates the current position
+
+        go x | isChar x = pos x $ TagText a : go y -- a run of characters becomes one TagText
+            where (y,a) = charsStr x
+        go x | isTag x = pos x $ TagOpen a b : (if isTagEndClose z then pos x $ TagClose a : go (next z) else go (skip isTagEnd z))
+            where (y,a) = charsStr $ next x -- a = tag name
+                  (z,b) = atts y -- b = attributes
+        go x | isTagShut x = pos x $ (TagClose a:) $ -- close tag; attributes and /> are reported as warnings only
+                (if not (null b) then warn x "Unexpected attributes in close tag" else id) $
+                if isTagEndClose z then warn x "Unexpected self-closing in close tag" $ go (next z) else go (skip isTagEnd z)
+            where (y,a) = charsStr $ next x
+                  (z,b) = atts y
+        go x | isComment x = pos x $ TagComment a : go (skip isCommentEnd y) -- a = comment text
+            where (y,a) = charsStr $ next x
+        go x | isEntityName x = poss x ((if optTagWarning then id else filter (not . isTagWarning)) $ optEntityData (a, getEntityEnd y)) ++ go (skip isEntityEnd y) 
+            where (y,a) = charsStr $ next x -- a = entity name, expanded by optEntityData
+        go x | isEntityNumHex x = pos x $ TagText (fromChar $ entityChr x a) : go (skip isEntityEnd y)
+            where (y,a) = chars $ next x -- a = digits of the numeric reference
+        go x | Just a <- fromWarn x = if optTagWarning then pos x $ TagWarning (fromString a) : go (next x) else go (next x)
+        go x | isEof x = [] -- last equation: any other leading token is a pattern-match failure
+
+        atts :: ((Position,[Tag str]),[Out]) -> ( ((Position,[Tag str]),[Out]) , [(str,str)] )
+        atts x | isAttName x = second ((a,b):) $ atts z -- named attribute; the value is empty when no AttVal follows
+            where (y,a) = charsStr (next x)
+                  (z,b) = if isAttVal y then charsEntsStr (next y) else (y, empty)
+        atts x | isAttVal x = second ((empty,a):) $ atts y -- value without a name: the name is empty
+            where (y,a) = charsEntsStr (next x)
+        atts x = (x, []) -- any other token ends the attribute list
+
+        -- character collection: chars yields a String, charsStr converts it to str, charsEntsStr also expands entities
+        chars x = charss False x
+        charsStr x = (id *** fromString) $ chars x
+        charsEntsStr x = (id *** fromString) $ charss True x
+
+        -- loop round collecting characters, skipping Pos and Warn tokens; if the Bool is set, entities are expanded as well
+        charss :: Bool -> ((Position,[Tag str]),[Out]) -> ( ((Position,[Tag str]),[Out]) , String)
+        charss t x | Just a <- fromChr x = (y, a:b)
+            where (y,b) = charss t (next x)
+        charss t x | t, isEntityName x = second (toString n ++) $ charss t $ addWarns m z
+            where (y,a) = charsStr $ next x
+                  b = getEntityEnd y
+                  z = skip isEntityEnd y
+                  (n,m) = optEntityAttrib (a,b) -- n = replacement text, m = tags to queue
+        charss t x | t, isEntityNumHex x = second (entityChr x a:) $ charss t z
+            where (y,a) = chars $ next x
+                  z = skip isEntityEnd y
+        charss t ((_,w),Pos p:xs) = charss t ((p,w),xs) -- a Pos token only updates the current position
+        charss t x | Just a <- fromWarn x = charss t $ (if optTagWarning then addWarns [TagWarning $ fromString a] else id) $ next x
+        charss t x = (x, []) -- any other token ends the run
+
+        -- utility functions
+        next x = second (drop 1) x -- drop the leading token
+        skip f x = assert (isEof x || f x) (next x) -- drop the leading token, asserting it satisfies f (or that the stream is empty)
+        addWarns ws x@((p,w),y) = ((p, reverse (poss x ws) ++ w), y) -- queue tags on the pending list, which is kept newest first
+        pos ((p,_),_) rest = if optTagPosition then tagPosition p : rest else rest -- prefix the current position when optTagPosition is set
+        warn x s rest = if optTagWarning then pos x $ TagWarning (fromString s) : rest else rest -- add a warning only when optTagWarning is set
+        poss x = concatMap (\w -> pos x [w]) -- pos applied to every tag individually
+
+-- | Decode a numeric character reference: @x@ is the state whose leading token is 'EntityNum' or 'EntityHex', @s@ the digits. Yields @'?'@ when the digits do not parse or the value is not a valid 'Char'.
+entityChr x s | isEntityNum x = decode (reads s)   -- decimal, e.g. &#38;
+              | isEntityHex x = decode (readHex s) -- hexadecimal, e.g. &#x26;
+    where decode ((n,_):_) | inRange (toInteger $ ord minBound, toInteger $ ord maxBound) n = chr $ fromInteger n
+          decode _ = '?' -- nothing parsed, or the code point is out of range: substitute rather than crash
+
+-- Predicates on the leading token of a parser state; only the second component (the remaining tokens) is inspected.
+isEof x = case snd x of [] -> True; _ -> False
+isChar x = case snd x of Char{}:_ -> True; _ -> False
+isTag x = case snd x of Tag{}:_ -> True; _ -> False
+isTagShut x = case snd x of TagShut{}:_ -> True; _ -> False
+isAttName x = case snd x of AttName{}:_ -> True; _ -> False
+isAttVal x = case snd x of AttVal{}:_ -> True; _ -> False
+isTagEnd x = case snd x of TagEnd{}:_ -> True; _ -> False
+isTagEndClose x = case snd x of TagEndClose{}:_ -> True; _ -> False
+isComment x = case snd x of Comment{}:_ -> True; _ -> False
+isCommentEnd x = case snd x of CommentEnd{}:_ -> True; _ -> False
+isEntityName x = case snd x of EntityName{}:_ -> True; _ -> False
+isEntityNumHex x = isEntityNum x || isEntityHex x
+isEntityNum x = case snd x of EntityNum{}:_ -> True; _ -> False
+isEntityHex x = case snd x of EntityHex{}:_ -> True; _ -> False
+isEntityEnd x = case snd x of EntityEnd{}:_ -> True; _ -> False
+isWarn x = case snd x of Warn{}:_ -> True; _ -> False
+
+fromChr x = case snd x of Char c:_ -> Just c; _ -> Nothing      -- the character, if the leading token is a 'Char'
+fromWarn x = case snd x of Warn msg:_ -> Just msg; _ -> Nothing -- the message, if the leading token is a 'Warn'
+
+getEntityEnd x = case snd x of EntityEnd b:_ -> b -- partial: undefined unless the leading token is an 'EntityEnd'
+
+
+-- Merge all adjacent TagText bits, including bits separated only by position and warning tags (skipped warnings are re-inserted after the merged text)
+tagTextMerge :: StringLike str => [Tag str] -> [Tag str]
+tagTextMerge (TagText x:xs) = TagText (strConcat (x:a)) : tagTextMerge b
+    where
+        (a,b) = f xs -- a = further text pieces to append, b = the tags that remain afterwards
+
+        -- additional brackets on 3 lines to work around HSE 1.3.2 bugs with pattern fixities
+        f (TagText x:xs) = (x:a,b) -- f: collect the text that continues the current run
+            where (a,b) = f xs
+        f (TagPosition{}:(x@TagText{}:xs)) = f $ x : xs -- the position of a merged-in piece is dropped
+        f x = g x id x -- otherwise look ahead with g
+
+        g o op (p@TagPosition{}:(w@TagWarning{}:xs)) = g o (op . (p:) . (w:)) xs -- g: skip warnings; o = tags where the look-ahead began, op = re-inserts what was skipped
+        g o op (w@TagWarning{}:xs) = g o (op . (w:)) xs
+        g o op (p@TagPosition{}:(x@TagText{}:xs)) = f $ p : x : op xs -- text found: merge it, with the skipped warnings placed after it
+        g o op (x@TagText{}:xs) = f $ x : op xs
+        g o op _ = ([], o) -- no more text: stop and leave o untouched
+
+tagTextMerge (x:xs) = x : tagTextMerge xs
+tagTextMerge [] = []
diff --git a/src/Text/HTML/TagSoup/Manual.hs b/src/Text/HTML/TagSoup/Manual.hs
new file mode 100644
index 0000000..1bab781
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Manual.hs
@@ -0,0 +1,13 @@
+
+module Text.HTML.TagSoup.Manual(parseTagsOptions) where
+
+import Text.HTML.TagSoup.Specification
+import Text.HTML.TagSoup.Implementation
+import Text.HTML.TagSoup.Type
+import Text.HTML.TagSoup.Options
+import Text.StringLike
+
+
+parseTagsOptions :: StringLike str => ParseOptions str -> str -> [Tag str]
+parseTagsOptions opts input = output opts (parse (toString input))
+
diff --git a/src/Text/HTML/TagSoup/Match.hs b/src/Text/HTML/TagSoup/Match.hs
new file mode 100644
index 0000000..e8b1841
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Match.hs
@@ -0,0 +1,198 @@
+-- | Combinators to match tags. Some people prefer to use @(~==)@ from
+--   "Text.HTML.TagSoup", others prefer these more structured combinators.
+--   Which you use is personal preference.
+--
+-- The functions below offer maximum flexibility for matching tags.
+-- Using 'tagOpen', for example, you can match all links or buttons that have the "btn" class.
+--
+-- For simple uses cases—like matching all comment tags, or matching opening @\<a>@ tags,
+-- use the tag identification functions in "Text.HTML.TagSoup#tag-identification".
+module Text.HTML.TagSoup.Match where
+
+import Text.HTML.TagSoup.Type (Tag(..), Attribute)
+import Data.List (tails)
+
+-- * Matching Tags
+
+-- | Match an opening tag
+--
+-- ==== __Examples__
+--
+-- /Matching an opening @\<a>@ tag with a @"btn"@ class:/
+--
+-- >>> let tag = TagOpen "a" [("class", "btn")]
+-- >>> tagOpen (== "a") (\attrs -> any (== ("class", "btn")) attrs) tag
+-- True
+tagOpen :: (str -> Bool) -> ([Attribute str] -> Bool) -> Tag str -> Bool
+tagOpen pName pAttrs tag = case tag of
+   TagOpen name attrs -> pName name && pAttrs attrs
+   _ -> False
+
+-- | Match a closing tag
+--
+-- ==== __Examples__
+--
+-- /Matching a closing @\<\/a>@ tag:/
+--
+-- >>> tagClose (== "a") (TagClose "a")
+-- True
+--
+-- >>> tagClose (== "a") (TagOpen "a" [])
+-- False
+tagClose :: (str -> Bool) -> Tag str -> Bool
+tagClose pName tag = case tag of
+   { TagClose name -> pName name; _ -> False }
+
+-- | Match text tags whose text satisfies the predicate
+--
+-- ==== __Examples__
+--
+-- /Match all text tags:/
+--
+-- >>> parseTags "<p>This is a paragraph</p>"
+-- [TagOpen "p" [],TagText "This is a paragraph",TagClose "p"]
+-- >>> filter (tagText (const True)) (parseTags "<p>This is a paragraph</p>")
+-- [TagText "This is a paragraph"]
+tagText :: (str -> Bool) -> Tag str -> Bool
+tagText p (TagText text) = p text
+tagText _ _ = False
+
+-- | Match comment tags whose text satisfies the predicate
+--
+-- ==== __Examples__
+--
+-- /Matching comment tags that include an exclamation mark:/
+--
+-- >>> parseTags "<!--This is a comment!-->"
+-- [TagComment "This is a comment!"]
+-- >>> all (tagComment (\s -> '!' `elem` s)) (parseTags "<!--This is a comment!-->")
+-- True
+tagComment :: (str -> Bool) -> Tag str -> Bool
+tagComment p (TagComment text) = p text
+tagComment _ _ = False
+
+
+-- | Match an opening tag's name literally
+--
+-- ==== __Examples__
+--
+-- /Matching @\<a>@ tags with the @id@ "foo":/
+--
+-- >>> let tag = TagOpen "a" [("id", "foo")]
+-- >>> tag
+-- TagOpen "a" [("id","foo")]
+-- >>> tagOpenLit "a" (\attrs -> any (== ("id", "foo")) attrs) tag
+-- True
+tagOpenLit :: Eq str => str -> ([Attribute str] -> Bool) -> Tag str -> Bool
+tagOpenLit name = tagOpen (name==)
+
+-- | Match a closing tag's name literally
+--
+-- ==== __Examples__
+--
+-- /Match a closing @\<a>@ tag:/
+--
+-- >>> tagCloseLit "a" (TagClose "a")
+-- True
+--
+-- >>> tagCloseLit "a" (TagClose "em")
+-- False
+tagCloseLit :: Eq str => str -> Tag str -> Bool
+tagCloseLit name = tagClose (\closing -> name == closing)
+
+-- | Match an opening tag's name literally, and at least one of its attributes
+--
+-- ==== __Examples__
+--
+-- /Matching a @\<div>@ tag with the @id@ "foo":/
+--
+-- >>> tagOpenAttrLit "div" ("id", "foo") (TagOpen "div" [("id", "foo")])
+-- True
+tagOpenAttrLit :: Eq str => str -> Attribute str -> Tag str -> Bool
+tagOpenAttrLit name attr = tagOpenLit name hasAttr
+   where hasAttr = anyAttrLit attr
+
+{- |
+Match a tag with given name, that contains an attribute
+with given name, that satisfies a predicate.
+If an attribute occurs multiple times,
+all occurrences are checked.
+
+==== __Examples__
+
+/Matching an @\<a>@ tag with an ID that starts with "comment-":/
+
+>>> let commentTag = TagOpen "a" [("id", "comment-45678")]
+>>> tagOpenAttrNameLit "a" "id" (\idValue -> "comment-" `Data.List.isPrefixOf` idValue) commentTag
+True
+-}
+tagOpenAttrNameLit :: Eq str => str -> str -> (str -> Bool) -> Tag str -> Bool
+tagOpenAttrNameLit tagName attrName pAttrValue =
+   tagOpenLit tagName (anyAttr wanted)
+    where wanted (name,value) = name==attrName && pAttrValue value
+
+
+-- | Check if the 'Tag str' is 'TagOpen' and matches the given name
+--
+-- ==== __Examples__
+--
+-- /Matching an @\<a>@ tag:/
+--
+-- >>> tagOpenNameLit "a" (TagOpen "a" [])
+-- True
+--
+-- >>> tagOpenNameLit "a" (TagOpen "div" [])
+-- False
+tagOpenNameLit :: Eq str => str -> Tag str -> Bool
+tagOpenNameLit name = tagOpenLit name (\_ -> True)
+
+-- | Check if the 'Tag str' is 'TagClose' and matches the given name
+--
+-- ==== __Examples__
+--
+-- /Matching a closing @\<\/a>@ tag:/
+--
+-- >>> tagCloseNameLit "a" (TagClose "a")
+-- True
+--
+-- >>> tagCloseNameLit "a" (TagClose "div")
+-- False
+tagCloseNameLit :: Eq str => str -> Tag str -> Bool
+tagCloseNameLit = tagCloseLit
+
+
+-- * Matching attributes
+
+-- | True if at least one @(name,value)@ attribute pair satisfies the predicate.
+anyAttr :: ((str,str) -> Bool) -> [Attribute str] -> Bool
+anyAttr p attrs = any p attrs
+
+-- | True if the name of at least one attribute satisfies the predicate.
+anyAttrName :: (str -> Bool) -> [Attribute str] -> Bool
+anyAttrName p = any (\attr -> p (fst attr))
+
+-- | True if the value of at least one attribute satisfies the predicate.
+anyAttrValue :: (str -> Bool) -> [Attribute str] -> Bool
+anyAttrValue p = any (\attr -> p (snd attr))
+
+
+-- | True if the exact @(name,value)@ pair occurs among the attributes.
+anyAttrLit :: Eq str => (str,str) -> [Attribute str] -> Bool
+anyAttrLit attr = anyAttr (\candidate -> attr == candidate)
+
+-- | True if some attribute has exactly the given name.
+anyAttrNameLit :: Eq str => str -> [Attribute str] -> Bool
+anyAttrNameLit name = anyAttrName (\candidate -> name == candidate)
+
+-- | True if some attribute has exactly the given value.
+anyAttrValueLit :: Eq str => str -> [Attribute str] -> Bool
+anyAttrValueLit value = anyAttrValue (\candidate -> value == candidate)
+
+
+
+-- | Get the tags under the first tag with a given name whose attributes match some predicate (@[]@ if none does).
+getTagContent :: Eq str => str -> ([Attribute str] -> Bool) -> [Tag str] -> [Tag str]
+getTagContent name pAttrs =
+   takeWhile (not . tagCloseLit name) . drop 1 .
+   concat . take 1 . sections (tagOpenLit name pAttrs)
+    where sections p = filter (p . head) . init . tails
diff --git a/src/Text/HTML/TagSoup/Options.hs b/src/Text/HTML/TagSoup/Options.hs
new file mode 100644
index 0000000..f3c1c4a
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Options.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+
+module Text.HTML.TagSoup.Options where
+
+import Data.Typeable
+import Text.HTML.TagSoup.Type
+import Text.HTML.TagSoup.Entity
+import Text.StringLike
+
+
+-- | These options control how 'parseTags' works. The 'ParseOptions' type is usually generated by one of
+--   'parseOptions', 'parseOptionsFast' or 'parseOptionsEntities', then selected fields may be overridden.
+--
+--   The options 'optTagPosition' and 'optTagWarning' specify whether to generate
+--   'TagPosition' or 'TagWarning' elements respectively. Usually these options should be set to @False@
+--   to simplify future stages, unless you rely on position information or want to give malformed HTML
+--   messages to the end user.
+--
+--   The options 'optEntityData' and 'optEntityAttrib' control how entities, for example @&nbsp;@, are handled.
+--   Both take a string, and a boolean, where @True@ indicates that the entity ended with a semi-colon @;@.
+--   Inside normal text 'optEntityData' will be called, and the results will be inserted in the tag stream.
+--   Inside a tag attribute 'optEntityAttrib' will be called, and the first component of the result will be used
+--   in the attribute, and the second component will be appended after the 'TagOpen' value (usually the second
+--   component is @[]@). As an example, to not decode any entities, pass:
+--
+-- > parseOptions
+-- >     {optEntityData=\(str,b) -> [TagText $ "&" ++ str ++ [';' | b]]
+-- >     ,optEntityAttrib=\(str,b) -> ("&" ++ str ++ [';' | b], [])}
+--
+--   The 'optTagTextMerge' value specifies if you always want adjacent 'TagText' values to be merged.
+--   Merging adjacent pieces of text has a small performance penalty, but will usually make subsequent analysis
+--   simpler. Contiguous runs of characters without entities or tags will also be generated as single 'TagText'
+--   values.
+data ParseOptions str = ParseOptions
+    {optTagPosition :: Bool -- ^ Should 'TagPosition' values be given before some items (default=False,fast=False).
+    ,optTagWarning :: Bool  -- ^ Should 'TagWarning' values be given (default=False,fast=False)
+    ,optEntityData :: (str,Bool) -> [Tag str] -- ^ How to lookup an entity (Bool = has ending @';'@)
+    ,optEntityAttrib :: (str,Bool) -> (str,[Tag str]) -- ^ How to lookup an entity in an attribute (Bool = has ending @';'@?)
+    ,optTagTextMerge :: Bool -- ^ Require no adjacent 'TagText' values (default=True,fast=False)
+    }
+    deriving Typeable
+
+
+-- | A 'ParseOptions' structure using a custom function to look up entities. Any entity
+--   that is not found will be left intact, and a 'TagWarning' given (if 'optTagWarning' is set).
+--
+--   If you do not want to resolve any entities, simply pass @const Nothing@ for the lookup function.
+parseOptionsEntities :: StringLike str => (str -> Maybe str) -> ParseOptions str
+parseOptionsEntities lookupEntity = ParseOptions False False entityData entityAttrib True
+    where
+        entityData x = TagText a : b
+            where (a,b) = entityAttrib x
+
+        entityAttrib ~(x,b) =
+            let x' = x `append` fromString [';'|b]
+            in case lookupEntity x' of
+                Just y -> (y, [])
+                Nothing -> (fromChar '&' `append` x'
+                           ,[TagWarning $ fromString "Unknown entity: " `append` x])
+
+
+-- | The default parse options value, described in 'ParseOptions'. Equivalent to
+--   @'parseOptionsEntities' 'lookupEntity'@.
+parseOptions :: StringLike str => ParseOptions str
+parseOptions = parseOptionsEntities (\name -> fmap fromString (lookupEntity (toString name)))
+
+
+-- | A 'ParseOptions' structure optimised for speed: 'parseOptions' with 'optTagTextMerge' set to @False@.
+parseOptionsFast :: StringLike str => ParseOptions str
+parseOptionsFast = parseOptions{optTagTextMerge=False}
+
+
+-- | Re-target a 'ParseOptions' value to another string type by wrapping both entity callbacks in 'castString'.
+fmapParseOptions :: (StringLike from, StringLike to) => ParseOptions from -> ParseOptions to
+fmapParseOptions (ParseOptions pos warn entData entAttr merge) = ParseOptions pos warn entData' entAttr' merge
+    where
+        entData' ~(name,semi) = map (fmap castString) (entData (castString name, semi))
+        entAttr' ~(name,semi) = (castString val, map (fmap castString) extra)
+            where (val,extra) = entAttr (castString name, semi)
+
diff --git a/src/Text/HTML/TagSoup/Parser.hs b/src/Text/HTML/TagSoup/Parser.hs
new file mode 100644
index 0000000..5fac359
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Parser.hs
@@ -0,0 +1,25 @@
+
+module Text.HTML.TagSoup.Parser(
+    parseTags, parseTagsOptions,
+    ParseOptions(..), parseOptions, parseOptionsFast, parseOptionsEntities
+    ) where
+
+import Text.HTML.TagSoup.Type
+import Text.HTML.TagSoup.Options
+import qualified Text.HTML.TagSoup.Generated as Gen
+
+
+-- | Parse a string to a list of tags, using an HTML 5 compliant parser.
+--
+-- > parseTags "<hello>my&amp;</world>" == [TagOpen "hello" [],TagText "my&",TagClose "world"]
+parseTags :: StringLike str => str -> [Tag str]
+parseTags = parseTagsOptions parseOptions
+
+
+-- | Parse a string to a list of tags, using settings supplied by the 'ParseOptions' parameter,
+--   eg. to output position information:
+--
+-- > parseTagsOptions parseOptions{optTagPosition = True} "<hello>my&amp;</world>" ==
+-- >    [TagPosition 1 1,TagOpen "hello" [],TagPosition 1 8,TagText "my&",TagPosition 1 15,TagClose "world"]
+parseTagsOptions :: StringLike str => ParseOptions str -> str -> [Tag str]
+parseTagsOptions = Gen.parseTagsOptions
diff --git a/src/Text/HTML/TagSoup/Render.hs b/src/Text/HTML/TagSoup/Render.hs
new file mode 100644
index 0000000..380dd8f
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Render.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE PatternGuards, OverloadedStrings #-}
+{-|
+    This module converts a list of 'Tag' back into a string.
+-}
+
+module Text.HTML.TagSoup.Render
+    (
+    renderTags, renderTagsOptions, escapeHTML,
+    RenderOptions(..), renderOptions
+    ) where
+
+import Text.HTML.TagSoup.Entity
+import Text.HTML.TagSoup.Type
+import Text.StringLike
+
+
+-- | These options control how 'renderTags' works.
+--
+--   The strange quirk of only minimizing @\<br\>@ tags is due to Internet Explorer treating
+--   @\<br\>\<\/br\>@ as @\<br\>\<br\>@.
+data RenderOptions str = RenderOptions
+    {optEscape :: str -> str        -- ^ Escape a piece of text (default = escape the four characters @&\"\<\>@)
+    ,optMinimize :: str -> Bool     -- ^ Minimise \<b\>\<\/b\> -> \<b/\> (default = minimise only @\<br\>@ tags)
+    ,optRawTag :: str -> Bool      -- ^ Should a tag be output with no escaping (default = true only for @script@)
+    }
+
+
+-- | Replace the four characters @&\"\<\>@ with their HTML entities ('escapeXML' lifted to 'StringLike').
+escapeHTML :: StringLike str => str -> str
+escapeHTML str = fromString (escapeXML (toString str))
+
+-- | The default render options value, described in 'RenderOptions'.
+renderOptions :: StringLike str => RenderOptions str
+renderOptions = RenderOptions escapeHTML (isNamed "br") (isNamed "script") where isNamed tagName x = toString x == tagName
+
+
+-- | Show a list of tags, as they might have been parsed, using the default settings given in
+--   'RenderOptions'.
+--
+-- > renderTags [TagOpen "hello" [],TagText "my&",TagClose "world"] == "<hello>my&amp;</world>"
+renderTags :: StringLike str => [Tag str] -> str
+renderTags = renderTagsOptions renderOptions
+
+
+-- | Show a list of tags using settings supplied by the 'RenderOptions' parameter,
+--   eg. to avoid escaping any characters one could do:
+--
+-- > renderTagsOptions renderOptions{optEscape = id} [TagText "my&"] == "my&"
+renderTagsOptions :: StringLike str => RenderOptions str -> [Tag str] -> str
+renderTagsOptions opts = strConcat . tags
+    where
+        ss x = [x]
+        -- tags: render the list as string fragments; handles minimised pairs, names starting with '?', and raw ('optRawTag') elements
+        tags (TagOpen name atts:TagClose name2:xs)
+            | name == name2 && optMinimize opts name = open name atts " /" ++ tags xs
+        tags (TagOpen name atts:xs)
+            | Just ('?',_) <- uncons name = open name atts " ?" ++ tags xs
+            | optRawTag opts name =
+                let (a,b) = break (== TagClose name) (TagOpen name atts:xs)
+                in concatMap (\x -> case x of TagText s -> [s]; _ -> tag x) a ++ tags b
+        tags (x:xs) = tag x ++ tags xs
+        tags [] = []
+        -- tag: render a single tag; anything other than open/close/text/comment renders as the empty string
+        tag (TagOpen name atts) = open name atts ""
+        tag (TagClose name) = ["</", name, ">"]
+        tag (TagText text) = [txt text]
+        tag (TagComment text) = ss "<!--" ++ com text ++ ss "-->"
+        tag _ = ss ""
+        -- txt escapes via 'optEscape'; att emits a bare name for an empty value and only the quoted value for an empty name
+        txt = optEscape opts
+        open name atts shut = ["<",name] ++ concatMap att atts ++ [shut,">"]
+        att ("","") = [" \"\""]
+        att (x ,"") = [" ", x]
+        att ("", y) = [" \"",txt y,"\""]
+        att (x , y) = [" ",x,"=\"",txt y,"\""]
+        -- com: copy comment text, rewriting each embedded "-->" to "-- >" (presumably so it cannot end the comment early)
+        com xs | Just ('-',xs) <- uncons xs, Just ('-',xs) <- uncons xs, Just ('>',xs) <- uncons xs = "-- >" : com xs
+        com xs = case uncons xs of
+            Nothing -> []
+            Just (x,xs) -> fromChar x : com xs
diff --git a/src/Text/HTML/TagSoup/Specification.hs b/src/Text/HTML/TagSoup/Specification.hs
new file mode 100644
index 0000000..4e1a67f
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Specification.hs
@@ -0,0 +1,332 @@
+{-# LANGUAGE RecordWildCards, PatternGuards #-}
+
+module Text.HTML.TagSoup.Specification(parse) where
+
+import Text.HTML.TagSoup.Implementation
+import Data.Char (isAlpha, isAlphaNum, isDigit, toLower)
+
+-- We make some generalisations:
+-- <!name is a valid tag start closed by >
+-- <?name is a valid tag start closed by ?>
+-- </!name> is a valid closing tag
+-- </?name> is a valid closing tag
+-- <a "foo"> is a valid tag attribute in ! and ?, i.e. missing an attribute name
+-- We also don't do lowercase conversion
+-- Entities are handled without a list of known entity names
+-- We don't have RCData, CData or Escape modes (only affects dat and tagOpen)
+
+
+data TypeTag = TypeNormal -- <foo
+             | TypeXml    -- <?foo
+             | TypeDecl   -- <!foo
+             | TypeScript -- <script
+               deriving Eq
+
+
+-- 2.4.1 Common parser idioms
+white = (`elem` " \t\n\f\r")
+
+
+-- 8.2.4 Tokenization
+
+type Parser = S -> [Out]
+
+parse :: String -> [Out]
+parse input = dat (state input)
+
+-- 8.2.4.1 Data state
+dat :: Parser
+dat S{..} = pos $ case hd of
+    '&' -> charReference tl
+    '<' -> tagOpen tl
+    _ | eof -> []
+    _ -> hd & dat tl
+
+
+-- 8.2.4.2 Character reference data state
+charReference s = charRef dat False Nothing s
+
+
+-- 8.2.4.3 Tag open state
+tagOpen S{..} = case hd of
+    '!' -> markupDeclOpen tl
+    '/' -> closeTagOpen tl
+    _ | isAlpha hd -> Tag & hd & tagName (if isScript s then TypeScript else TypeNormal) tl
+    '>' -> errSeen "<>" & '<' & '>' & dat tl
+    '?' -> neilXmlTagOpen tl -- NEIL
+    _ -> errSeen  "<" & '<' & dat s
+
+isScript = f "script" -- does the input start with "script" (any letter case) followed by whitespace, '/', '>', '?' or eof?
+    where
+        f (c:cs) S{..} = toLower hd == c && f cs tl
+        f [] S{..} = white hd || hd == '/' || hd == '>' || hd == '?' || eof
+
+
+-- seen "<?", emitted []
+neilXmlTagOpen S{..} = pos $ case hd of
+    _ | isAlpha hd -> Tag & '?' & hd & tagName TypeXml tl
+    _ -> errSeen "<?" & '<' & '?' & dat s
+
+-- seen "?", expecting ">"
+neilXmlTagClose S{..} = pos $ case hd of
+    '>' -> TagEnd & dat tl
+    _ -> errSeen "?" & beforeAttName TypeXml s
+
+
+-- just seen ">" at the end, am given tl
+neilTagEnd typ S{..}
+    | typ == TypeXml = pos $ errWant "?>" & TagEnd & dat s
+    | typ == TypeScript = pos $ TagEnd & neilScriptBody s
+    | otherwise = pos $ TagEnd & dat s
+
+-- Inside a <script> tag, only break on </script
+neilScriptBody o@S{..}
+    | hd == '<', S{..} <- tl
+    , hd == '/', S{..} <- tl
+    , isScript s
+    = dat o
+    | eof = []
+    | otherwise =  pos $ hd & neilScriptBody tl
+
+
+-- 8.2.4.4 Close tag open state
+-- Deviation: We ignore the if CDATA/RCDATA bits and tag matching
+-- Deviation: On </> we output </> to the text
+-- Deviation: </!name> is a closing tag, not a bogus comment
+closeTagOpen S{..} = case hd of
+    _ | isAlpha hd || hd `elem` "?!" -> TagShut & hd & tagName TypeNormal tl
+    '>' -> errSeen "</>" & '<' & '/' & '>' & dat tl
+    _ | eof -> '<' & '/' & dat s
+    _ -> errWant "tag name" & bogusComment s
+
+
+-- 8.2.4.5 Tag name state
+tagName typ S{..} = pos $ case hd of
+    _ | white hd -> beforeAttName typ tl
+    '/' -> selfClosingStartTag typ tl
+    '>' -> neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | isAlpha hd -> hd & tagName typ tl
+    _ | eof -> errWant (if typ == TypeXml then "?>" else ">") & dat s
+    _ -> hd & tagName typ tl
+
+
+-- 8.2.4.6 Before attribute name state
+beforeAttName typ S{..} = pos $ case hd of
+    _ | white hd -> beforeAttName typ tl
+    '/' -> selfClosingStartTag typ tl
+    '>' -> neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | typ /= TypeNormal && hd `elem` "\'\"" -> beforeAttValue typ s -- NEIL
+    _ | hd `elem` "\"'<=" -> errSeen [hd] & AttName & hd & attName typ tl
+    _ | eof -> errWant (if typ == TypeXml then "?>" else ">") & dat s
+    _ -> AttName & hd & attName typ tl
+
+
+-- 8.2.4.7 Attribute name state
+attName typ S{..} = pos $ case hd of
+    _ | white hd -> afterAttName typ tl
+    '/' -> selfClosingStartTag typ tl
+    '=' -> beforeAttValue typ tl
+    '>' -> neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | hd `elem` "\"'<" -> errSeen [hd] & def
+    _ | eof -> errWant (if typ == TypeXml then "?>" else ">") & dat s
+    _ -> def
+    where def = hd & attName typ tl
+
+
+-- 8.2.4.8 After attribute name state
+afterAttName typ S{..} = pos $ case hd of
+    _ | white hd -> afterAttName typ tl
+    '/' -> selfClosingStartTag typ tl
+    '=' -> beforeAttValue typ tl
+    '>' -> neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | typ /= TypeNormal && hd `elem` "\"'" -> AttVal & beforeAttValue typ s -- NEIL
+    _ | hd `elem` "\"'<" -> errSeen [hd] & def
+    _ | eof -> errWant (if typ == TypeXml then "?>" else ">") & dat s
+    _ -> def
+    where def = AttName & hd & attName typ tl
+
+-- 8.2.4.9 Before attribute value state
+beforeAttValue typ S{..} = pos $ case hd of
+    _ | white hd -> beforeAttValue typ tl
+    '\"' -> AttVal & attValueDQuoted typ tl
+    '&' -> AttVal & attValueUnquoted typ s
+    '\'' -> AttVal & attValueSQuoted typ tl
+    '>' -> errSeen "=" & neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | hd `elem` "<=" -> errSeen [hd] & def
+    _ | eof -> errWant (if typ == TypeXml then "?>" else ">") & dat s
+    _ -> def
+    where def = AttVal & hd & attValueUnquoted typ tl
+
+
+-- 8.2.4.10 Attribute value (double-quoted) state
+attValueDQuoted typ S{..} = pos $ case hd of
+    '\"' -> afterAttValueQuoted typ tl
+    '&' -> charRefAttValue (attValueDQuoted typ) (Just '\"') tl
+    _ | eof -> errWant "\"" & dat s
+    _ -> hd & attValueDQuoted typ tl
+
+
+-- 8.2.4.11 Attribute value (single-quoted) state
+attValueSQuoted typ S{..} = pos $ case hd of
+    '\'' -> afterAttValueQuoted typ tl
+    '&' -> charRefAttValue (attValueSQuoted typ) (Just '\'') tl
+    _ | eof -> errWant "\'" & dat s
+    _ -> hd & attValueSQuoted typ tl
+
+
+-- 8.2.4.12 Attribute value (unquoted) state
+attValueUnquoted typ S{..} = pos $ case hd of
+    _ | white hd -> beforeAttName typ tl
+    '&' -> charRefAttValue (attValueUnquoted typ) Nothing tl
+    '>' -> neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | hd `elem` "\"'<=" -> errSeen [hd] & def
+    _ | eof -> errWant (if typ == TypeXml then "?>" else ">") & dat s
+    _ -> def
+    where def = hd & attValueUnquoted typ tl
+
+
+-- 8.2.4.13 Character reference in attribute value state
+charRefAttValue :: Parser -> Maybe Char -> Parser
+charRefAttValue resume c s = charRef resume True c s
+
+
+-- 8.2.4.14 After attribute value (quoted) state
+afterAttValueQuoted typ S{..} = pos $ case hd of
+    _ | white hd -> beforeAttName typ tl
+    '/' -> selfClosingStartTag typ tl
+    '>' -> neilTagEnd typ tl
+    '?' | typ == TypeXml -> neilXmlTagClose tl
+    _ | eof -> dat s
+    _ -> errSeen [hd] & beforeAttName typ s
+
+
+-- 8.2.4.15 Self-closing start tag state
+selfClosingStartTag typ S{..} = pos $ case hd of
+    _ | typ == TypeXml -> errSeen "/" & beforeAttName typ s
+    '>' -> TagEndClose & dat tl
+    _ | eof -> errWant ">" & dat s
+    _ -> errSeen "/" & beforeAttName typ s
+
+
+-- 8.2.4.16 Bogus comment state
+bogusComment S{..} = Comment & bogusComment1 s
+bogusComment1 S{..} = pos $ case hd of
+    '>' -> CommentEnd & dat tl
+    _ | eof -> CommentEnd & dat s
+    _ -> hd & bogusComment1 tl
+
+
+-- 8.2.4.17 Markup declaration open state
+markupDeclOpen S{..} = pos $ case hd of
+    _ | Just s <- next "--" -> Comment & commentStart s
+    _ | isAlpha hd -> Tag & '!' & hd & tagName TypeDecl tl -- NEIL
+    _ | Just s <- next "[CDATA[" -> cdataSection s
+    _ -> errWant "tag name" & bogusComment s
+
+
+-- 8.2.4.18 Comment start state
+commentStart S{..} = pos $ case hd of
+    '-' -> commentStartDash tl
+    '>' -> errSeen "<!-->" & CommentEnd & dat tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> hd & comment tl
+
+
+-- 8.2.4.19 Comment start dash state
+commentStartDash S{..} = pos $ case hd of
+    '-' -> commentEnd tl
+    '>' -> errSeen "<!--->" & CommentEnd & dat tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> '-' & hd & comment tl
+
+
+-- 8.2.4.20 Comment state
+comment S{..} = pos $ case hd of
+    '-' -> commentEndDash tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> hd & comment tl
+
+
+-- 8.2.4.21 Comment end dash state
+commentEndDash S{..} = pos $ case hd of
+    '-' -> commentEnd tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> '-' & hd & comment tl
+
+
+-- 8.2.4.22 Comment end state
+commentEnd S{..} = pos $ case hd of
+    '>' -> CommentEnd & dat tl
+    '-' -> errWant "-->" & '-' & commentEnd tl
+    _ | white hd -> errSeen "--" & '-' & '-' & hd & commentEndSpace tl
+    '!' -> errSeen "!" & commentEndBang tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> errSeen "--" & '-' & '-' & hd & comment tl
+
+
+-- 8.2.4.23 Comment end bang state
+commentEndBang S{..} = pos $ case hd of
+    '>' -> CommentEnd & dat tl
+    '-' -> '-' & '-' & '!' & commentEndDash tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> '-' & '-' & '!' & hd & comment tl
+
+
+-- 8.2.4.24 Comment end space state
+commentEndSpace S{..} = pos $ case hd of
+    '>' -> CommentEnd & dat tl
+    '-' -> commentEndDash tl
+    _ | white hd -> hd & commentEndSpace tl
+    _ | eof -> errWant "-->" & CommentEnd & dat s
+    _ -> hd & comment tl
+
+
+-- 8.2.4.38 CDATA section state
+cdataSection S{..} = pos $ case hd of
+    _ | Just s <- next "]]>" -> dat s
+    _ | eof -> dat s
+    _ | otherwise -> hd & cdataSection tl
+
+
+-- 8.2.4.39 Tokenizing character references
+-- Change from spec: this is responsible for writing '&' if nothing is to be written
+charRef :: Parser -> Bool -> Maybe Char -> S -> [Out]
+charRef resume att end S{..} = case hd of
+    _ | eof || hd `elem` "\t\n\f <&" || maybe False (== hd) end -> '&' & resume s
+    '#' -> charRefNum resume s tl
+    _ -> charRefAlpha resume att s
+
+charRefNum resume o S{..} = case hd of
+    _ | hd `elem` "xX" -> charRefNum2 resume o True tl
+    _ -> charRefNum2 resume o False s
+
+charRefNum2 resume o hex S{..} = case hd of
+    _ | hexChar hex hd -> (if hex then EntityHex else EntityNum) & hd & charRefNum3 resume hex tl
+    _ -> errSeen "&" & '&' & resume o
+
+charRefNum3 resume hex S{..} = case hd of
+    _ | hexChar hex hd -> hd & charRefNum3 resume hex tl
+    ';' -> EntityEnd True & resume tl
+    _ -> EntityEnd False & errWant ";" & resume s
+
+charRefAlpha resume att S{..} = case hd of
+    _ | isAlpha hd -> EntityName & hd & charRefAlpha2 resume att tl
+    _ -> errSeen "&" & '&' & resume s
+
+charRefAlpha2 resume att S{..} = case hd of
+    _ | alphaChar hd -> hd & charRefAlpha2 resume att tl
+    ';' -> EntityEnd True & resume tl
+    _ | att -> EntityEnd False & resume s
+    _ -> EntityEnd False & errWant ";" & resume s
+
+
+alphaChar x = any ($ x) [isAlphaNum, (`elem` ":-_")]
+
+hexChar hex x = if hex then isDigit x || x `elem` "abcdefABCDEF"
+                       else isDigit x
diff --git a/src/Text/HTML/TagSoup/Tree.hs b/src/Text/HTML/TagSoup/Tree.hs
new file mode 100644
index 0000000..864b6a1
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Tree.hs
@@ -0,0 +1,118 @@
+{-|
+    /NOTE/: This module is preliminary and may change at a future date.
+
+    This module is intended to help converting a list of tags into a
+    tree of tags.
+-}
+
+module Text.HTML.TagSoup.Tree
+    (
+    TagTree(..), tagTree, parseTree, parseTreeOptions, ParseOptions(..),
+    flattenTree, renderTree, renderTreeOptions, RenderOptions(..), transformTree, universeTree
+    ) where
+
+import Text.HTML.TagSoup (parseTags, parseTagsOptions, renderTags, renderTagsOptions, ParseOptions(..), RenderOptions(..))
+import Text.HTML.TagSoup.Type
+import Control.Arrow
+import GHC.Exts (build)
+
+
+-- | A tree of 'Tag' values.
+data TagTree str
+    = -- | A 'TagOpen'/'TagClose' pair with the 'Tag' values in between.
+      TagBranch str [Attribute str] [TagTree str]
+    | -- | Any leaf node
+      TagLeaf (Tag str)
+                   deriving (Eq,Ord,Show)
+
+instance Functor TagTree where
+    fmap f (TagBranch x y z) = TagBranch (f x) (map (f***f) y) (map (fmap f) z)
+    fmap f (TagLeaf x) = TagLeaf (fmap f x)
+
+
+-- | Convert a list of tags into a tree. This version is not lazy at
+--   all, that is saved for version 2.
+tagTree :: Eq str => [Tag str] -> [TagTree str]
+tagTree = g
+    where
+        g :: Eq str => [Tag str] -> [TagTree str]
+        g [] = []
+        g xs = a ++ map TagLeaf (take 1 b) ++ g (drop 1 b)
+            where (a,b) = f xs
+
+        -- the second tuple is either null or starts with a close
+        f :: Eq str => [Tag str] -> ([TagTree str],[Tag str])
+        f (TagOpen name atts:rest) =
+            case f rest of
+                (inner,[]) -> (TagLeaf (TagOpen name atts):inner, [])
+                (inner,TagClose x:xs)
+                    | x == name -> let (a,b) = f xs in (TagBranch name atts inner:a, b)
+                    | otherwise -> (TagLeaf (TagOpen name atts):inner, TagClose x:xs)
+                _ -> error "TagSoup.Tree.tagTree: safe as - forall x . isTagClose (snd (f x))"
+
+        f (TagClose x:xs) = ([], TagClose x:xs)
+        f (x:xs) = (TagLeaf x:a,b)
+            where (a,b) = f xs
+        f [] = ([], [])
+
+-- | Build a 'TagTree' from a string.
+parseTree :: StringLike str => str -> [TagTree str]
+parseTree str = tagTree (parseTags str)
+
+-- | Build a 'TagTree' from a string, specifying the 'ParseOptions'.
+parseTreeOptions :: StringLike str => ParseOptions str -> str -> [TagTree str]
+parseTreeOptions opts = tagTree . parseTagsOptions opts
+
+-- | Flatten a 'TagTree' back to a list of 'Tag'; each 'TagBranch' becomes 'TagOpen', its flattened children, then 'TagClose'.
+flattenTree :: [TagTree str] -> [Tag str]
+flattenTree xs = build $ flattenTreeFB xs
+-- Worker for 'flattenTree' in build form: emits through cons/nil and threads the already-built tail, so no lists are appended.
+flattenTreeFB :: [TagTree str] -> (Tag str -> lst -> lst) -> lst -> lst
+flattenTreeFB xs cons nil = flattenTreeOnto xs nil
+    where
+        flattenTreeOnto [] tags = tags
+        flattenTreeOnto (TagBranch name atts inner:trs) tags =
+            TagOpen name atts `cons` flattenTreeOnto inner (TagClose name `cons` flattenTreeOnto trs tags)
+        flattenTreeOnto (TagLeaf x:trs) tags = x `cons` flattenTreeOnto trs tags
+
+-- | Render a 'TagTree'.
+renderTree :: StringLike str => [TagTree str] -> str
+renderTree trees = renderTags (flattenTree trees)
+
+-- | Render a 'TagTree' with some 'RenderOptions'.
+renderTreeOptions :: StringLike str => RenderOptions str -> [TagTree str] -> str
+renderTreeOptions opts = renderTagsOptions opts . flattenTree
+
+-- | This operation is based on the Uniplate @universe@ function. Given a
+--   list of trees, it returns those trees, and all the children trees at
+--   any level (leaves included). For example:
+--
+-- > universeTree
+-- >    [TagBranch "a" [("href","url")] [TagBranch "b" [] [TagLeaf (TagText "text")]]]
+-- > == [TagBranch "a" [("href","url")] [TagBranch "b" [] [TagLeaf (TagText "text")]]
+-- >    ,TagBranch "b" [] [TagLeaf (TagText "text")], TagLeaf (TagText "text")]
+--
+--   This operation is particularly useful for queries. To collect all @\"a\"@
+--   tags in a tree, simply do:
+--
+-- > [x | x@(TagBranch "a" _ _) <- universeTree tree]
+universeTree :: [TagTree str] -> [TagTree str]
+universeTree = concatMap f
+    where
+        f t@(TagBranch _ _ inner) = t : universeTree inner
+        f x = [x]
+
+
+-- | This operation is based on the Uniplate @transform@ function. Given a
+--   list of trees, it applies the function to every tree in a bottom-up
+--   manner. This operation is useful for manipulating a tree - for example
+--   to make all tag names upper case:
+--
+-- > upperCase = transformTree f
+-- >   where f (TagBranch name atts inner) = [TagBranch (map toUpper name) atts inner]
+-- >         f x = [x]
+transformTree :: (TagTree str -> [TagTree str]) -> [TagTree str] -> [TagTree str]
+transformTree act trees = concatMap (act . rebuild) trees
+    where
+        rebuild (TagBranch name atts inner) = TagBranch name atts (transformTree act inner)
+        rebuild leaf = leaf
diff --git a/src/Text/HTML/TagSoup/Type.hs b/src/Text/HTML/TagSoup/Type.hs
new file mode 100644
index 0000000..8b3988b
--- /dev/null
+++ b/src/Text/HTML/TagSoup/Type.hs
@@ -0,0 +1,142 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+-- | The central type in TagSoup
+
+module Text.HTML.TagSoup.Type(
+    -- * Data structures and parsing
+    StringLike, Tag(..), Attribute, Row, Column,
+    
+    -- * Position manipulation
+    Position(..), tagPosition, nullPosition, positionChar, positionString,
+
+    -- * Tag identification
+    isTagOpen, isTagClose, isTagText, isTagWarning, isTagPosition,
+    isTagOpenName, isTagCloseName, isTagComment,
+
+    -- * Extraction
+    fromTagText, fromAttrib,
+    maybeTagText, maybeTagWarning,
+    innerText,
+    ) where
+
+
+import Data.List (foldl')
+import Data.Maybe (fromMaybe, mapMaybe)
+import Text.StringLike
+import Data.Data(Data, Typeable)
+
+-- | An HTML attribute as a (name, value) pair: @id=\"name\"@ generates @(\"id\",\"name\")@
+type Attribute str = (str,str)
+
+-- | The row (line number) of a position, starting at 1
+type Row = Int
+
+-- | The column of a position within its row, starting at 1
+type Column = Int
+
+
+-- | A position in the input, stored as a row and a column, with (1,1) being the
+--   top-left position
+data Position = Position !Row !Column deriving (Show,Eq,Ord)
+
+nullPosition :: Position -- the starting (top-left) position: row 1, column 1
+nullPosition = Position 1 1
+
+positionString :: Position -> String -> Position -- the position reached after the whole string
+positionString start str = foldl' positionChar start str
+
+positionChar :: Position -> Char -> Position
+positionChar (Position row col) ch
+    | ch == '\n' = Position (row + 1) 1                        -- newline: column 1 of the next row
+    | ch == '\t' = Position row (col + 8 - (col - 1) `mod` 8)  -- tab: on to the next of columns 9, 17, ...
+    | otherwise  = Position row (col + 1)
+
+tagPosition :: Position -> Tag str -- wrap a position as a 'TagPosition' tag
+tagPosition (Position r c) = TagPosition r c
+
+
+-- | A single HTML element. A whole document is represented by a list of @Tag@.
+--   There is no requirement for 'TagOpen' and 'TagClose' to match.
+data Tag str =
+     TagOpen str [Attribute str]  -- ^ An open tag: its name and its 'Attribute's in their original order
+   | TagClose str                 -- ^ A closing tag, holding the tag name
+   | TagText str                  -- ^ A text node, guaranteed not to be the empty string
+   | TagComment str               -- ^ A comment
+   | TagWarning str               -- ^ Meta: A syntax error in the input file
+   | TagPosition !Row !Column     -- ^ Meta: The position of a parsed element
+     deriving (Show, Eq, Ord, Data, Typeable)
+
+instance Functor Tag where -- applies the function to every string held in the tag
+    fmap g (TagOpen name atts) = TagOpen (g name) [(g key, g val) | (key,val) <- atts]
+    fmap g (TagClose name) = TagClose (g name)
+    fmap g (TagText str) = TagText (g str)
+    fmap g (TagComment str) = TagComment (g str)
+    fmap g (TagWarning msg) = TagWarning (g msg)
+    fmap _ (TagPosition row col) = TagPosition row col
+
+
+-- | 'True' exactly when the 'Tag' is a 'TagOpen'
+isTagOpen :: Tag str -> Bool
+isTagOpen tag = case tag of { TagOpen{} -> True; _ -> False }
+
+-- | 'True' exactly when the 'Tag' is a 'TagClose'
+isTagClose :: Tag str -> Bool
+isTagClose tag = case tag of { TagClose{} -> True; _ -> False }
+
+-- | 'True' exactly when the 'Tag' is a 'TagText'
+isTagText :: Tag str -> Bool
+isTagText tag = case tag of { TagText{} -> True; _ -> False }
+
+-- | The string held by a 'TagText', or 'Nothing' for any other kind of tag
+maybeTagText :: Tag str -> Maybe str
+maybeTagText tag | TagText str <- tag = Just str
+                 | otherwise = Nothing
+
+-- | The string held by a 'TagText'; calls 'error' (showing the tag) for any other tag
+fromTagText :: Show str => Tag str -> str
+fromTagText tag | TagText str <- tag = str
+                | otherwise = error ("(" ++ show tag ++ ") is not a TagText")
+
+-- | Concatenate the contents of every 'TagText' in the list (similar to Verbatim found in HaXml)
+innerText :: StringLike str => [Tag str] -> str
+innerText tags = strConcat [str | TagText str <- tags]
+
+-- | 'True' exactly when the 'Tag' is a 'TagWarning'
+isTagWarning :: Tag str -> Bool
+isTagWarning tag = case tag of { TagWarning{} -> True; _ -> False }
+
+-- | The message held by a 'TagWarning', or 'Nothing' for any other kind of tag
+maybeTagWarning :: Tag str -> Maybe str
+maybeTagWarning tag | TagWarning msg <- tag = Just msg
+                    | otherwise = Nothing
+
+-- | 'True' exactly when the 'Tag' is a 'TagPosition'
+isTagPosition :: Tag str -> Bool
+isTagPosition tag = case tag of { TagPosition{} -> True; _ -> False }
+
+-- | Extract an attribute, crashes if not a 'TagOpen'.
+--   Returns @\"\"@ if no attribute present.
+--
+-- Warning: does not distinguish between a missing attribute
+-- and a present attribute with value @\"\"@.
+fromAttrib :: (Show str, Eq str, StringLike str) => str -> Tag str -> str
+fromAttrib att tag = fromMaybe empty $ maybeAttrib att tag
+
+-- | Look up an attribute by name in a 'TagOpen'; calls 'error' for any other tag.
+--   Returns @Nothing@ if no attribute present.
+maybeAttrib :: (Show str, Eq str) => str -> Tag str -> Maybe str
+maybeAttrib att tag | TagOpen _ atts <- tag = lookup att atts
+                    | otherwise = error ("(" ++ show tag ++ ") is not a TagOpen")
+
+-- | 'True' only for a 'TagOpen' whose name equals the given name
+isTagOpenName :: Eq str => str -> Tag str -> Bool
+isTagOpenName name tag | TagOpen n _ <- tag = n == name
+                       | otherwise = False
+
+-- | 'True' only for a 'TagClose' whose name equals the given name
+isTagCloseName :: Eq str => str -> Tag str -> Bool
+isTagCloseName name tag | TagClose n <- tag = n == name
+                        | otherwise = False
+
+-- | 'True' exactly when the 'Tag' is a 'TagComment'
+isTagComment :: Tag str -> Bool
+isTagComment tag = case tag of { TagComment{} -> True; _ -> False }
diff --git a/src/Text/StringLike.hs b/src/Text/StringLike.hs
new file mode 100644
index 0000000..fed6f4e
--- /dev/null
+++ b/src/Text/StringLike.hs
@@ -0,0 +1,96 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
+
+-- | /WARNING/: This module is /not/ intended for use outside the TagSoup library.
+--
+--   This module provides an abstraction for strings as used inside TagSoup. It allows
+--   TagSoup to work with String (list of Char), ByteString.Char8, ByteString.Lazy.Char8,
+--   Data.Text and Data.Text.Lazy.
+module Text.StringLike (StringLike(..), fromString, castString) where
+
+import Data.String
+import Data.Typeable
+
+import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString.Lazy.Char8 as LBS
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as LT
+
+
+-- | A class to generalise TagSoup parsing over many string-like types.
+--   Each method is documented by its definition for the String type.
+class (Typeable a, Eq a, IsString a) => StringLike a where
+    -- | > empty = ""
+    empty :: a
+    -- | > cons = (:)
+    cons :: Char -> a -> a
+    -- | > uncons []     = Nothing
+    --   > uncons (x:xs) = Just (x, xs)
+    uncons :: a -> Maybe (Char, a)
+
+    -- | > toString = id
+    toString :: a -> String
+    -- | > fromChar = return
+    fromChar :: Char -> a
+    -- | > strConcat = concat
+    strConcat :: [a] -> a
+    -- | > strNull = null
+    strNull :: a -> Bool
+    -- | > append = (++)
+    append :: a -> a -> a
+
+
+-- | Convert between two string-like types, going through 'String'.
+castString :: (StringLike a, StringLike b) => a -> b
+castString str = fromString (toString str)
+
+
+instance StringLike String where -- plain lists of Char
+    empty = []
+    cons = (:)
+    uncons [] = Nothing
+    uncons (c:rest) = Just (c, rest)
+    toString = id
+    fromChar c = [c]
+    strConcat = concat
+    strNull = null
+    append = (++)
+
+instance StringLike BS.ByteString where -- strict ByteString through its Char8 interface
+    empty = BS.empty
+    cons = BS.cons
+    uncons = BS.uncons
+    toString = BS.unpack
+    fromChar = BS.singleton
+    strConcat = BS.concat
+    strNull = BS.null
+    append = BS.append
+
+instance StringLike LBS.ByteString where -- lazy ByteString through its Char8 interface
+    empty = LBS.empty
+    cons = LBS.cons
+    uncons = LBS.uncons
+    toString = LBS.unpack
+    fromChar = LBS.singleton
+    strConcat = LBS.concat
+    strNull = LBS.null
+    append = LBS.append
+
+instance StringLike T.Text where -- strict Text
+    empty = T.empty
+    cons = T.cons
+    uncons = T.uncons
+    toString = T.unpack
+    fromChar = T.singleton
+    strConcat = T.concat
+    strNull = T.null
+    append = T.append
+
+instance StringLike LT.Text where -- lazy Text
+    empty = LT.empty
+    cons = LT.cons
+    uncons = LT.uncons
+    toString = LT.unpack
+    fromChar = LT.singleton
+    strConcat = LT.concat
+    strNull = LT.null
+    append = LT.append
diff --git a/tagsoup.cabal b/tagsoup.cabal
new file mode 100644
index 0000000..dd4c079
--- /dev/null
+++ b/tagsoup.cabal
@@ -0,0 +1,66 @@
+cabal-version:  >= 1.18
+name:           tagsoup
+version:        0.14.2
+copyright:      Neil Mitchell 2006-2017
+author:         Neil Mitchell <ndmitchell@gmail.com>
+maintainer:     Neil Mitchell <ndmitchell@gmail.com>
+homepage:       https://github.com/ndmitchell/tagsoup#readme
+bug-reports:    https://github.com/ndmitchell/tagsoup/issues
+license:        BSD3
+category:       XML
+license-file:   LICENSE
+build-type:     Simple
+synopsis:       Parsing and extracting information from (possibly malformed) HTML/XML documents
+tested-with:    GHC==8.2.1, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4, GHC==7.6.3, GHC==7.4.2
+description:
+    TagSoup is a library for parsing HTML/XML. It supports the HTML 5 specification,
+    and can be used to parse either well-formed XML, or unstructured and malformed HTML
+    from the web. The library also provides useful functions to extract information
+    from an HTML document, making it ideal for screen-scraping.
+    .
+    Users should start from the "Text.HTML.TagSoup" module.
+extra-doc-files:
+    CHANGES.txt
+    README.md
+
+source-repository head
+    type:     git
+    location: https://github.com/ndmitchell/tagsoup.git
+
+library
+    default-language: Haskell2010
+    build-depends: base == 4.*, containers, bytestring, text
+    hs-source-dirs: src
+
+    exposed-modules:
+        Text.HTML.TagSoup
+        Text.HTML.TagSoup.Entity
+        Text.HTML.TagSoup.Match
+        Text.HTML.TagSoup.Tree
+        Text.StringLike
+    other-modules:
+        Text.HTML.TagSoup.Generated
+        Text.HTML.TagSoup.Implementation
+        Text.HTML.TagSoup.Manual
+        Text.HTML.TagSoup.Options
+        Text.HTML.TagSoup.Parser
+        Text.HTML.TagSoup.Render
+        Text.HTML.TagSoup.Specification
+        Text.HTML.TagSoup.Type
+
+test-suite test-tagsoup
+    type: exitcode-stdio-1.0
+    default-language: Haskell2010
+    build-depends:
+        base == 4.*, containers, bytestring, text,
+        QuickCheck >= 2.4,
+        deepseq >= 1.1,
+        tagsoup,
+        time, directory, process
+
+    main-is: Main.hs
+    hs-source-dirs: test
+    other-modules:
+        TagSoup.Benchmark
+        TagSoup.Sample
+        TagSoup.Test
diff --git a/test/Main.hs b/test/Main.hs
new file mode 100644
index 0000000..dd26281
--- /dev/null
+++ b/test/Main.hs
@@ -0,0 +1,63 @@
+
+module Main(main) where
+
+import System.Environment
+import TagSoup.Sample
+import TagSoup.Test
+import TagSoup.Benchmark
+import Data.Char(toLower)
+
+
+-- | Print the usage text: a fixed banner followed by one aligned line per command.
+helpMsg :: IO ()
+helpMsg = putStr (unlines (banner ++ map render rows))
+    where
+        banner =
+            ["TagSoup, (C) Neil Mitchell 2006-2009"
+            ,""
+            ,"  tagsoup arguments"
+            ,""
+            ,"<url> may either be a local file, or a http[s]:// page"
+            ,""]
+
+        -- commands whose action takes an argument are listed with a <url> placeholder
+        rows = [(either (const nam) (const (nam ++ " <url>")) act, msg) | (nam,msg,act) <- actions]
+        -- pad each command out to the widest one plus four spaces, then add its description
+        widest = maximum (map (length . fst) rows)
+        render (lhs,rhs) = "  " ++ lhs ++ replicate (4 + widest - length lhs) ' ' ++ rhs
+            
+
+-- | (name, description, action) per command; 'Right' actions take one url/file argument
+actions :: [(String, String, Either (IO ()) (String -> IO ()))]
+actions = [ ("test", "Run the test suite", Left test)
+          , ("grab", "Grab a web page", Right grab)
+          , ("parse", "Parse a web page", Right parse)
+          , ("bench", "Benchmark the parsing", Left time)
+          , ("benchfile", "Benchmark the parsing of a file", Right timefile)
+          , ("validate", "Validate a page", Right validate)
+          , ("lastmodifieddate", "Get the wiki.haskell.org last modified date", Left haskellLastModifiedDateTime)
+          , ("spj", "Simon Peyton Jones' papers", Left spjPapers)
+          , ("ndm", "Neil Mitchell's papers", Left ndmPapers)
+          , ("time", "Current time", Left currentTime)
+          , ("google", "Google Tech News", Left googleTechNews)
+          , ("sequence", "Creators on sequence.complete.org", Left rssCreators)
+          , ("help", "This help message", Left helpMsg) ]
+
+-- | Dispatch on the first argument (case-insensitively); with no arguments run the tests.
+main :: IO ()
+main = getArgs >>= \args -> case args of
+    [] -> do
+        putStrLn "No arguments specifying, defaulting to test"
+        helpMsg
+        putStrLn $ replicate 70 '-'
+        test
+    cmd:rest -> case (lookup (map toLower cmd) [(nam,act) | (nam,_,act) <- actions], rest) of
+        (Nothing, _) -> putStrLn ("Error: unknown command " ++ cmd) >> helpMsg
+        (Just (Left act), []) -> act
+        (Just (Left act), _) -> do
+            putStrLn $ "Warning: expected no arguments to " ++ cmd ++ " but got: " ++ unwords rest
+            act
+        (Just (Right act), [url]) -> act url
+        (Just (Right _), _) -> do
+            putStrLn $ "Error: expected exactly one argument to " ++ cmd ++ " but got: " ++ unwords rest
+            helpMsg
diff --git a/test/TagSoup/Benchmark.hs b/test/TagSoup/Benchmark.hs
new file mode 100644
index 0000000..2f7407e
--- /dev/null
+++ b/test/TagSoup/Benchmark.hs
@@ -0,0 +1,202 @@
+{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-} -- test file, so OK
+
+module TagSoup.Benchmark where
+
+import Text.HTML.TagSoup
+
+import Control.DeepSeq
+import Control.Monad
+import Data.List
+import Data.Maybe
+import System.IO.Unsafe(unsafeInterleaveIO)
+import qualified Data.ByteString.Char8 as BS
+import qualified Data.ByteString.Lazy.Char8 as LBS
+import Data.Time.Clock.POSIX(getPOSIXTime)
+
+conf = 0.95 -- confidence level used by disp; range looks it up in confNs
+
+
+timefile :: FilePath -> IO () -- benchmark parsing the contents of one file
+timefile file = do
+    -- use LBS to be most representative of real life
+    lbs <- LBS.readFile file
+    let str = LBS.unpack lbs
+        bs = BS.concat $ LBS.toChunks lbs
+    () <- LBS.length lbs `seq` length str `seq` BS.length bs `seq` return () -- build all three inputs before timing
+    benchWith (const str, const bs, const lbs) $ benchStatic (toInteger $ LBS.length lbs)
+
+
+sample :: String -- the fragment that time repeats to build its benchmark input
+sample = "<this is a test with='attributes' and other=\"things&quot;tested\" /><neil> is </here>" ++
+         "<!-- comment --> and some just random &amp; test &gt;&lt;<foo></bar><bar><bob href=no>"
+
+nsample = genericLength sample :: Integer -- number of characters in one copy of sample
+
+time :: IO ()
+time = benchWith (copies concat sample, copies BS.concat (BS.pack sample),
+                  copies LBS.concat (LBS.pack sample)) benchVariable
+    where
+        -- input made of i back-to-back pieces; each piece is built once and shared by all calls
+        copies joinAll piece = \i -> joinAll (genericReplicate i piece)
+
+
+
+-- | Time 'parseTagsOptions' over String, strict and lazy ByteString input for all eight
+--   combinations of the position/warning/merge options, printing each grid 'delay2' yields.
+benchWith :: (Integer -> String, Integer -> BS.ByteString, Integer -> LBS.ByteString)
+          -> ((Integer -> ()) -> IO [String]) -> IO ()
+benchWith (str,bs,lbs) bench = do
+        putStrLn "Timing parseTags in characters/second"
+        let title = "(" ++ show (round $ conf * 100) ++ "% confidence)"
+        rows <- mapM row $ replicateM 3 [False,True]
+        mapM_ (putStrLn . strict . grid) $ delay2 $ map (:[]) [title,"String","BS","LBS"] : rows
+    where
+        row [a,b,c] = do
+            let label = intercalate "," [flag a "pos", flag b "warn", flag c "merge"]
+                flag isOn nam = (if isOn then ' ' else '!') : nam
+                run input = bench $ \i -> rnf $ parseTagsOptions parseOptions{optTagPosition=a,optTagWarning=b,optTagTextMerge=c} $ input i
+            timings <- sequence [run str, run bs, run lbs]
+            return ([label] : timings)
+        -- walks to the end of the grid text before any of it is printed
+        strict = reverse . reverse
+
+
+---------------------------------------------------------------------
+-- BENCHMARK ON THE SAMPLE INPUT
+
+disp xs = showUnit (floor avg) ++ " (~" ++ spread ++ "%)" -- the mean, and range conf as a % of it
+    where avg = mean xs
+          spread = if length xs > 1 then show (ceiling (range conf xs * 100 / avg)) else "?"
+
+cons x action = fmap (x:) action -- prepend x to the list inside the functor
+
+
+aimTime = 0.3 :: Double -- seconds to aim for in a single timed run
+minTime = 0.2 :: Double -- below this many seconds a test is considered invalid
+
+
+-- given an operation taking the number of times to repeat sample, lazily produce the list
+-- of strings to display: "?" entries while calibrating, then a countdown over ten timed runs
+benchVariable :: (Integer -> ()) -> IO [String]
+benchVariable op = cons "?" $ f 10 []
+    where
+        f i seen | length seen > 9 = cons ("  " ++ disp seen) $ return [] -- ten results: final summary
+                 | otherwise = unsafeInterleaveIO $ do
+            now <- timer $ op i
+            let cps = if now == 0 then 0 else fromInteger (i * nsample) / now
+            if now < minTime || (null seen && now < aimTime) then do -- too quick: scale up and start over
+                let factor = min 7 $ max 2 $ floor $ aimTime / now
+                cons ("? " ++ disp [cps]) $ f (i * factor) []
+             else
+                cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f i (cps:seen)
+
+
+-- time op ten times against an input of nsample characters, lazily producing display strings
+benchStatic :: Integer -> (Integer -> ()) -> IO [String]
+benchStatic nsample op = cons "?" $ f []
+    where
+        f seen | length seen > 9 = cons ("  " ++ disp seen) $ return []
+               | otherwise = unsafeInterleaveIO $ do
+            now <- timer $ op $ genericLength seen -- op is handed the number of runs done so far
+            let cps = if now == 0 then 0 else fromInteger nsample / now
+            cons (show (9 - length seen) ++ " " ++ disp (cps:seen)) $ f (cps:seen)
+
+
+---------------------------------------------------------------------
+-- UTILITY FUNCTIONS
+
+-- | Abbreviate a number to its three leading digits followed by a one
+--   character magnitude suffix (space, K, M, G, ...).
+showUnit :: Integer -> String
+showUnit x = whole ++ separator ++ frac ++ [suffix]
+    where
+        (digits, dropped) = splitAt 3 (show x)
+        extra = length dropped
+        -- every three dropped digits move one step along the suffix table
+        suffix = " KMGTPEZY" !! ((extra + 2) `div` 3)
+        -- number of kept digits that sit before the decimal point
+        (whole, frac) = splitAt ((extra - 1) `mod` 3 + 1) digits
+        separator = if null frac then "" else "."
+
+
+-- the current POSIX time as a Double (copied from the criterion package)
+getTime :: IO Double
+getTime = getPOSIXTime >>= \now -> return (fromRational (toRational now))
+
+timer :: () -> IO Double -- difference in getTime across evaluating the argument
+timer x = do
+    start <- getTime
+    () <- return x -- matching on () forces the computation being timed
+    end <- getTime
+    return $ end - start
+
+
+-- lay out rows of cells as text, padding every column to its widest cell plus one space
+grid :: [[String]] -> String
+grid rows = unlines [concat (zipWith pad widths row) | row <- rows]
+    where widths = [maximum (map length col) | col <- transpose rows]
+          pad w cell = cell ++ replicate (w + 1 - length cell) ' '
+
+
+-- turn a grid whose cells each hold a sequence of values into the successive grids to
+-- display: a cell on its last value keeps showing it, and the output stops once every
+-- cell is on its last value
+delay2 :: [[[String]]] -> [[[String]]]
+delay2 cells = map (map head) cells : (if all (null . tail) (concat cells) then [] else delay2 (map (map step) cells))
+    where step hist = case hist of { [_] -> hist; _:later -> later }
+
+
+---------------------------------------------------------------------
+-- INSTANCES
+
+instance NFData a => NFData (Tag a) where -- fully evaluate every string held in a tag
+    rnf (TagOpen name atts) = rnf name `seq` rnf atts
+    rnf (TagClose name) = rnf name
+    rnf (TagText str) = rnf str
+    rnf (TagComment str) = rnf str
+    rnf (TagWarning msg) = rnf msg
+    rnf (TagPosition _ _) = () -- both fields are strict, so nothing is left to force
+
+
+#ifndef BYTESTRING_HAS_NFDATA
+# ifdef MIN_VERSION_bytestring
+#  define BYTESTRING_HAS_NFDATA (MIN_VERSION_bytestring(0,10,0))
+# else
+#  define BYTESTRING_HAS_NFDATA (__GLASGOW_HASKELL__ >= 706)
+# endif
+#endif
+-- When bytestring provides no NFData instances (before 0.10), define simple ones here.
+#if !BYTESTRING_HAS_NFDATA
+instance NFData LBS.ByteString where
+    rnf x = LBS.length x `seq` ()
+
+instance NFData BS.ByteString where
+    rnf x = BS.length x `seq` ()
+#endif
+
+
+---------------------------------------------------------------------
+-- STATISTICS
+-- Provided by Emily Mitchell
+
+-- (confidence level, multiplier) pairs; range looks the multiplier up by level
+confNs =
+    [(0.95, 1.96), (0.90, 1.644)]
+
+size :: [Double] -> Double -- the number of elements, as a Double
+size xs = genericLength xs
+
+mean :: [Double] -> Double -- arithmetic mean: the sum divided by the number of elements
+mean xs = let total = sum xs in total / size xs
+
+stddev :: [Double] -> Double -- square root of the mean squared deviation from the mean
+stddev xs = sqrt (sum (map sqDev xs) / size xs)
+    where centre = mean xs
+          sqDev x = (x - centre) * (x - centre)
+
+-- given a confidence level (it must be a key of confNs, e.g. 0.95) and a sample,
+-- returns multiplier * stddev / sqrt n; any other level is reported with error
+range ::Double -> [Double] -> Double
+range conf xs = conf2 * stddev xs / sqrt (size xs)
+    where conf2 = fromMaybe (error $ "Unknown confidence interval: " ++ show conf) $ lookup conf confNs
diff --git a/test/TagSoup/Sample.hs b/test/TagSoup/Sample.hs
new file mode 100644
index 0000000..043a19f
--- /dev/null
+++ b/test/TagSoup/Sample.hs
@@ -0,0 +1,158 @@
+
+module TagSoup.Sample where
+
+import Text.HTML.TagSoup
+
+import Control.Exception
+import Control.Monad
+import Data.List
+import System.Process
+import System.Directory
+import System.Exit
+import System.IO
+import Data.Functor
+import Prelude
+
+
+-- | Fetch a document: http:// and https:// urls are downloaded with wget (which must be
+--   installed) through a temporary file, anything else is read as a local file.
+openItem :: String -> IO String
+openItem url | not $ "http://" `isPrefixOf` url || "https://" `isPrefixOf` url = readFile url
+openItem url = bracket
+    (openTempFile "." "tagsoup.tmp")
+    (\(file,_) -> removeFile file)
+    $ \(file,hndl) -> do
+        hClose hndl
+        putStrLn $ "Downloading: " ++ url
+        res <- rawSystem "wget" [url, "-O", file] -- no shell, so '&' or ';' in the url is not interpreted
+        when (res /= ExitSuccess) $ error $ "Failed to download using wget: " ++ url
+        src <- readFile file
+        length src `seq` return src -- read everything before bracket removes the file
+
+
+grab :: String -> IO () -- print the raw contents of a url or file
+grab src = putStr =<< openItem src
+
+parse :: String -> IO ()
+parse x = putStr . listing . parseTags =<< openItem x
+    where
+        -- like show on a list, but with a line break before every separator and the closing bracket
+        listing tags = if null tags then "[]" else "[" ++ concat (intersperseNotBroken "\n," (map show tags)) ++ "\n]\n"
+
+
+-- an intersperse that yields each element before looking at the rest of the list
+-- (written because the standard intersperse had a strictness bug)
+intersperseNotBroken :: a -> [a] -> [a]
+intersperseNotBroken sep items = case items of
+    [] -> []
+    -- every element after the first is preceded by the separator
+    x:rest -> x : foldr (\y acc -> sep : y : acc) [] rest
+
+
+{-
+<li id="lastmod"> This page was last modified on 9 September 2013, at 22:38.</li>
+-}
+haskellLastModifiedDateTime :: IO ()
+haskellLastModifiedDateTime = do
+    tags <- parseTags <$> openItem "http://wiki.haskell.org/Haskell"
+    -- the text of the footer item; its first six words are dropped before printing
+    let footer = innerText $ take 2 $ dropWhile (~/= "<li id=lastmod>") tags
+    putStrLn $ "wiki.haskell.org was last modified on " ++ unwords (drop 6 (words footer))
+
+
+googleTechNews :: IO ()
+googleTechNews = do
+        tags <- parseTags <$> openItem "http://news.google.com/?ned=us&topic=t"
+        putStr $ unlines
+            [ ascii title ++ " <" ++ maybe "unknown" shortUrl (lookup "href" atts) ++ ">"
+            | TagOpen "h2" [("class","title")]:TagText _:TagOpen "a" atts:TagText title:_ <- tails tags]
+    where
+        shortUrl x | Just rest <- stripPrefix "http://" x = shortUrl rest
+                   | Just rest <- stripPrefix "www." x = shortUrl rest
+                   | otherwise = takeWhile (/= '/') x
+        -- replace the UTF-8 byte sequences of an en dash and a pound sign with ASCII text
+        ascii ('\226':'\128':'\147':xs) = '-' : ascii xs
+        ascii ('\194':'\163':xs) = "#GBP " ++ ascii xs
+        ascii (x:xs) = x : ascii xs
+        ascii [] = []
+
+
+spjPapers :: IO () -- print the first text of each link between the "current" and "haskell" anchors
+spjPapers = do
+        tags <- parseTags <$> openItem "http://research.microsoft.com/en-us/people/simonpj/"
+        let links = map f $ sections (~== "<A>") $
+                    takeWhile (~/= "<a name=haskell>") $
+                    drop 5 $ dropWhile (~/= "<a name=current>") tags
+        putStr $ unlines links
+    where
+        f :: [Tag String] -> String
+        f = dequote . unwords . words . fromTagText . head . filter isTagText
+        -- strip one pair of surrounding double quotes; a lone quote character is left as it is
+        dequote ('\"':xs) | not (null xs), last xs == '\"' = init xs
+        dequote x = x
+
+
+ndmPapers :: IO ()
+ndmPapers = do
+        page <- openItem "http://community.haskell.org/~ndm/downloads/"
+        let items = sections (~== "<li class=paper>") (parseTags page)
+        putStr $ unlines $ map title items
+    where
+        title :: [Tag String] -> String
+        title item = fromTagText (item !! 2) -- the tag two after the <li>, which must be text
+
+
+currentTime :: IO ()
+currentTime = do
+    page <- openItem "http://www.timeanddate.com/worldclock/uk/london"
+    -- print the tag straight after <span id=ct>, which must be text
+    putStrLn $ fromTagText $ dropWhile (~/= "<span id=ct>") (parseTags page) !! 1
+
+
+
+type Section = String -- a section name, as returned by hackage
+data Package = Package {name :: String, desc :: String, href :: String} -- one scraped package entry
+               deriving Show
+
+-- | Scrape the package list page into (section name, packages) pairs, one per <h3> section.
+hackage :: IO [(Section,[Package])]
+hackage = do
+    page <- openItem "http://hackage.haskell.org/packages/archive/pkg-list.html"
+    return [parseSect sect | sect <- partitions (~== "<h3>") (parseTags page)]
+    where
+        -- the section name is the third tag of a section; every <li> in it is a package
+        parseSect sect = (fromTagText (sect !! 2), map parsePackage (partitions (~== "<li>") sect))
+        -- presumably an item looks like <li><a href=..>name</a>: description (TODO confirm);
+        -- the description is what follows the first ':' and the one character after it
+        parsePackage item = Package
+            { name = fromTagText (item !! 2)
+            , desc = drop 2 $ dropWhile (/= ':') $ fromTagText (item !! 4)
+            , href = fromAttrib "href" (item !! 1)
+            }
+
+-- rssCreators Example: prints the names of story contributors on
+-- sequence.complete.org. The content is RSS (not HTML), and the tag that is
+-- selected lives in a different XML namespace: "dc:creator".
+rssCreators :: IO ()
+rssCreators = do
+    feed <- openItem "http://sequence.complete.org/node/feed"
+    let creators = [fromTagText (item !! 1) | item <- partitions (~== "<dc:creator>") (parseTags feed)]
+    putStrLn (unlines creators)
+
+
+-- | Parse a page with positions and warnings enabled, print every warning, then a summary line.
+validate :: String -> IO ()
+validate x = do
+    page <- openItem x
+    let msgs = warnings (parseTagsOptions parseOptions{optTagPosition=True, optTagWarning=True} page)
+    putStr $ unlines $ msgs ++ [summary (length msgs)]
+    where
+        -- a warning directly preceded by a position tag is reported at that position
+        warnings :: [Tag String] -> [String]
+        warnings tags = case tags of
+            TagPosition row col:TagWarning warn:rest -> ("Warning (" ++ show row ++ "," ++ show col ++ "): " ++ warn) : warnings rest
+            TagWarning warn:rest -> ("Warning (?,?): " ++ warn) : warnings rest
+            _:rest -> warnings rest
+            [] -> []
+        summary 0 = "Success, no warnings"
+        summary n = "Failed, " ++ show n ++ " warning" ++ ['s'|n>1]
diff --git a/test/TagSoup/Test.hs b/test/TagSoup/Test.hs
new file mode 100644
index 0000000..9f5ba87
--- /dev/null
+++ b/test/TagSoup/Test.hs
@@ -0,0 +1,257 @@
+{-# OPTIONS_GHC -fno-warn-deprecations #-}
+
+module TagSoup.Test(test) where
+
+import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Entity
+import Text.HTML.TagSoup.Match
+
+import Control.Monad
+import Data.List
+import Test.QuickCheck(Arbitrary(..), Testable(..), quickCheckWithResult, stdArgs,
+                       Args(..), listOf, elements, Result(..))
+
+-- * The Test Monad
+
+type Test a = IO a  -- a test is a plain IO action; the checks below fail via 'fail'
+
+pass :: Test ()
+pass = return ()  -- the successful outcome: nothing to report
+
+runTest :: Test () -> IO ()
+runTest x = do { x; putStrLn "All tests passed" }
+
+(===) :: (Show a, Eq a) => a -> a -> IO ()
+a === b = unless (a == b) $ fail $ "Does not equal: " ++ show a ++ " =/= " ++ show b
+
+check :: Testable prop => prop -> IO ()
+check prop = quickCheckWithResult stdArgs{maxSuccess=1000} prop >>= verdict
+    where
+        -- only an outright Success passes; every other Result fails the run
+        verdict Success{} = pass
+        verdict _ = fail "Property failed"
+
+newtype HTML = HTML String deriving Show
+instance Arbitrary HTML where
+    arbitrary = fmap (HTML . concat) (listOf (elements (symbols ++ ["CDATA","amp","gt","lt"])))
+        where symbols = [[c] | c <- " \n!-</>#&;xy01[]?'\""]  -- one fragment per character
+    shrink (HTML x) = [HTML (before ++ after) | (before, _:after) <- zip (inits x) (tails x)]  -- drop one char
+
+
+-- * The Main section
+
+test :: IO ()
+test = runTest $ sequence_
+    [ warnTests
+    , parseTests
+    , optionsTests
+    , renderTests
+    , combiTests
+    , positionTests
+    , entityTests
+    , (lazyTags == lazyTags) `seq` pass  -- the comparison forces every lazyTags character
+    , matchCombinators ]
+
+
+{- |
+This routine tests the laziness of the TagSoup parser.
+For each critical part of the parser we provide a test input
+with a token of infinite size.
+Then the output must be infinite too.
+If laziness is broken, the output will stop early.
+From each test case we collect the character at index 1000 of the
+shown parse result. If computation of the list stops somewhere,
+you have found a laziness stopper.
+-}
+
+
+lazyTags :: [Char]
+lazyTags = map ((!!1000) . show . parseTags)
+    [cycle "Rhabarber"
+    ,repeat '&'
+    ,"<"++cycle "html"
+    ,"<html "++cycle "na!me=value "
+    ,"<html name="++cycle "value"
+    ,"<html name=\""++cycle "value"
+    ,"<html name="++cycle "val!ue"
+    ,"<html "++cycle "name"
+    ,"</"++cycle "html"
+    ,"<!-- "++cycle "comment"
+    ,"<!"++cycle "doctype"
+    ,"<!DOCTYPE"++cycle " description"
+    ,cycle "1<2 "
+    ,"&" ++ cycle "t"
+    ,"<html name="++cycle "val&ue"
+    ,"<html name="++cycle "va&l!ue"
+    ,cycle "&amp; test"
+
+    -- Disabled case: I don't see how this can work unless the junk gets into the AST?
+    -- ,("</html "++cycle "junk") :
+    ]
+
+
+
+matchCombinators :: Test ()
+matchCombinators = do
+    let txt = TagText "test"
+        table = TagOpen "table" [("id", "name")]
+    -- text predicates first, then open-tag matchers that should accept `table`
+    mapM_ (=== True) [tagText (const True) txt, tagText ("test"==) txt, tagText ("soup"/=) txt]
+    mapM_ (=== True) [tagOpenNameLit "table" table, tagOpenLit "table" (anyAttrLit ("id", "name")) table, tagOpenLit "table" (anyAttrNameLit "id") table]
+    -- a differing attribute value must be rejected
+    tagOpenLit "table" (anyAttrLit ("id", "name")) (TagOpen "table" [("id", "other name")]) === False
+
+-- Example-based checks of 'parseTags': attributes, entities, CDATA and <script> content.
+parseTests :: Test ()
+parseTests = do
+    parseTags "<!DOCTYPE TEST>" === [TagOpen "!DOCTYPE" [("TEST","")]]
+    parseTags "<test \"foo bar\">" === [TagOpen "test" [("\"foo",""),("bar\"","")]]
+    parseTags "<test baz \"foo\">" === [TagOpen "test" [("baz",""),("\"foo\"","")]]
+    parseTags "<test 'foo bar'>" === [TagOpen "test" [("'foo",""),("bar'","")]]
+    parseTags "<test bar=''' />" === [TagOpen "test" [("bar",""),("'","")], TagClose "test"]
+    parseTags "<test2 a b>" === [TagOpen "test2" [("a",""),("b","")]]
+    parseTags "<test2 ''>" === [TagOpen "test2" [("''","")]]
+    parseTags "</test foo>" === [TagClose "test"]
+    parseTags "<test/>" === [TagOpen "test" [], TagClose "test"]
+    parseTags "<test1 a = b>" === [TagOpen "test1" [("a","b")]]
+    parseTags "hello &amp; world" === [TagText "hello & world"]
+    parseTags "hello &#64; world" === [TagText "hello @ world"]
+    parseTags "hello &#x40; world" === [TagText "hello @ world"]
+    parseTags "hello &#X40; world" === [TagText "hello @ world"]  -- upper-case X must work too
+    parseTags "hello &haskell; world" === [TagText "hello &haskell; world"]
+    parseTags "hello \n\t world" === [TagText "hello \n\t world"]
+    parseTags "<a href=http://www.google.com>" === [TagOpen "a" [("href","http://www.google.com")]]
+    parseTags "<foo bar=\"bar&#54;baz\">" === [TagOpen "foo" [("bar","bar6baz")]]
+    parseTags "<foo bar=\"bar&amp;baz\">" === [TagOpen "foo" [("bar","bar&baz")]]
+    parseTags "hey &how are you" === [TagText "hey &how are you"]
+    parseTags "hey &how; are you" === [TagText "hey &how; are you"]
+    parseTags "hey &amp are you" === [TagText "hey & are you"]
+    parseTags "hey &amp; are you" === [TagText "hey & are you"]
+
+    -- Regression inputs: real cases reported by users.
+    parseTags "&nwarr;x&ngeqq;" === [TagText ['\x2196','x','\x2267','\x0338']]
+    parseTags "test &#10933649; test" === [TagText "test ? test"]  -- invalid code point yields '?'
+
+    parseTags "<a href=\"series.php?view=single&ID=72710\">" === [TagOpen "a" [("href","series.php?view=single&ID=72710")]]
+
+    parseTags "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" ===
+        [TagOpen "!DOCTYPE" [("HTML",""),("PUBLIC",""),("","-//W3C//DTD HTML 4.01//EN"),("","http://www.w3.org/TR/html4/strict.dtd")]]
+
+    parseTags "<script src=\"http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot\">" ===
+        [TagOpen "script" [("src","http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot")]]
+
+    parseTags "<a title='foo'bar' href=correct>text" === [TagOpen "a" [("title","foo"),("bar'",""),("href", "correct")],TagText "text"]
+
+    parseTags "<test><![CDATA[Anything goes, <em>even hidden markup</em> &amp; entities]]> but this is outside</test>" ===
+        [TagOpen "test" [],TagText "Anything goes, <em>even hidden markup</em> &amp; entities but this is outside",TagClose "test"]
+
+    parseTags "<a \r\n href=\"url\">" === [TagOpen "a" [("href","url")]]
+
+    parseTags "<a href='random.php'><img src='strips/130307.jpg' alt='nukular bish'' title='' /></a>" === 
+        [TagOpen "a" [("href","random.php")],TagOpen "img" [("src","strips/130307.jpg"),("alt","nukular bish"),("'",""),("title","")],TagClose "img",TagClose "a"]
+
+    parseTags "<p>some text</p\n<img alt='&lt; &yyy; &gt;' src=\"abc.gif\">" ===
+        [TagOpen "p" [],TagText "some text",TagClose "p"]
+
+    parseTags "<script> if (x<bomb) </script>" === [TagOpen "script" [], TagText " if (x<bomb) ", TagClose "script"]
+    parseTags "<script> if (x<bomb) " === [TagOpen "script" [], TagText " if (x<bomb) "]
+    parseTags "<SCRIPT language=foo> if (x<bomb) </SCRIPT>" === [TagOpen "SCRIPT" [("language","foo")], TagText " if (x<bomb) ", TagClose "SCRIPT"]
+    parseTags "<script /><test>" === [TagOpen "script" [], TagClose "script", TagOpen "test" []]
+
+    -- some named entities need their trailing semicolon (mid), others do not (micro); see #28 and #27.
+    parseTags "one &mid; two" === [TagText "one \8739 two"]
+    parseTags "one &mid two" === [TagText "one &mid two"]
+    parseTags "one &micro; two" === [TagText "one \181 two"]
+    parseTags "one &micro two" === [TagText "one \181 two"]
+
+optionsTests :: Test ()
+optionsTests = check $ \(HTML x) -> and [holds x flags | flags <- replicateM 3 [False,True]]
+    where
+        -- check one (position, warning, merge) flag combination against the input
+        holds str [pos,warn,merge] = and
+                [ expect "merge" (not merge || noAdjacentText tags)
+                , expect "warn" (warn || not (any isTagWarning tags))
+                , expect "pos" (if pos then alternatePos tags else not (any isTagPosition tags)) ]
+            where tags = parseTagsOptions parseOptions{optTagPosition=pos,optTagWarning=warn,optTagTextMerge=merge} str
+                  expect label ok = ok || error ("optionsTests failed with " ++ label ++ " on " ++ show (pos,warn,merge,str,tags))
+
+        -- with optTagTextMerge no two TagText cells may be adjacent,
+        -- not even when only warnings or positions lie between them
+        noAdjacentText = scan True -- flag: may the next cell be a TagText?
+            where scan _ [] = True
+                  scan textOk (t:ts) | isTagText t = textOk && scan False ts
+                                     | isTagPosition t || isTagWarning t = scan textOk ts
+                                     | otherwise = scan True ts
+
+        -- with optTagPosition every element must be preceded by a position
+        -- node, no two position nodes may be adjacent, and positions must
+        -- never decrease
+        alternatePos (TagPosition l1 c1 : x : rest@(TagPosition l2 c2 : _))
+            | (l1,c1) <= (l2,c2) && not (isTagPosition x) = alternatePos rest
+        alternatePos [TagPosition _ _, x] = not (isTagPosition x)
+        alternatePos tags = null tags
+
+
+renderTests :: Test ()
+renderTests = do
+    -- rp parses a document and renders it straight back to text
+    let rp = renderTags . parseTags
+    rp "<test>" === "<test>"
+    rp "<br></br>" === "<br />"
+    rp "<script></script>" === "<script></script>"
+    rp "hello & world" === "hello &amp; world"
+    rp "<a href=test>" === "<a href=\"test\">"
+    rp "<a href>" === "<a href>"
+    rp "<a href?>" === "<a href?>"
+    rp "<?xml foo?>" === "<?xml foo ?>"
+    rp "<!-- neil -->" === "<!-- neil -->"
+    rp "<a test=\"a&apos;b\">" === "<a test=\"a'b\">"
+    escapeHTML "this is a &\" <test> '" === "this is a &amp;&quot; &lt;test&gt; '"
+    check $ \(HTML x) -> let y = rp x in rp y == (y :: String)  -- a second round trip changes nothing
+
+    
+entityTests :: Test ()
+entityTests = do
+    -- numeric references: decimal, then hex with either case of the x and the digits
+    forM_ [("65","A"), ("x41","A"), ("x4E","N"), ("x4e","N"), ("X4e","N")] $ \(ref, want) ->
+        lookupNumericEntity ref === Just want
+    -- a word, the empty string and a very large number are all rejected
+    forM_ ["Haskell", "", "89439085908539082"] $ \ref ->
+        lookupNumericEntity ref === Nothing
+
+    -- named references and XML escaping
+    lookupNamedEntity "amp" === Just "&"
+    lookupNamedEntity "haskell" === Nothing
+    escapeXML "hello world" === "hello world"
+    escapeXML "hello & world" === "hello &amp; world"
+
+
+combiTests :: Test ()
+combiTests = do
+    let txt = TagText "test"
+        open = TagOpen "test" []
+    -- an empty text pattern is expected to match; otherwise the text must agree
+    mapM_ (=== True) [txt ~== TagText "", txt ~== TagText "test"]
+    (txt ~== TagText "soup") === False
+    mapM_ (=== True) [txt ~== "test", open ~== "<test>"]
+    (open ~== "<soup>") === False
+    -- (~/=) gives the opposite answer here; comments are matched from "<!--...-->" patterns
+    mapM_ (=== True) [open ~/= "<soup>", TagComment "foo" ~== "<!--foo-->", TagComment "bar" ~== "<!--bar-->"]
+
+
+positionTests :: Test ()
+positionTests = do
+    let p = parseTagsOptions parseOptions{optTagPosition=True,optTagWarning=False}
+        opened rest = TagPosition 1 1 : TagOpen "a" [] : TagPosition 1 4 : rest
+    p "<a>&</a>" === opened [TagText "&",TagPosition 1 5,TagClose "a"]
+    -- these inputs come back as unchanged text, positioned at column 4
+    forM_ ["&#z", "&xz", "&", "&1"] $ \txt -> p ("<a>" ++ txt) === opened [TagText txt]
+    -- a complete entity is decoded, still positioned at column 4
+    p "<a>&amp;" === opened [TagText "&"]
+
+
+warnTests :: Test ()
+warnTests = do
+    let opts = parseOptions{optTagPosition=True,optTagWarning=True}
+        columns src = [(msg,c) | TagWarning msg:TagPosition _ c:_ <- tails (parseTagsOptions opts src)]  -- warning + next position's column
+    columns "neil &foo bar" === [("Unknown entity: foo",6)]