# NOTE(review): removed stray "Blob Blame History Raw" — web code-viewer
# residue from the paste, not valid Python.
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.misc.py23 import *
from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
import re
import os


class Lexer(object):
    """Tokenizer for OpenType feature files (.fea).

    Iterating over a Lexer yields ``(token_type, token, location)``
    tuples, where ``location`` is ``(filename, line, column)`` with
    1-based line and column numbers.  NEWLINE tokens are consumed
    internally by ``__next__`` and never yielded to callers.
    """

    # Token type tags. Callers compare these with "is", so each must
    # remain a single shared string object.
    NUMBER = "NUMBER"
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.]+$")

    # Scanner modes: after the "include" keyword the lexer expects a
    # parenthesized file name instead of ordinary tokens.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        # filename may be None (e.g. when lexing an in-memory string);
        # location_() then reports "<features>".
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        # Filter out NEWLINE tokens; everything else is passed through.
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the current (filename, line, column) position."""
        column = self.pos_ - self.line_start_ + 1
        return (self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return one raw token, including NEWLINE tokens.

        Raises StopIteration at end of input and FeatureLibError on
        malformed input.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # next_char is None at end of input, so every look-ahead below
        # must guard against None before using the "in" operator
        # (previously "0" or "\\" as the last character of the input
        # raised TypeError instead of lexing).
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single newline.
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            # Comments run to the end of the line and are kept as tokens.
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start:self.pos_], location)

        if self.mode_ is Lexer.MODE_FILENAME_:
            # After "include", the only valid token is "(filename)".
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name",
                                      location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name",
                                      location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1:self.pos_ - 1], location)

        if cur_char == "\\" and next_char is not None \
                and next_char in Lexer.CHAR_DIGIT_:
            # "\123" is a CID (glyph id) reference.
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1:self.pos_], 10), location)
        if cur_char == "@":
            # "@name" is a glyph class reference; the name must be
            # 1..63 chars of [A-Za-z0-9_.].
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1:self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if len(glyphclass) > 63:
                raise FeatureLibError(
                    "Glyph class names must not be longer than 63 characters",
                    location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, or period", location)
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            if token == "include":
                # Switch modes so the next token is lexed as a file name.
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char is not None and next_char in "xX":
            # Hexadecimal literal, e.g. "0x2a".  NOTE(review): "0x" with
            # no hex digits following raises ValueError from int() —
            # preexisting behavior, left unchanged.
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 16), location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start:self.pos_]), location)
        if cur_char == "-" and next_char is not None \
                and next_char in Lexer.CHAR_DIGIT_:
            # Negative number; must be checked before CHAR_SYMBOL_,
            # which also contains "-".
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start:self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1:self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string",
                                      location)
        raise FeatureLibError("Unexpected character: %r" % cur_char,
                              location)

    def scan_over_(self, valid):
        """Advance pos_ while the current character is in `valid`."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance pos_ until a character in `stop_at` (or EOF)."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Return the raw text of an anonymous data block terminated by
        "} <tag>;".  The remainder of the current line is skipped before
        capturing the block contents.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        # NOTE(review): line_/line_start_ are not updated while skipping
        # the block, so subsequent locations may be off — preexisting
        # behavior, left unchanged.
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # re.escape guards against tags containing regex metacharacters,
        # which previously corrupted the terminator pattern.
        regexp = r'}\s*' + re.escape(tag) + r'\s*;'
        split = re.split(regexp, self.text_[self.pos_:], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag,
                location)
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)


class IncludingLexer(object):
    """Lexer stack that transparently follows include(...) statements.

    Tokens from included files are spliced into the token stream.
    Includes may nest up to 5 levels deep; deeper nesting (including
    include cycles) raises FeatureLibError.
    """

    def __init__(self, featurefile):
        # featurefile is either a filesystem path or a file-like object.
        self.lexers_ = [self.make_lexer_(featurefile)]
        # Path of the root feature file, used to resolve relative
        # includes; None when lexing an in-memory stream.
        self.featurefilepath = self.lexers_[0].filename_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                # Innermost file exhausted; resume the including file.
                self.lexers_.pop()
                continue
            if token_type is Lexer.NAME and token == "include":
                fname_type, fname_token, fname_location = next(lexer)
                if fname_type is not Lexer.FILENAME:
                    raise FeatureLibError("Expected file name", fname_location)
                # The trailing ";" after the file name is deliberately
                # left in the stream for the parser to consume.
                if os.path.isabs(fname_token):
                    path = fname_token
                else:
                    if self.featurefilepath is not None:
                        curpath = os.path.dirname(self.featurefilepath)
                    else:
                        # if the IncludingLexer was initialized from an
                        # in-memory file-like stream, it doesn't have a
                        # 'name' pointing to its filesystem path, therefore
                        # we fall back to using the current working
                        # directory to resolve relative includes
                        curpath = os.getcwd()
                    path = os.path.join(curpath, fname_token)
                if len(self.lexers_) >= 5:
                    raise FeatureLibError("Too many recursive includes",
                                          fname_location)
                try:
                    self.lexers_.append(self.make_lexer_(path))
                except IOError as err:
                    # FileNotFoundError does not exist on Python < 3.3
                    import errno
                    if err.errno == errno.ENOENT:
                        raise IncludedFeaNotFound(fname_token, fname_location)
                    raise  # pragma: no cover
            else:
                return (token_type, token, location)
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a Lexer from a path or an open file-like object.

        When this function opened the file itself, the handle is closed
        even if reading fails (e.g. on a decode error); a caller-supplied
        file object is never closed here.
        """
        if hasattr(file_or_path, "read"):
            fileobj, closing = file_or_path, False
        else:
            filename, closing = file_or_path, True
            fileobj = open(filename, "r", encoding="utf-8")
        try:
            data = fileobj.read()
            filename = getattr(fileobj, "name", None)
        finally:
            if closing:
                fileobj.close()
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        # Delegate to the innermost (currently active) lexer.
        return self.lexers_[-1].scan_anonymous_block(tag)


class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        # Never push included files: read straight from the root lexer.
        root_lexer = self.lexers_[0]
        return root_lexer.__next__()