Blame Lib/encodings/utf_8_sig.py

rpm-build 2bd099
""" Python 'utf-8-sig' Codec
This works similarly to UTF-8, with the following changes:

* On encoding/writing, a UTF-8 encoded BOM will be prepended/written as the
  first three bytes.

* On decoding/reading, if the first three bytes are a UTF-8 encoded BOM, these
  bytes will be skipped.
"""
rpm-build 2bd099
import codecs
rpm-build 2bd099
rpm-build 2bd099
### Codec APIs
rpm-build 2bd099
rpm-build 2bd099
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 and prepend the UTF-8 encoded BOM.

    Returns a ``(data, length)`` tuple: *data* is ``codecs.BOM_UTF8``
    followed by the UTF-8 encoding of *input*, and *length* is
    ``len(input)``.  *errors* is passed through to
    ``codecs.utf_8_encode``.
    """
    encoded = codecs.utf_8_encode(input, errors)[0]
    return (codecs.BOM_UTF8 + encoded, len(input))
rpm-build 2bd099
rpm-build 2bd099
def decode(input, errors='strict'):
    """Decode UTF-8 *input*, skipping a leading UTF-8 encoded BOM if present.

    Returns a ``(text, consumed)`` tuple.  *consumed* is the number of
    bytes of *input* that were used, including the three BOM bytes when
    they were skipped.  *errors* is passed through to
    ``codecs.utf_8_decode``.
    """
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    # The third argument is ``final``: this is a one-shot decode, so the
    # underlying decoder is told that no more data will follow.
    output, consumed = codecs.utf_8_decode(input, errors, True)
    return (output, consumed + prefix)
rpm-build 2bd099
rpm-build 2bd099
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-8 encoder that emits the BOM before the first chunk.

    ``self.first`` is 1 until the first call to ``encode()`` and 0
    afterwards; it is also the value exposed through ``getstate()`` and
    accepted by ``setstate()``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # 1 => the BOM has not been written yet.
        self.first = 1

    def encode(self, input, final=False):
        """Return *input* encoded as UTF-8, prefixed by the BOM on first use."""
        encoded = codecs.utf_8_encode(input, self.errors)[0]
        if self.first:
            self.first = 0
            return codecs.BOM_UTF8 + encoded
        return encoded

    def reset(self):
        """Reset the encoder so that the next ``encode()`` emits a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the current state (the value of ``self.first``)."""
        return self.first

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        self.first = state
rpm-build 2bd099
rpm-build 2bd099
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-8 decoder that skips a leading UTF-8 encoded BOM.

    ``self.first`` is 1 while the decoder has not yet decided whether the
    stream starts with a BOM, and 0 afterwards.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # 1 => still looking at the start of the stream.
        self.first = 1

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, dropping the BOM if it starts the stream.

        Returns ``(text, consumed)``.  While fewer than three bytes are
        available and they could still be the beginning of a BOM, nothing
        is consumed so the caller can retry with more data.  When *final*
        is true no more data will arrive, so a non-empty truncated BOM
        prefix is handed to ``codecs.utf_8_decode`` (and handled according
        to *errors*) instead of being silently left undecoded.
        """
        if self.first:
            if len(input) < 3:
                if (codecs.BOM_UTF8.startswith(input)
                        and not (final and input)):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                # Either the data cannot be a BOM, or it is a truncated
                # BOM prefix at the very end of the input.
                self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    output, consumed = codecs.utf_8_decode(
                        input[3:], errors, final)
                    return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Reset the decoder so that it looks for a BOM again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return ``(buffered_bytes, first)`` describing the decoder state."""
        state = codecs.BufferedIncrementalDecoder.getstate(self)
        # state[1] must be 0 here, as it isn't passed along to the caller
        return (state[0], self.first)

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        self.first = state[1]
rpm-build 2bd099
rpm-build 2bd099
class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits the UTF-8 encoded BOM before the first data."""

    def reset(self):
        """Reset the writer so that the next write emits a BOM again."""
        codecs.StreamWriter.reset(self)
        # Drop the instance-level override installed by encode() so that
        # the class-level, BOM-writing encode() is used again.
        try:
            del self.encode
        except AttributeError:
            pass

    def encode(self, input, errors='strict'):
        """Encode the first chunk with a BOM, then switch to plain UTF-8."""
        # All later calls on this instance go straight to utf_8_encode.
        self.encode = codecs.utf_8_encode
        return encode(input, errors)
rpm-build 2bd099
rpm-build 2bd099
class StreamReader(codecs.StreamReader):
    """Stream reader that skips a leading UTF-8 encoded BOM."""

    def reset(self):
        """Reset the reader so that it looks for a BOM again."""
        codecs.StreamReader.reset(self)
        # Drop the instance-level override installed by decode() so that
        # the class-level, BOM-aware decode() is used again.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode the start of the stream, dropping the BOM if present.

        Returns ``(text, consumed)``.  Once it is known whether the data
        starts with a BOM, ``self.decode`` is replaced on the instance by
        ``codecs.utf_8_decode`` so later calls skip this check.
        """
        if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return ("", 0)

        # The decision can be made now; later chunks are plain UTF-8.
        self.decode = codecs.utf_8_decode
        if len(input) >= 3 and input[:3] == codecs.BOM_UTF8:
            output, consumed = codecs.utf_8_decode(input[3:], errors)
            return (output, consumed + 3)
        # no BOM present
        return codecs.utf_8_decode(input, errors)
rpm-build 2bd099
rpm-build 2bd099
### encodings module API
rpm-build 2bd099
rpm-build 2bd099
def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing the 'utf-8-sig' codec."""
    return codecs.CodecInfo(
        name='utf-8-sig',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )