Todd Zullinger e014f4
Taken from upstream PR#5 (https://github.com/asciidoc/asciidoc-py3/pull/5)
Todd Zullinger e014f4
Todd Zullinger e014f4
6469317 Remove unnecessary decode in a2x (Matthew Peveler)
Todd Zullinger e014f4
684913e Fix decoding of file that specifies encoding in header tag in a2x (Matthew Peveler)
Todd Zullinger e014f4
8369a97 re-add --nonet option (Matthew Peveler)
Todd Zullinger e014f4
Todd Zullinger e014f4
diff --git c/a2x.py w/a2x.py
Todd Zullinger e014f4
index 55eb57e..c015079 100755
Todd Zullinger e014f4
--- c/a2x.py
Todd Zullinger e014f4
+++ w/a2x.py
Todd Zullinger e014f4
@@ -254,15 +254,11 @@ def find_resources(files, tagname, attrname, filter=None):
Todd Zullinger e014f4
         if OPTIONS.dry_run:
Todd Zullinger e014f4
             continue
Todd Zullinger e014f4
         parser = FindResources()
Todd Zullinger e014f4
-        # HTMLParser has problems with non-ASCII strings.
Todd Zullinger e014f4
-        # See http://bugs.python.org/issue3932
Todd Zullinger e014f4
-        contents = read_file(filename)
Todd Zullinger e014f4
-        mo = re.search(r'\A<\?xml.* encoding="(.*?)"', contents)
Todd Zullinger e014f4
-        if mo:
Todd Zullinger e014f4
-            encoding = mo.group(1)
Todd Zullinger e014f4
-            parser.feed(contents.decode(encoding))
Todd Zullinger e014f4
-        else:
Todd Zullinger e014f4
-            parser.feed(contents)
Todd Zullinger e014f4
+        with open(filename, 'rb') as open_file:
Todd Zullinger e014f4
+            contents = open_file.read()
Todd Zullinger e014f4
+        mo = re.search(b'\A<\?xml.* encoding="(.*?)"', contents)
Todd Zullinger e014f4
+        contents = contents.decode(mo.group(1).decode('utf-8') if mo else 'utf-8')
Todd Zullinger e014f4
+        parser.feed(contents)
Todd Zullinger e014f4
         parser.close()
Todd Zullinger e014f4
     result = list(set(result))   # Drop duplicate values.
Todd Zullinger e014f4
     result.sort()
Todd Zullinger e014f4
@@ -337,7 +333,7 @@ def get_source_options(asciidoc_file):
Todd Zullinger e014f4
     result = []
Todd Zullinger e014f4
     if os.path.isfile(asciidoc_file):
Todd Zullinger e014f4
         options = ''
Todd Zullinger e014f4
-        with open(asciidoc_file) as f:
Todd Zullinger e014f4
+        with open(asciidoc_file, encoding='utf-8') as f:
Todd Zullinger e014f4
             for line in f:
Todd Zullinger e014f4
                 mo = re.search(r'^//\s*a2x:', line)
Todd Zullinger e014f4
                 if mo: