import codecs
import locale
import re
import sys
from typing import List, Tuple

# Byte-order marks and the codec each one selects.
#
# Order matters: the UTF-16-LE BOM (FF FE) is a prefix of the UTF-32-LE BOM
# (FF FE 00 00), so the 4-byte UTF-32 marks must be tested before the 2-byte
# UTF-16 marks; otherwise UTF-32-LE input is misdetected as UTF-16.
BOMS = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32, "utf-32"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF16, "utf-16"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
]  # type: List[Tuple[bytes, str]]

# Matches the encoding name of a PEP 263 style "coding: <name>" declaration.
ENCODING_RE = re.compile(br"coding[:=]\s*([-\w.]+)")


def auto_decode(data):
    # type: (bytes) -> str
    """Decode a bytes string, detecting its encoding where possible.

    Detection is attempted in this order:

    1. A leading byte-order mark listed in ``BOMS``; the mark is stripped
       and the remainder is decoded with the matching codec.
    2. A PEP 263 style ``coding:`` / ``coding=`` declaration on a comment
       line (first byte ``#``) among the first two lines.
    3. Fallback to ``locale.getpreferredencoding(False)`` like ``open()`` on
       Python 3, or ``sys.getdefaultencoding()`` if that is empty.

    :param data: the raw bytes to decode.
    :return: the decoded text.
    :raises UnicodeDecodeError: if the data is not valid in the chosen
        encoding.
    :raises LookupError: if a declared encoding name is unknown to Python.
    """
    for bom, encoding in BOMS:
        if data.startswith(bom):
            return data[len(bom):].decode(encoding)

    # Let's check the first two lines as in PEP 263.  Only the first two
    # lines are needed, so cap the split rather than splitting everything.
    for line in data.split(b"\n", 2)[:2]:
        if line[0:1] != b"#":
            continue
        result = ENCODING_RE.search(line)
        if result is not None:
            # The bytes pattern only matches ASCII word characters, so the
            # captured name is always ASCII-decodable.
            encoding = result.group(1).decode("ascii")
            return data.decode(encoding)

    return data.decode(
        locale.getpreferredencoding(False) or sys.getdefaultencoding(),
    )