HEX

File: //usr/lib/python3/dist-packages/ocrmypdf/_unicodefun.py
# Copyright (c) 2014, Armin Ronacher
#
# Copyright (c) 2017, James R Barlow
#
# Some rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#
#     * The names of the contributors may not be used to endorse or
#       promote products derived from this software without specific
#       prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import codecs
import os
import sys


def verify_python3_env():  # pragma: no cover
    """Ensures that the environment is good for unicode on Python 3."""

    # PEP 538 changes in Python 3.7 should make this wrangling unnecessary
    if sys.version_info[0:3] >= (3, 7, 0):
        return

    try:
        import locale

        fs_enc = codecs.lookup(locale.getpreferredencoding()).name
    except Exception:
        fs_enc = 'ascii'
    if fs_enc != 'ascii':
        return

    extra = ''
    if os.name == 'posix':
        import subprocess

        rv = subprocess.run(
            ['locale', '-a'], stdout=subprocess.PIPE, stderr=subprocess.PIPE
        ).stdout
        good_locales = set()
        has_c_utf8 = False

        # Make sure we're operating on text here.
        if isinstance(rv, bytes):
            rv = rv.decode('ascii', 'replace')

        for line in rv.splitlines():
            locale = line.strip()
            if locale.lower().endswith(('.utf-8', '.utf8')):
                good_locales.add(locale)
                if locale.lower() in ('c.utf8', 'c.utf-8'):
                    has_c_utf8 = True

        extra += '\n\n'
        if not good_locales:
            extra += (
                'Additional information: on this system no suitable UTF-8\n'
                'locales were discovered.  This most likely requires resolving\n'
                'by reconfiguring the locale system.'
            )
        elif has_c_utf8:
            extra += (
                'This system supports the C.UTF-8 locale which is recommended.\n'
                'You might be able to resolve your issue by exporting the\n'
                'following environment variables:\n\n'
                '    export LC_ALL=C.UTF-8\n'
                '    export LANG=C.UTF-8'
            )
        else:
            extra += (
                'This system lists a couple of UTF-8 supporting locales that\n'
                'you can pick from.  The following suitable locales were\n'
                'discovered: %s'
            ) % ', '.join(sorted(good_locales))

        bad_locale = None
        for locale in os.environ.get('LC_ALL'), os.environ.get('LANG'):
            if locale and locale.lower().endswith(('.utf-8', '.utf8')):
                bad_locale = locale
            if locale is not None:
                break
        if bad_locale is not None:
            extra += (
                '\nocrmypdf discovered that you exported a UTF-8 locale\n'
                'but the locale system could not pick up from it because\n'
                'it does not exist.  The exported locale is "%s" but it\n'
                'is not supported'
            ) % bad_locale

    raise RuntimeError(
        'ocrmypdf will abort further execution because Python 3 '
        'was configured to use ASCII as encoding for the '
        'environment.' + extra
    )