# Source code for corpustools.lib.languagecode

# -*- coding: utf-8 -*-

# License: FreeBSD License or The BSD 2-Clause License

# Copyright (c) 2012, Leo Jiang
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

#     Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
#     Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# Author:   Leo Jiang <leo.jiang.dev@gmail.com>

# pylint: disable=I0011,C0301,C0103

"""
LanguageCode Module
"""

import locale


class LanguageCode(object):
    """Normalize a language code and render it in several common forms.

    The constructor accepts ``xx``, ``xx_XX`` or ``xx-XX`` (case insensitive,
    optionally followed by a ``.encoding`` suffix) and resolves it through
    ``locale.normalize``.  When ``locale.normalize`` does not recognize the
    code, the instance holds no language and every accessor returns ``None``.
    """

    def __init__(self, langcode):
        """Parse *langcode* and remember its normalized form.

        :param langcode: language code string such as ``en``, ``en_US`` or
            ``en-us``; anything after the first ``.`` is ignored.
        """
        # Cut the encoding part and support the form of xx-xx.
        langcode = langcode.split('.')[0].replace('-', '_')
        normalized = locale.normalize(langcode)
        # locale.normalize() hands back its argument unchanged when it cannot
        # resolve it, so an unchanged value means "not a known language code".
        if normalized == langcode:
            self._langcode = None
        else:
            # Drop whatever encoding part normalize() appended.
            self._langcode = normalized.split('.')[0]

    def xx(self):
        """Return the lower-case language part (``xx``), or ``None``."""
        if self._langcode is None:
            return None
        return self._langcode.lower().split('_')[0]

    def xx_XX(self):
        """Return the ``xx_XX`` form of the language code.

        Returns ``None`` when the code was not recognized, or when the
        normalized locale carries no territory part (e.g. ``C``), instead of
        failing on the missing second component.
        """
        if self._langcode is None:
            return None
        parts = self._langcode.split('_')
        if len(parts) < 2:
            return None
        return '_'.join([parts[0].lower(), parts[1].upper()])

    def _XX_dash_XX(self):
        """Return the upper-case ``XX-XX`` form of the language code, or ``None``."""
        if self._langcode is None:
            return None
        return '-'.join(self._langcode.upper().split('_'))

    def TMX_form(self):
        """Return the TMX form (``xx-XX``) of the language code.

        Returns ``None`` whenever ``xx_XX()`` does, so that an unrecognized
        code behaves the same here as in the other accessors.
        """
        code = self.xx_XX()
        if code is None:
            return None
        return code.replace('_', '-')