# path: root/chromium/third_party/cld_3/src/setup.py
# blob: 385189fc99e881b6a9fd5ff32549e09505507104
"""Setup utility for gcld3."""

import os
import platform
import shutil
import subprocess
import setuptools
from setuptools.command import build_ext

__version__ = '3.0.13'
_NAME = 'gcld3'

# Packages handed to setuptools through `setup_requires`.
REQUIREMENTS = ['pybind11 >= 2.5.0', 'wheel >= 0.34.2']

# Base names of the protocol buffer definitions. Each one yields both a
# `.proto` input file and a generated `.pb.cc` source file below.
_PROTO_STEMS = ('feature_extractor', 'sentence', 'task_spec')

PROTO_FILES = [f'src/{stem}.proto' for stem in _PROTO_STEMS]

# Hand-written C++ translation units living directly under `src/`.
_CORE_UNITS = (
    'base',
    'embedding_feature_extractor',
    'embedding_network',
    'feature_extractor',
    'feature_types',
    'fml_parser',
    'lang_id_nn_params',
    'language_identifier_features',
    'language_identifier_main',
    'nnet_language_identifier',
    'registry',
    'relevant_script_feature',
    'sentence_features',
    'task_context',
    'task_context_params',
    'unicodetext',
    'utils',
    'workspace',
)

# C++ translation units living under `src/script_span/`.
_SCRIPT_SPAN_UNITS = (
    'fixunicodevalue',
    'generated_entities',
    'generated_ulscript',
    'getonescriptspan',
    'offsetmap',
    'text_processing',
    'utf8statetable',
)

SRCS = (
    [f'src/{unit}.cc' for unit in _CORE_UNITS]
    + [f'src/script_span/{unit}.cc' for unit in _SCRIPT_SPAN_UNITS]
    # These CC files have to be generated by the proto buffer compiler 'protoc'
    + [f'src/cld_3/protos/{stem}.pb.cc' for stem in _PROTO_STEMS]
    # pybind11 bindings
    + ['gcld3/pybind_ext.cc']
)


class CompileProtos(build_ext.build_ext):
  """`build_ext` command that runs `protoc` before building the extension."""

  def run(self):
    """Generates the C++ proto sources, then runs the regular build step.

    Raises:
      RuntimeError: If no `protoc` executable can be located.
      subprocess.CalledProcessError: If `protoc` exits with a non-zero status.
    """
    protoc_missing = shutil.which('protoc') is None
    if protoc_missing:
      raise RuntimeError('Please install the proto buffer compiler.')

    # The C++ sources include the generated headers from this directory, so
    # it has to exist before protoc writes into it.
    out_dir = 'src/cld_3/protos/'
    os.makedirs(out_dir, exist_ok=True)

    protoc_cmd = [
        'protoc',
        '--cpp_out=' + out_dir,
        '--proto_path=src',
        *PROTO_FILES,
    ]
    subprocess.run(protoc_cmd, check=True, cwd='./')

    # Hand over to the stock build_ext implementation.
    super().run()


class PyBindIncludes:
  """Lazy placeholder for the pybind11 include directory.

  The `pybind11` import is deferred until the instance is converted to a
  string. This lets the object be created before pybind11 is installed; the
  real include path is only looked up once it is actually needed.
  """

  def __str__(self):
    # Imported on demand: pybind11 may not be installed yet when this module
    # is first loaded.
    from pybind11 import get_include  # pylint: disable=g-import-not-at-top
    return get_include()


MACOS = platform.system() == 'Darwin'

# Only macOS builds get explicit flags (C++11 and libc++); every other
# platform is built without extra compiler or linker arguments.
if MACOS:
  _EXTRA_COMPILE_ARGS = ['-std=c++11', '-stdlib=libc++']
  _EXTRA_LINK_ARGS = ['-stdlib=libc++']
else:
  _EXTRA_COMPILE_ARGS = []
  _EXTRA_LINK_ARGS = []

# The single C++ extension: all listed sources, linked against protobuf.
ext_modules = [
    setuptools.Extension(
        'gcld3.pybind_ext',
        sorted(SRCS),
        # Resolved lazily, see PyBindIncludes.__str__.
        include_dirs=[PyBindIncludes()],
        libraries=['protobuf'],
        extra_compile_args=_EXTRA_COMPILE_ARGS,
        extra_link_args=_EXTRA_LINK_ARGS,
        language='c++',
    ),
]

# Long description shown for the package; the first line doubles as the short
# description passed to setup() below.
DESCRIPTION = (
    'CLD3 is a neural network model for language identification.\n'
    'This package contains the inference code and a trained model. See\n'
    'https://github.com/google/cld3 for more details.\n'
)

setuptools.setup(
    # Package identity.
    name=_NAME,
    version=__version__,
    description='CLD3 is a neural network model for language identification.',
    long_description=DESCRIPTION,
    url='https://github.com/google/cld3',
    author='Rami Al-Rfou',
    author_email='rmyeid@google.com',
    # Contents and build configuration.
    packages=setuptools.find_packages(),
    ext_modules=ext_modules,
    # Route `build_ext` through CompileProtos instead of the stock command.
    cmdclass={'build_ext': CompileProtos},
    setup_requires=REQUIREMENTS,
    zip_safe=False,
)