From 4e03046ee748a134a5ad1b2a53e863517daba704 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 25 Feb 2021 15:38:09 +0800 Subject: [PATCH] first commit. --- .clang-format | 9 + .github/workflows/publish_to_pypi.yml | 36 + .gitignore | 3 + CMakeLists.txt | 36 + LICENSE | 211 ++++ README.md | 11 + cmake/Modules/FetchContent.cmake | 916 ++++++++++++++++++ .../Modules/FetchContent/CMakeLists.cmake.in | 21 + cmake/Modules/README.md | 5 + cmake/pybind11.cmake | 34 + cmake/torch.cmake | 39 + kaldifeat/CMakeLists.txt | 1 + kaldifeat/csrc/CMakeLists.txt | 7 + kaldifeat/csrc/feature-window.cc | 52 + kaldifeat/csrc/feature-window.h | 66 ++ kaldifeat/csrc/log.h | 94 ++ kaldifeat/csrc/mel-computations.cc | 180 ++++ kaldifeat/csrc/mel-computations.h | 74 ++ kaldifeat/python/kaldifeat/__init__.py | 0 setup.py | 55 ++ 20 files changed, 1850 insertions(+) create mode 100644 .clang-format create mode 100644 .github/workflows/publish_to_pypi.yml create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 LICENSE create mode 100644 README.md create mode 100644 cmake/Modules/FetchContent.cmake create mode 100644 cmake/Modules/FetchContent/CMakeLists.cmake.in create mode 100644 cmake/Modules/README.md create mode 100644 cmake/pybind11.cmake create mode 100644 cmake/torch.cmake create mode 100644 kaldifeat/CMakeLists.txt create mode 100644 kaldifeat/csrc/CMakeLists.txt create mode 100644 kaldifeat/csrc/feature-window.cc create mode 100644 kaldifeat/csrc/feature-window.h create mode 100644 kaldifeat/csrc/log.h create mode 100644 kaldifeat/csrc/mel-computations.cc create mode 100644 kaldifeat/csrc/mel-computations.h create mode 100644 kaldifeat/python/kaldifeat/__init__.py create mode 100644 setup.py diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..c65e772 --- /dev/null +++ b/.clang-format @@ -0,0 +1,9 @@ +--- +BasedOnStyle: Google +--- +Language: Cpp +Cpp11BracedListStyle: true +Standard: Cpp11 +DerivePointerAlignment: false +PointerAlignment: Right +--- diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml new file mode 100644 index 0000000..74c255a --- /dev/null +++ b/.github/workflows/publish_to_pypi.yml @@ -0,0 +1,36 @@ +name: Publish to PyPI + +on: + push: + tags: + - '*' + +jobs: + pypi: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip + python3 -m pip install wheel twine setuptools + + - name: Build + shell: bash + run: | + python3 setup.py sdist + ls -l dist/* + + - name: Publish wheels to PyPI + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + twine upload dist/kaldifeat-*.tar.gz diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..afb27c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build/ +*.egg-info*/ +dist/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..880fd64 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,36 @@ +# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang) + +cmake_minimum_required(VERSION 3.8 FATAL_ERROR) + +project(kaldifeat) + +set(kaldifeat_VERSION "0.0.1") + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") + +set(CMAKE_SKIP_BUILD_RPATH FALSE) +set(BUILD_RPATH_USE_ORIGIN TRUE) +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set(CMAKE_INSTALL_RPATH "$ORIGIN") +set(CMAKE_BUILD_RPATH "$ORIGIN") + +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") + set(CMAKE_BUILD_TYPE Release) +endif() + +set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") +set(CMAKE_CXX_EXTENSIONS OFF) + +message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") + +list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) +list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) + +include(pybind11) +include(torch) + +include_directories(${CMAKE_SOURCE_DIR}) +add_subdirectory(kaldifeat) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ee06cfc --- /dev/null +++ b/LICENSE @@ -0,0 +1,211 @@ + + Legal Notices + + NOTE (this is not from the Apache License): The copyright model is that + authors (or their employers, if noted in individual files) own their + individual contributions. The authors' contributions can be discerned + from the git history. + + ------------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..05e5d5d --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# kaldifeat + +Wrap kaldi's feature computations to Python with PyTorch support. + +# Installation + +`kaldifeat` can be installed by + +```bash +pip install kaldifeat +``` diff --git a/cmake/Modules/FetchContent.cmake b/cmake/Modules/FetchContent.cmake new file mode 100644 index 0000000..98cdf6c --- /dev/null +++ b/cmake/Modules/FetchContent.cmake @@ -0,0 +1,916 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FetchContent +------------------ + +.. only:: html + + .. contents:: + +Overview +^^^^^^^^ + +This module enables populating content at configure time via any method +supported by the :module:`ExternalProject` module. Whereas +:command:`ExternalProject_Add` downloads at build time, the +``FetchContent`` module makes content available immediately, allowing the +configure step to use the content in commands like :command:`add_subdirectory`, +:command:`include` or :command:`file` operations. + +Content population details would normally be defined separately from the +command that performs the actual population. Projects should also +check whether the content has already been populated somewhere else in the +project hierarchy. Typical usage would look something like this: + +.. code-block:: cmake + + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.8.0 + ) + + FetchContent_GetProperties(googletest) + if(NOT googletest_POPULATED) + FetchContent_Populate(googletest) + add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) + endif() + +When using the above pattern with a hierarchical project arrangement, +projects at higher levels in the hierarchy are able to define or override +the population details of content specified anywhere lower in the project +hierarchy. The ability to detect whether content has already been +populated ensures that even if multiple child projects want certain content +to be available, the first one to populate it wins. The other child project +can simply make use of the already available content instead of repeating +the population for itself. See the +:ref:`Examples ` section which demonstrates +this scenario. + +The ``FetchContent`` module also supports defining and populating +content in a single call, with no check for whether the content has been +populated elsewhere in the project already. This is a more low level +operation and would not normally be the way the module is used, but it is +sometimes useful as part of implementing some higher level feature or to +populate some content in CMake's script mode. + + +Declaring Content Details +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. command:: FetchContent_Declare + + .. code-block:: cmake + + FetchContent_Declare( ...) + + The ``FetchContent_Declare()`` function records the options that describe + how to populate the specified content, but if such details have already + been recorded earlier in this project (regardless of where in the project + hierarchy), this and all later calls for the same content ```` are + ignored. This "first to record, wins" approach is what allows hierarchical + projects to have parent projects override content details of child projects. + + The content ```` can be any string without spaces, but good practice + would be to use only letters, numbers and underscores. The name will be + treated case-insensitively and it should be obvious for the content it + represents, often being the name of the child project or the value given + to its top level :command:`project` command (if it is a CMake project). + For well-known public projects, the name should generally be the official + name of the project. Choosing an unusual name makes it unlikely that other + projects needing that same content will use the same name, leading to + the content being populated multiple times. + + The ```` can be any of the download or update/patch options + that the :command:`ExternalProject_Add` command understands. The configure, + build, install and test steps are explicitly disabled and therefore options + related to them will be ignored. In most cases, ```` will + just be a couple of options defining the download method and method-specific + details like a commit tag or archive hash. For example: + + .. code-block:: cmake + + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.8.0 + ) + + FetchContent_Declare( + myCompanyIcons + URL https://intranet.mycompany.com/assets/iconset_1.12.tar.gz + URL_HASH 5588a7b18261c20068beabfb4f530b87 + ) + + FetchContent_Declare( + myCompanyCertificates + SVN_REPOSITORY svn+ssh://svn.mycompany.com/srv/svn/trunk/certs + SVN_REVISION -r12345 + ) + +Populating The Content +^^^^^^^^^^^^^^^^^^^^^^ + +.. command:: FetchContent_Populate + + .. code-block:: cmake + + FetchContent_Populate( ) + + In most cases, the only argument given to ``FetchContent_Populate()`` is the + ````. When used this way, the command assumes the content details have + been recorded by an earlier call to :command:`FetchContent_Declare`. The + details are stored in a global property, so they are unaffected by things + like variable or directory scope. Therefore, it doesn't matter where in the + project the details were previously declared, as long as they have been + declared before the call to ``FetchContent_Populate()``. Those saved details + are then used to construct a call to :command:`ExternalProject_Add` in a + private sub-build to perform the content population immediately. The + implementation of ``ExternalProject_Add()`` ensures that if the content has + already been populated in a previous CMake run, that content will be reused + rather than repopulating them again. For the common case where population + involves downloading content, the cost of the download is only paid once. + + An internal global property records when a particular content population + request has been processed. If ``FetchContent_Populate()`` is called more + than once for the same content name within a configure run, the second call + will halt with an error. Projects can and should check whether content + population has already been processed with the + :command:`FetchContent_GetProperties` command before calling + ``FetchContent_Populate()``. + + ``FetchContent_Populate()`` will set three variables in the scope of the + caller; ``_POPULATED``, ``_SOURCE_DIR`` and + ``_BINARY_DIR``, where ```` is the lowercased ````. + ``_POPULATED`` will always be set to ``True`` by the call. + ``_SOURCE_DIR`` is the location where the + content can be found upon return (it will have already been populated), while + ``_BINARY_DIR`` is a directory intended for use as a corresponding + build directory. The main use case for the two directory variables is to + call :command:`add_subdirectory` immediately after population, i.e.: + + .. code-block:: cmake + + FetchContent_Populate(FooBar ...) + add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR}) + + The values of the three variables can also be retrieved from anywhere in the + project hierarchy using the :command:`FetchContent_GetProperties` command. + + A number of cache variables influence the behavior of all content population + performed using details saved from a :command:`FetchContent_Declare` call: + + ``FETCHCONTENT_BASE_DIR`` + In most cases, the saved details do not specify any options relating to the + directories to use for the internal sub-build, final source and build areas. + It is generally best to leave these decisions up to the ``FetchContent`` + module to handle on the project's behalf. The ``FETCHCONTENT_BASE_DIR`` + cache variable controls the point under which all content population + directories are collected, but in most cases developers would not need to + change this. The default location is ``${CMAKE_BINARY_DIR}/_deps``, but if + developers change this value, they should aim to keep the path short and + just below the top level of the build tree to avoid running into path + length problems on Windows. + + ``FETCHCONTENT_QUIET`` + The logging output during population can be quite verbose, making the + configure stage quite noisy. This cache option (``ON`` by default) hides + all population output unless an error is encountered. If experiencing + problems with hung downloads, temporarily switching this option off may + help diagnose which content population is causing the issue. + + ``FETCHCONTENT_FULLY_DISCONNECTED`` + When this option is enabled, no attempt is made to download or update + any content. It is assumed that all content has already been populated in + a previous run or the source directories have been pointed at existing + contents the developer has provided manually (using options described + further below). When the developer knows that no changes have been made to + any content details, turning this option ``ON`` can significantly speed up + the configure stage. It is ``OFF`` by default. + + ``FETCHCONTENT_UPDATES_DISCONNECTED`` + This is a less severe download/update control compared to + ``FETCHCONTENT_FULLY_DISCONNECTED``. Instead of bypassing all download and + update logic, the ``FETCHCONTENT_UPDATES_DISCONNECTED`` only disables the + update stage. Therefore, if content has not been downloaded previously, + it will still be downloaded when this option is enabled. This can speed up + the configure stage, but not as much as + ``FETCHCONTENT_FULLY_DISCONNECTED``. It is ``OFF`` by default. + + In addition to the above cache variables, the following cache variables are + also defined for each content name (```` is the uppercased value of + ````): + + ``FETCHCONTENT_SOURCE_DIR_`` + If this is set, no download or update steps are performed for the specified + content and the ``_SOURCE_DIR`` variable returned to the caller is + pointed at this location. This gives developers a way to have a separate + checkout of the content that they can modify freely without interference + from the build. The build simply uses that existing source, but it still + defines ``_BINARY_DIR`` to point inside its own build area. + Developers are strongly encouraged to use this mechanism rather than + editing the sources populated in the default location, as changes to + sources in the default location can be lost when content population details + are changed by the project. + + ``FETCHCONTENT_UPDATES_DISCONNECTED_`` + This is the per-content equivalent of + ``FETCHCONTENT_UPDATES_DISCONNECTED``. If the global option or this option + is ``ON``, then updates will be disabled for the named content. + Disabling updates for individual content can be useful for content whose + details rarely change, while still leaving other frequently changing + content with updates enabled. + + + The ``FetchContent_Populate()`` command also supports a syntax allowing the + content details to be specified directly rather than using any saved + details. This is more low-level and use of this form is generally to be + avoided in favour of using saved content details as outlined above. + Nevertheless, in certain situations it can be useful to invoke the content + population as an isolated operation (typically as part of implementing some + other higher level feature or when using CMake in script mode): + + .. code-block:: cmake + + FetchContent_Populate( + [QUIET] + [SUBBUILD_DIR ] + [SOURCE_DIR ] + [BINARY_DIR ] + ... + ) + + This form has a number of key differences to that where only ```` is + provided: + + - All required population details are assumed to have been provided directly + in the call to ``FetchContent_Populate()``. Any saved details for + ```` are ignored. + - No check is made for whether content for ```` has already been + populated. + - No global property is set to record that the population has occurred. + - No global properties record the source or binary directories used for the + populated content. + - The ``FETCHCONTENT_FULLY_DISCONNECTED`` and + ``FETCHCONTENT_UPDATES_DISCONNECTED`` cache variables are ignored. + + The ``_SOURCE_DIR`` and ``_BINARY_DIR`` variables are still + returned to the caller, but since these locations are not stored as global + properties when this form is used, they are only available to the calling + scope and below rather than the entire project hierarchy. No + ``_POPULATED`` variable is set in the caller's scope with this form. + + The supported options for ``FetchContent_Populate()`` are the same as those + for :command:`FetchContent_Declare()`. Those few options shown just + above are either specific to ``FetchContent_Populate()`` or their behavior is + slightly modified from how :command:`ExternalProject_Add` treats them. + + ``QUIET`` + The ``QUIET`` option can be given to hide the output associated with + populating the specified content. If the population fails, the output will + be shown regardless of whether this option was given or not so that the + cause of the failure can be diagnosed. The global ``FETCHCONTENT_QUIET`` + cache variable has no effect on ``FetchContent_Populate()`` calls where the + content details are provided directly. + + ``SUBBUILD_DIR`` + The ``SUBBUILD_DIR`` argument can be provided to change the location of the + sub-build created to perform the population. The default value is + ``${CMAKE_CURRENT_BINARY_DIR}/-subbuild`` and it would be unusual + to need to override this default. If a relative path is specified, it will + be interpreted as relative to :variable:`CMAKE_CURRENT_BINARY_DIR`. + + ``SOURCE_DIR``, ``BINARY_DIR`` + The ``SOURCE_DIR`` and ``BINARY_DIR`` arguments are supported by + :command:`ExternalProject_Add`, but different default values are used by + ``FetchContent_Populate()``. ``SOURCE_DIR`` defaults to + ``${CMAKE_CURRENT_BINARY_DIR}/-src`` and ``BINARY_DIR`` defaults to + ``${CMAKE_CURRENT_BINARY_DIR}/-build``. If a relative path is + specified, it will be interpreted as relative to + :variable:`CMAKE_CURRENT_BINARY_DIR`. + + In addition to the above explicit options, any other unrecognized options are + passed through unmodified to :command:`ExternalProject_Add` to perform the + download, patch and update steps. The following options are explicitly + prohibited (they are disabled by the ``FetchContent_Populate()`` command): + + - ``CONFIGURE_COMMAND`` + - ``BUILD_COMMAND`` + - ``INSTALL_COMMAND`` + - ``TEST_COMMAND`` + + If using ``FetchContent_Populate()`` within CMake's script mode, be aware + that the implementation sets up a sub-build which therefore requires a CMake + generator and build tool to be available. If these cannot be found by + default, then the :variable:`CMAKE_GENERATOR` and/or + :variable:`CMAKE_MAKE_PROGRAM` variables will need to be set appropriately + on the command line invoking the script. + + +Retrieve Population Properties +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. command:: FetchContent_GetProperties + + When using saved content details, a call to :command:`FetchContent_Populate` + records information in global properties which can be queried at any time. + This information includes the source and binary directories associated with + the content and also whether or not the content population has been processed + during the current configure run. + + .. code-block:: cmake + + FetchContent_GetProperties( + [SOURCE_DIR ] + [BINARY_DIR ] + [POPULATED ] + ) + + The ``SOURCE_DIR``, ``BINARY_DIR`` and ``POPULATED`` options can be used to + specify which properties should be retrieved. Each option accepts a value + which is the name of the variable in which to store that property. Most of + the time though, only ```` is given, in which case the call will then + set the same variables as a call to + :command:`FetchContent_Populate(name) `. This allows + the following canonical pattern to be used, which ensures that the relevant + variables will always be defined regardless of whether or not the population + has been performed elsewhere in the project already: + + .. code-block:: cmake + + FetchContent_GetProperties(foobar) + if(NOT foobar_POPULATED) + FetchContent_Populate(foobar) + + # Set any custom variables, etc. here, then + # populate the content as part of this build + + add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR}) + endif() + + The above pattern allows other parts of the overall project hierarchy to + re-use the same content and ensure that it is only populated once. + + +.. _`fetch-content-examples`: + +Examples +^^^^^^^^ + +Consider a project hierarchy where ``projA`` is the top level project and it +depends on projects ``projB`` and ``projC``. Both ``projB`` and ``projC`` +can be built standalone and they also both depend on another project +``projD``. For simplicity, this example will assume that all four projects +are available on a company git server. The ``CMakeLists.txt`` of each project +might have sections like the following: + +*projA*: + +.. code-block:: cmake + + include(FetchContent) + FetchContent_Declare( + projB + GIT_REPOSITORY git@mycompany.com/git/projB.git + GIT_TAG 4a89dc7e24ff212a7b5167bef7ab079d + ) + FetchContent_Declare( + projC + GIT_REPOSITORY git@mycompany.com/git/projC.git + GIT_TAG 4ad4016bd1d8d5412d135cf8ceea1bb9 + ) + FetchContent_Declare( + projD + GIT_REPOSITORY git@mycompany.com/git/projD.git + GIT_TAG origin/integrationBranch + ) + + FetchContent_GetProperties(projB) + if(NOT projb_POPULATED) + FetchContent_Populate(projB) + add_subdirectory(${projb_SOURCE_DIR} ${projb_BINARY_DIR}) + endif() + + FetchContent_GetProperties(projC) + if(NOT projc_POPULATED) + FetchContent_Populate(projC) + add_subdirectory(${projc_SOURCE_DIR} ${projc_BINARY_DIR}) + endif() + +*projB*: + +.. code-block:: cmake + + include(FetchContent) + FetchContent_Declare( + projD + GIT_REPOSITORY git@mycompany.com/git/projD.git + GIT_TAG 20b415f9034bbd2a2e8216e9a5c9e632 + ) + + FetchContent_GetProperties(projD) + if(NOT projd_POPULATED) + FetchContent_Populate(projD) + add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR}) + endif() + + +*projC*: + +.. code-block:: cmake + + include(FetchContent) + FetchContent_Declare( + projD + GIT_REPOSITORY git@mycompany.com/git/projD.git + GIT_TAG 7d9a17ad2c962aa13e2fbb8043fb6b8a + ) + + FetchContent_GetProperties(projD) + if(NOT projd_POPULATED) + FetchContent_Populate(projD) + add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR}) + endif() + +A few key points should be noted in the above: + +- ``projB`` and ``projC`` define different content details for ``projD``, + but ``projA`` also defines a set of content details for ``projD`` and + because ``projA`` will define them first, the details from ``projB`` and + ``projC`` will not be used. The override details defined by ``projA`` + are not required to match either of those from ``projB`` or ``projC``, but + it is up to the higher level project to ensure that the details it does + define still make sense for the child projects. +- While ``projA`` defined content details for ``projD``, it did not need + to explicitly call ``FetchContent_Populate(projD)`` itself. Instead, it + leaves that to a child project to do (in this case it will be ``projB`` + since it is added to the build ahead of ``projC``). If ``projA`` needed to + customize how the ``projD`` content was brought into the build as well + (e.g. define some CMake variables before calling + :command:`add_subdirectory` after populating), it would do the call to + ``FetchContent_Populate()``, etc. just as it did for the ``projB`` and + ``projC`` content. For higher level projects, it is usually enough to + just define the override content details and leave the actual population + to the child projects. This saves repeating the same thing at each level + of the project hierarchy unnecessarily. +- Even though ``projA`` is the top level project in this example, it still + checks whether ``projB`` and ``projC`` have already been populated before + going ahead to do those populations. This makes ``projA`` able to be more + easily incorporated as a child of some other higher level project in the + future if required. Always protect a call to + :command:`FetchContent_Populate` with a check to + :command:`FetchContent_GetProperties`, even in what may be considered a top + level project at the time. + + +The following example demonstrates how one might download and unpack a +firmware tarball using CMake's :manual:`script mode `. The call to +:command:`FetchContent_Populate` specifies all the content details and the +unpacked firmware will be placed in a ``firmware`` directory below the +current working directory. + +*getFirmware.cmake*: + +.. code-block:: cmake + + # NOTE: Intended to be run in script mode with cmake -P + include(FetchContent) + FetchContent_Populate( + firmware + URL https://mycompany.com/assets/firmware-1.23-arm.tar.gz + URL_HASH MD5=68247684da89b608d466253762b0ff11 + SOURCE_DIR firmware + ) + +#]=======================================================================] + + +set(__FetchContent_privateDir "${CMAKE_CURRENT_LIST_DIR}/FetchContent") + +#======================================================================= +# Recording and retrieving content details for later population +#======================================================================= + +# Internal use, projects must not call this directly. It is +# intended for use by FetchContent_Declare() only. +# +# Sets a content-specific global property (not meant for use +# outside of functions defined here in this file) which can later +# be retrieved using __FetchContent_getSavedDetails() with just the +# same content name. If there is already a value stored in the +# property, it is left unchanged and this call has no effect. +# This allows parent projects to define the content details, +# overriding anything a child project may try to set (properties +# are not cached between runs, so the first thing to set it in a +# build will be in control). +function(__FetchContent_declareDetails contentName) + + string(TOLOWER ${contentName} contentNameLower) + set(propertyName "_FetchContent_${contentNameLower}_savedDetails") + get_property(alreadyDefined GLOBAL PROPERTY ${propertyName} DEFINED) + if(NOT alreadyDefined) + define_property(GLOBAL PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} ${ARGN}) + endif() + +endfunction() + + +# Internal use, projects must not call this directly. It is +# intended for use by the FetchContent_Declare() function. +# +# Retrieves details saved for the specified content in an +# earlier call to __FetchContent_declareDetails(). +function(__FetchContent_getSavedDetails contentName outVar) + + string(TOLOWER ${contentName} contentNameLower) + set(propertyName "_FetchContent_${contentNameLower}_savedDetails") + get_property(alreadyDefined GLOBAL PROPERTY ${propertyName} DEFINED) + if(NOT alreadyDefined) + message(FATAL_ERROR "No content details recorded for ${contentName}") + endif() + get_property(propertyValue GLOBAL PROPERTY ${propertyName}) + set(${outVar} "${propertyValue}" PARENT_SCOPE) + +endfunction() + + +# Saves population details of the content, sets defaults for the +# SOURCE_DIR and BUILD_DIR. +function(FetchContent_Declare contentName) + + set(options "") + set(oneValueArgs SVN_REPOSITORY) + set(multiValueArgs "") + + cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + unset(srcDirSuffix) + unset(svnRepoArgs) + if(ARG_SVN_REPOSITORY) + # Add a hash of the svn repository URL to the source dir. This works + # around the problem where if the URL changes, the download would + # fail because it tries to checkout/update rather than switch the + # old URL to the new one. We limit the hash to the first 7 characters + # so that the source path doesn't get overly long (which can be a + # problem on windows due to path length limits). + string(SHA1 urlSHA ${ARG_SVN_REPOSITORY}) + string(SUBSTRING ${urlSHA} 0 7 urlSHA) + set(srcDirSuffix "-${urlSHA}") + set(svnRepoArgs SVN_REPOSITORY ${ARG_SVN_REPOSITORY}) + endif() + + string(TOLOWER ${contentName} contentNameLower) + __FetchContent_declareDetails( + ${contentNameLower} + SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src${srcDirSuffix}" + BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build" + ${svnRepoArgs} + # List these last so they can override things we set above + ${ARG_UNPARSED_ARGUMENTS} + ) + +endfunction() + + +#======================================================================= +# Set/get whether the specified content has been populated yet. +# The setter also records the source and binary dirs used. +#======================================================================= + +# Internal use, projects must not call this directly. It is +# intended for use by the FetchContent_Populate() function to +# record when FetchContent_Populate() is called for a particular +# content name. +function(__FetchContent_setPopulated contentName sourceDir binaryDir) + + string(TOLOWER ${contentName} contentNameLower) + set(prefix "_FetchContent_${contentNameLower}") + + set(propertyName "${prefix}_sourceDir") + define_property(GLOBAL PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} ${sourceDir}) + + set(propertyName "${prefix}_binaryDir") + define_property(GLOBAL PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} ${binaryDir}) + + set(propertyName "${prefix}_populated") + define_property(GLOBAL PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} True) + +endfunction() + + +# Set variables in the calling scope for any of the retrievable +# properties. If no specific properties are requested, variables +# will be set for all retrievable properties. +# +# This function is intended to also be used by projects as the canonical +# way to detect whether they should call FetchContent_Populate() +# and pull the populated source into the build with add_subdirectory(), +# if they are using the populated content in that way. +function(FetchContent_GetProperties contentName) + + string(TOLOWER ${contentName} contentNameLower) + + set(options "") + set(oneValueArgs SOURCE_DIR BINARY_DIR POPULATED) + set(multiValueArgs "") + + cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT ARG_SOURCE_DIR AND + NOT ARG_BINARY_DIR AND + NOT ARG_POPULATED) + # No specific properties requested, provide them all + set(ARG_SOURCE_DIR ${contentNameLower}_SOURCE_DIR) + set(ARG_BINARY_DIR ${contentNameLower}_BINARY_DIR) + set(ARG_POPULATED ${contentNameLower}_POPULATED) + endif() + + set(prefix "_FetchContent_${contentNameLower}") + + if(ARG_SOURCE_DIR) + set(propertyName "${prefix}_sourceDir") + get_property(value GLOBAL PROPERTY ${propertyName}) + if(value) + set(${ARG_SOURCE_DIR} ${value} PARENT_SCOPE) + endif() + endif() + + if(ARG_BINARY_DIR) + set(propertyName "${prefix}_binaryDir") + get_property(value GLOBAL PROPERTY ${propertyName}) + if(value) + set(${ARG_BINARY_DIR} ${value} PARENT_SCOPE) + endif() + endif() + + if(ARG_POPULATED) + set(propertyName "${prefix}_populated") + get_property(value GLOBAL PROPERTY ${propertyName} DEFINED) + set(${ARG_POPULATED} ${value} PARENT_SCOPE) + endif() + +endfunction() + + +#======================================================================= +# Performing the population +#======================================================================= + +# The value of contentName will always have been lowercased by the caller. +# All other arguments are assumed to be options that are understood by +# ExternalProject_Add(), except for QUIET and SUBBUILD_DIR. +function(__FetchContent_directPopulate contentName) + + set(options + QUIET + ) + set(oneValueArgs + SUBBUILD_DIR + SOURCE_DIR + BINARY_DIR + # Prevent the following from being passed through + CONFIGURE_COMMAND + BUILD_COMMAND + INSTALL_COMMAND + TEST_COMMAND + ) + set(multiValueArgs "") + + cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT ARG_SUBBUILD_DIR) + message(FATAL_ERROR "Internal error: SUBBUILD_DIR not set") + elseif(NOT IS_ABSOLUTE "${ARG_SUBBUILD_DIR}") + set(ARG_SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_SUBBUILD_DIR}") + endif() + + if(NOT ARG_SOURCE_DIR) + message(FATAL_ERROR "Internal error: SOURCE_DIR not set") + elseif(NOT IS_ABSOLUTE "${ARG_SOURCE_DIR}") + set(ARG_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_SOURCE_DIR}") + endif() + + if(NOT ARG_BINARY_DIR) + message(FATAL_ERROR "Internal error: BINARY_DIR not set") + elseif(NOT IS_ABSOLUTE "${ARG_BINARY_DIR}") + set(ARG_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_BINARY_DIR}") + endif() + + # Ensure the caller can know where to find the source and build directories + # with some convenient variables. Doing this here ensures the caller sees + # the correct result in the case where the default values are overridden by + # the content details set by the project. + set(${contentName}_SOURCE_DIR "${ARG_SOURCE_DIR}" PARENT_SCOPE) + set(${contentName}_BINARY_DIR "${ARG_BINARY_DIR}" PARENT_SCOPE) + + # The unparsed arguments may contain spaces, so build up ARG_EXTRA + # in such a way that it correctly substitutes into the generated + # CMakeLists.txt file with each argument quoted. + unset(ARG_EXTRA) + foreach(arg IN LISTS ARG_UNPARSED_ARGUMENTS) + set(ARG_EXTRA "${ARG_EXTRA} \"${arg}\"") + endforeach() + + # Hide output if requested, but save it to a variable in case there's an + # error so we can show the output upon failure. When not quiet, don't + # capture the output to a variable because the user may want to see the + # output as it happens (e.g. progress during long downloads). Combine both + # stdout and stderr in the one capture variable so the output stays in order. + if (ARG_QUIET) + set(outputOptions + OUTPUT_VARIABLE capturedOutput + ERROR_VARIABLE capturedOutput + ) + else() + set(capturedOutput) + set(outputOptions) + message(STATUS "Populating ${contentName}") + endif() + + if(CMAKE_GENERATOR) + set(generatorOpts "-G${CMAKE_GENERATOR}") + if(CMAKE_GENERATOR_PLATFORM) + list(APPEND generatorOpts "-A${CMAKE_GENERATOR_PLATFORM}") + endif() + if(CMAKE_GENERATOR_TOOLSET) + list(APPEND generatorOpts "-T${CMAKE_GENERATOR_TOOLSET}") + endif() + + if(CMAKE_MAKE_PROGRAM) + list(APPEND generatorOpts "-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}") + endif() + + else() + # Likely we've been invoked via CMake's script mode where no + # generator is set (and hence CMAKE_MAKE_PROGRAM could not be + # trusted even if provided). We will have to rely on being + # able to find the default generator and build tool. + unset(generatorOpts) + endif() + + # Create and build a separate CMake project to carry out the population. + # If we've already previously done these steps, they will not cause + # anything to be updated, so extra rebuilds of the project won't occur. + # Make sure to pass through CMAKE_MAKE_PROGRAM in case the main project + # has this set to something not findable on the PATH. + configure_file("${__FetchContent_privateDir}/CMakeLists.cmake.in" + "${ARG_SUBBUILD_DIR}/CMakeLists.txt") + execute_process( + COMMAND ${CMAKE_COMMAND} ${generatorOpts} . + RESULT_VARIABLE result + ${outputOptions} + WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}" + ) + if(result) + if(capturedOutput) + message("${capturedOutput}") + endif() + message(FATAL_ERROR "CMake step for ${contentName} failed: ${result}") + endif() + execute_process( + COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + ${outputOptions} + WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}" + ) + if(result) + if(capturedOutput) + message("${capturedOutput}") + endif() + message(FATAL_ERROR "Build step for ${contentName} failed: ${result}") + endif() + +endfunction() + + +option(FETCHCONTENT_FULLY_DISCONNECTED "Disables all attempts to download or update content and assumes source dirs already exist") +option(FETCHCONTENT_UPDATES_DISCONNECTED "Enables UPDATE_DISCONNECTED behavior for all content population") +option(FETCHCONTENT_QUIET "Enables QUIET option for all content population" ON) +set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/_deps" CACHE PATH "Directory under which to collect all populated content") + +# Populate the specified content using details stored from +# an earlier call to FetchContent_Declare(). +function(FetchContent_Populate contentName) + + if(NOT contentName) + message(FATAL_ERROR "Empty contentName not allowed for FetchContent_Populate()") + endif() + + string(TOLOWER ${contentName} contentNameLower) + + if(ARGN) + # This is the direct population form with details fully specified + # as part of the call, so we already have everything we need + __FetchContent_directPopulate( + ${contentNameLower} + SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-subbuild" + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-build" + ${ARGN} # Could override any of the above ..._DIR variables + ) + + # Pass source and binary dir variables back to the caller + set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE) + set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE) + + # Don't set global properties, or record that we did this population, since + # this was a direct call outside of the normal declared details form. + # We only want to save values in the global properties for content that + # honours the hierarchical details mechanism so that projects are not + # robbed of the ability to override details set in nested projects. + return() + endif() + + # No details provided, so assume they were saved from an earlier call + # to FetchContent_Declare(). Do a check that we haven't already + # populated this content before in case the caller forgot to check. + FetchContent_GetProperties(${contentName}) + if(${contentNameLower}_POPULATED) + message(FATAL_ERROR "Content ${contentName} already populated in ${${contentNameLower}_SOURCE_DIR}") + endif() + + string(TOUPPER ${contentName} contentNameUpper) + set(FETCHCONTENT_SOURCE_DIR_${contentNameUpper} + "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}" + CACHE PATH "When not empty, overrides where to find pre-populated content for ${contentName}") + + if(FETCHCONTENT_SOURCE_DIR_${contentNameUpper}) + # The source directory has been explicitly provided in the cache, + # so no population is required + set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}") + set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build") + + elseif(FETCHCONTENT_FULLY_DISCONNECTED) + # Bypass population and assume source is already there from a previous run + set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src") + set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build") + + else() + # Support both a global "disconnect all updates" and a per-content + # update test (either one being set disables updates for this content). + option(FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper} + "Enables UPDATE_DISCONNECTED behavior just for population of ${contentName}") + if(FETCHCONTENT_UPDATES_DISCONNECTED OR + FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper}) + set(disconnectUpdates True) + else() + set(disconnectUpdates False) + endif() + + if(FETCHCONTENT_QUIET) + set(quietFlag QUIET) + else() + unset(quietFlag) + endif() + + __FetchContent_getSavedDetails(${contentName} contentDetails) + if("${contentDetails}" STREQUAL "") + message(FATAL_ERROR "No details have been set for content: ${contentName}") + endif() + + __FetchContent_directPopulate( + ${contentNameLower} + ${quietFlag} + UPDATE_DISCONNECTED ${disconnectUpdates} + SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-subbuild" + SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src" + BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build" + # Put the saved details last so they can override any of the + # the options we set above (this can include SOURCE_DIR or + # BUILD_DIR) + ${contentDetails} + ) + endif() + + __FetchContent_setPopulated( + ${contentName} + ${${contentNameLower}_SOURCE_DIR} + ${${contentNameLower}_BINARY_DIR} + ) + + # Pass variables back to the caller. The variables passed back here + # must match what FetchContent_GetProperties() sets when it is called + # with just the content name. + set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE) + set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE) + set(${contentNameLower}_POPULATED True PARENT_SCOPE) + +endfunction() diff --git a/cmake/Modules/FetchContent/CMakeLists.cmake.in b/cmake/Modules/FetchContent/CMakeLists.cmake.in new file mode 100644 index 0000000..9a7a771 --- /dev/null +++ b/cmake/Modules/FetchContent/CMakeLists.cmake.in @@ -0,0 +1,21 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +cmake_minimum_required(VERSION ${CMAKE_VERSION}) + +# We name the project and the target for the ExternalProject_Add() call +# to something that will highlight to the user what we are working on if +# something goes wrong and an error message is produced. + +project(${contentName}-populate NONE) + +include(ExternalProject) +ExternalProject_Add(${contentName}-populate + ${ARG_EXTRA} + SOURCE_DIR "${ARG_SOURCE_DIR}" + BINARY_DIR "${ARG_BINARY_DIR}" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cmake/Modules/README.md b/cmake/Modules/README.md new file mode 100644 index 0000000..c8d275f --- /dev/null +++ b/cmake/Modules/README.md @@ -0,0 +1,5 @@ + +## FetchContent + +`FetchContent.cmake` and `FetchContent/CMakeLists.cmake.in` +are copied from `cmake/3.11.0/share/cmake-3.11/Modules`. diff --git a/cmake/pybind11.cmake b/cmake/pybind11.cmake new file mode 100644 index 0000000..1677d11 --- /dev/null +++ b/cmake/pybind11.cmake @@ -0,0 +1,34 @@ +# Copyright (c) 2020 Fangjun Kuang (csukuangfj@gmail.com) +# See ../LICENSE for clarification regarding multiple authors + +function(download_pybind11) + if(CMAKE_VERSION VERSION_LESS 3.11) + list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) + endif() + + include(FetchContent) + + set(pybind11_URL "https://github.com/pybind/pybind11/archive/v2.6.0.tar.gz") + set(pybind11_HASH "SHA256=90b705137b69ee3b5fc655eaca66d0dc9862ea1759226f7ccd3098425ae69571") + + set(double_quotes "\"") + set(dollar "\$") + set(semicolon "\;") + FetchContent_Declare(pybind11 + URL ${pybind11_URL} + URL_HASH ${pybind11_HASH} + PATCH_COMMAND + sed -i s/\\${double_quotes}-flto\\\\${dollar}/\\${double_quotes}-Xcompiler=-flto${dollar}/g "tools/pybind11Tools.cmake" && + sed -i s/${seimcolon}-fno-fat-lto-objects/${seimcolon}-Xcompiler=-fno-fat-lto-objects/g "tools/pybind11Tools.cmake" + ) + + FetchContent_GetProperties(pybind11) + if(NOT pybind11_POPULATED) + message(STATUS "Downloading pybind11") + FetchContent_Populate(pybind11) + endif() + message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}") + add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL) +endfunction() + +download_pybind11() diff --git a/cmake/torch.cmake b/cmake/torch.cmake new file mode 100644 index 0000000..1377583 --- /dev/null +++ b/cmake/torch.cmake @@ -0,0 +1,39 @@ +# Copyright (c) 2020 Fangjun Kuang (csukuangfj@gmail.com) +# See ../LICENSE for clarification regarding multiple authors + +# PYTHON_EXECUTABLE is set by cmake/pybind11.cmake +message(STATUS "Python executable: ${PYTHON_EXECUTABLE}") +execute_process( + COMMAND "${PYTHON_EXECUTABLE}" -c "import os; import torch; print(os.path.dirname(torch.__file__))" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TORCH_DIR +) + +list(APPEND CMAKE_PREFIX_PATH "${TORCH_DIR}") +find_package(Torch REQUIRED) + +# set the global CMAKE_CXX_FLAGS so that +# kaldifeat uses the same ABI flag as PyTorch +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + +execute_process( + COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__)" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TORCH_VERSION +) + +message(STATUS "PyTorch version: ${TORCH_VERSION}") + +# Solve the following error for NVCC: +# unknown option `-Wall` +# +# It contains only some -Wno-* flags, so it is OK +# to set them to empty +set_property(TARGET torch_cuda + PROPERTY + INTERFACE_COMPILE_OPTIONS "" +) +set_property(TARGET torch_cpu + PROPERTY + INTERFACE_COMPILE_OPTIONS "" +) diff --git a/kaldifeat/CMakeLists.txt b/kaldifeat/CMakeLists.txt new file mode 100644 index 0000000..86735ca --- /dev/null +++ b/kaldifeat/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(csrc) diff --git a/kaldifeat/csrc/CMakeLists.txt b/kaldifeat/csrc/CMakeLists.txt new file mode 100644 index 0000000..e370851 --- /dev/null +++ b/kaldifeat/csrc/CMakeLists.txt @@ -0,0 +1,7 @@ +set(kaldifeat_srcs + feature-window.cc + mel-computations.cc +) + +add_library(kaldifeat_core SHARED ${kaldifeat_srcs}) +target_link_libraries(kaldifeat_core PUBLIC ${TORCH_LIBRARIES}) diff --git a/kaldifeat/csrc/feature-window.cc b/kaldifeat/csrc/feature-window.cc new file mode 100644 index 0000000..a9afcfb --- /dev/null +++ b/kaldifeat/csrc/feature-window.cc @@ -0,0 +1,52 @@ +// kaldifeat/csrc/feature-window.cc +// +// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) + +// This file is copied/modified from kaldi/src/feat/feature-window.cc + +#include "kaldifeat/csrc/feature-window.h" + +#include + +#include "torch/torch.h" + +#ifndef M_2PI +#define M_2PI 6.283185307179586476925286766559005 +#endif + +namespace kaldifeat { + +FeatureWindowFunction::FeatureWindowFunction( + const FrameExtractionOptions &opts) { + int32_t frame_length = opts.WindowSize(); + KALDIFEAT_ASSERT(frame_length > 0); + + window = torch::empty({frame_length}, torch::kFloat32); + float *window_data = window.data_ptr(); + + double a = M_2PI / (frame_length - 1); + for (int32_t i = 0; i < frame_length; i++) { + double i_fl = static_cast(i); + if (opts.window_type == "hanning") { + window_data[i] = 0.5 - 0.5 * cos(a * i_fl); + } else if (opts.window_type == "sine") { + // when you are checking ws wikipedia, please + // note that 0.5 * a = M_PI/(frame_length-1) + window_data[i] = sin(0.5 * a * i_fl); + } else if (opts.window_type == "hamming") { + window_data[i] = 0.54 - 0.46 * cos(a * i_fl); + } else if (opts.window_type == + "povey") { // like hamming but goes to zero at edges. + window_data[i] = pow(0.5 - 0.5 * cos(a * i_fl), 0.85); + } else if (opts.window_type == "rectangular") { + window_data[i] = 1.0; + } else if (opts.window_type == "blackman") { + window_data[i] = opts.blackman_coeff - 0.5 * cos(a * i_fl) + + (0.5 - opts.blackman_coeff) * cos(2 * a * i_fl); + } else { + KALDIFEAT_ERR << "Invalid window type " << opts.window_type; + } + } +} + +} // namespace kaldifeat diff --git a/kaldifeat/csrc/feature-window.h b/kaldifeat/csrc/feature-window.h new file mode 100644 index 0000000..d8229c5 --- /dev/null +++ b/kaldifeat/csrc/feature-window.h @@ -0,0 +1,66 @@ +// kaldifeat/csrc/feature-window.h +// +// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) + +// This file is copied/modified from kaldi/src/feat/feature-window.h + +#include "kaldifeat/csrc/log.h" +#include "torch/torch.h" + +#ifndef KALDIFEAT_CSRC_FEATURE_WINDOW_H_ +#define KALDIFEAT_CSRC_FEATURE_WINDOW_H_ + +namespace kaldifeat { + +inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) { + // copied from kaldi/src/base/kaldi-math.cc + KALDIFEAT_ASSERT(n > 0); + n--; + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + return n + 1; +} + +struct FrameExtractionOptions { + float samp_freq = 16000; + float frame_shift_ms = 10.0f; // in milliseconds. + float frame_length_ms = 25.0f; // in milliseconds. + float dither = 1.0f; // Amount of dithering, 0.0 means no dither. + float preemph_coeff = 0.97f; // Preemphasis coefficient. + bool remove_dc_offset = true; // Subtract mean of wave before FFT. + std::string window_type = "povey"; // e.g. Hamming window + // May be "hamming", "rectangular", "povey", "hanning", "sine", "blackman" + // "povey" is a window I made to be similar to Hamming but to go to zero at + // the edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85) I just don't think the + // Hamming window makes sense as a windowing function. + bool round_to_power_of_two = true; + float blackman_coeff = 0.42f; + bool snip_edges = true; + bool allow_downsample = false; + bool allow_upsample = false; + int32_t max_feature_vectors = -1; + + int32_t WindowShift() const { + return static_cast(samp_freq * 0.001f * frame_shift_ms); + } + int32_t WindowSize() const { + return static_cast(samp_freq * 0.001f * frame_length_ms); + } + int32_t PaddedWindowSize() const { + return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) + : WindowSize()); + } +}; + +struct FeatureWindowFunction { + FeatureWindowFunction() = default; + explicit FeatureWindowFunction(const FrameExtractionOptions &opts); + torch::Tensor window; +}; + +} // namespace kaldifeat + +#endif // KALDIFEAT_CSRC_FEATURE_WINDOW_H_ diff --git a/kaldifeat/csrc/log.h b/kaldifeat/csrc/log.h new file mode 100644 index 0000000..7c0b172 --- /dev/null +++ b/kaldifeat/csrc/log.h @@ -0,0 +1,94 @@ +// kaldifeat/csrc/log.h +// +// Copyright (c) 2020 Xiaomi Corporation (authors: Fangjun Kuang) + +#ifndef KALDIFEAT_CSRC_LOG_H_ +#define KALDIFEAT_CSRC_LOG_H_ + +#include +#include +#include + +namespace kaldifeat { + +enum class LogLevel { + kInfo = 0, + kWarn = 1, + kError = 2, // abort the program +}; + +class Logger { + public: + Logger(const char *filename, const char *func_name, uint32_t line_num, + LogLevel level) + : filename_(filename), + func_name_(func_name), + line_num_(line_num), + level_(level) { + os_ << filename << ":" << func_name << ":" << line_num << "\n"; + switch (level_) { + case LogLevel::kInfo: + os_ << "[I] "; + break; + case LogLevel::kWarn: + os_ << "[W] "; + break; + case LogLevel::kError: + os_ << "[E] "; + break; + } + } + + template + Logger &operator<<(const T &val) { + os_ << val; + return *this; + } + + ~Logger() { + std::cerr << os_.str() << "\n"; + if (level_ == LogLevel::kError) abort(); + } + + private: + std::ostringstream os_; + const char *filename_; + const char *func_name_; + uint32_t line_num_; + LogLevel level_; +}; + +class Voidifier { + public: + void operator&(const Logger &)const {} +}; + +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ + defined(__PRETTY_FUNCTION__) +// for clang and GCC +#define KALDIFEAT_FUNC __PRETTY_FUNCTION__ +#else +// for other compilers +#define KALDIFEAT_FUNC __func__ +#endif + +#define KALDIFEAT_LOG \ + kaldifeat::Logger(__FILE__, KALDIFEAT_FUNC, __LINE__, \ + kaldifeat::LogLevel::kInfo) + +#define KALDIFEAT_WARN \ + kaldifeat::Logger(__FILE__, KALDIFEAT_FUNC, __LINE__, \ + kaldifeat::LogLevel::kWarn) + +#define KALDIFEAT_ERR \ + kaldifeat::Logger(__FILE__, KALDIFEAT_FUNC, __LINE__, \ + kaldifeat::LogLevel::kError) + +#define KALDIFEAT_ASSERT(x) \ + (x) ? (void)0 \ + : kaldifeat::Voidifier() & KALDIFEAT_ERR << "Check failed!\n" \ + << "x: " << #x + +} // namespace kaldifeat + +#endif // KALDIFEAT_CSRC_LOG_H_ diff --git a/kaldifeat/csrc/mel-computations.cc b/kaldifeat/csrc/mel-computations.cc new file mode 100644 index 0000000..ccd3be3 --- /dev/null +++ b/kaldifeat/csrc/mel-computations.cc @@ -0,0 +1,180 @@ +// kaldifeat/csrc/mel-computations.cc +// +// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) +// +// This file is copied/modified from kaldi/src/feat/mel-computations.cc +// +#include "kaldifeat/csrc/mel-computations.h" + +#include "kaldifeat/csrc/feature-window.h" + +namespace kaldifeat { + +float MelBanks::VtlnWarpFreq( + float vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN. + float vtln_high_cutoff, + float low_freq, // upper+lower frequency cutoffs in mel computation + float high_freq, float vtln_warp_factor, float freq) { + /// This computes a VTLN warping function that is not the same as HTK's one, + /// but has similar inputs (this function has the advantage of never producing + /// empty bins). + + /// This function computes a warp function F(freq), defined between low_freq + /// and high_freq inclusive, with the following properties: + /// F(low_freq) == low_freq + /// F(high_freq) == high_freq + /// The function is continuous and piecewise linear with two inflection + /// points. + /// The lower inflection point (measured in terms of the unwarped + /// frequency) is at frequency l, determined as described below. + /// The higher inflection point is at a frequency h, determined as + /// described below. + /// If l <= f <= h, then F(f) = f/vtln_warp_factor. + /// If the higher inflection point (measured in terms of the unwarped + /// frequency) is at h, then max(h, F(h)) == vtln_high_cutoff. + /// Since (by the last point) F(h) == h/vtln_warp_factor, then + /// max(h, h/vtln_warp_factor) == vtln_high_cutoff, so + /// h = vtln_high_cutoff / max(1, 1/vtln_warp_factor). + /// = vtln_high_cutoff * min(1, vtln_warp_factor). + /// If the lower inflection point (measured in terms of the unwarped + /// frequency) is at l, then min(l, F(l)) == vtln_low_cutoff + /// This implies that l = vtln_low_cutoff / min(1, 1/vtln_warp_factor) + /// = vtln_low_cutoff * max(1, vtln_warp_factor) + + if (freq < low_freq || freq > high_freq) + return freq; // in case this gets called + // for out-of-range frequencies, just return the freq. + + KALDIFEAT_ASSERT(vtln_low_cutoff > low_freq); + KALDIFEAT_ASSERT(vtln_high_cutoff < high_freq); + + float one = 1.0f; + float l = vtln_low_cutoff * std::max(one, vtln_warp_factor); + float h = vtln_high_cutoff * std::min(one, vtln_warp_factor); + float scale = 1.0f / vtln_warp_factor; + float Fl = scale * l; // F(l); + float Fh = scale * h; // F(h); + KALDIFEAT_ASSERT(l > low_freq && h < high_freq); + // slope of left part of the 3-piece linear function + float scale_left = (Fl - low_freq) / (l - low_freq); + // [slope of center part is just "scale"] + + // slope of right part of the 3-piece linear function + float scale_right = (high_freq - Fh) / (high_freq - h); + + if (freq < l) { + return low_freq + scale_left * (freq - low_freq); + } else if (freq < h) { + return scale * freq; + } else { // freq >= h + return high_freq + scale_right * (freq - high_freq); + } +} + +float MelBanks::VtlnWarpMelFreq( + float vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN. + float vtln_high_cutoff, + float low_freq, // upper+lower frequency cutoffs in mel computation + float high_freq, float vtln_warp_factor, float mel_freq) { + return MelScale(VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, low_freq, + high_freq, vtln_warp_factor, + InverseMelScale(mel_freq))); +} + +MelBanks::MelBanks(const MelBanksOptions &opts, + const FrameExtractionOptions &frame_opts, + float vtln_warp_factor) + : htk_mode_(opts.htk_mode) { + int32_t num_bins = opts.num_bins; + if (num_bins < 3) KALDIFEAT_ERR << "Must have at least 3 mel bins"; + + float sample_freq = frame_opts.samp_freq; + int32_t window_length_padded = frame_opts.PaddedWindowSize(); + KALDIFEAT_ASSERT(window_length_padded % 2 == 0); + + int32_t num_fft_bins = window_length_padded / 2; + float nyquist = 0.5f * sample_freq; + + float low_freq = opts.low_freq, high_freq; + if (opts.high_freq > 0.0f) + high_freq = opts.high_freq; + else + high_freq = nyquist + opts.high_freq; + + if (low_freq < 0.0f || low_freq >= nyquist || high_freq <= 0.0f || + high_freq > nyquist || high_freq <= low_freq) + KALDIFEAT_ERR << "Bad values in options: low-freq " << low_freq + << " and high-freq " << high_freq << " vs. nyquist " + << nyquist; + + float fft_bin_width = sample_freq / window_length_padded; + // fft-bin width [think of it as Nyquist-freq / half-window-length] + + float mel_low_freq = MelScale(low_freq); + float mel_high_freq = MelScale(high_freq); + + debug_ = opts.debug_mel; + + // divide by num_bins+1 in next line because of end-effects where the bins + // spread out to the sides. + float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1); + + float vtln_low = opts.vtln_low, vtln_high = opts.vtln_high; + if (vtln_high < 0.0f) { + vtln_high += nyquist; + } + + if (vtln_warp_factor != 1.0f && + (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq || + vtln_high <= 0.0f || vtln_high >= high_freq || vtln_high <= vtln_low)) + KALDIFEAT_ERR << "Bad values in options: vtln-low " << vtln_low + << " and vtln-high " << vtln_high << ", versus " + << "low-freq " << low_freq << " and high-freq " << high_freq; + + bins_mat_ = torch::zeros({num_bins, num_fft_bins}, torch::kFloat); + int32_t stride = bins_mat_.strides()[0]; + + for (int32_t bin = 0; bin < num_bins; ++bin) { + float left_mel = mel_low_freq + bin * mel_freq_delta, + center_mel = mel_low_freq + (bin + 1) * mel_freq_delta, + right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; + + if (vtln_warp_factor != 1.0f) { + left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq, + vtln_warp_factor, left_mel); + center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq, + vtln_warp_factor, center_mel); + right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq, + vtln_warp_factor, right_mel); + } + // this_bin will be a vector of coefficients that is only + // nonzero where this mel bin is active. + float *this_bin = bins_mat_.data_ptr() + bin * stride; + int32_t first_index = -1, last_index = -1; + for (int32_t i = 0; i < num_fft_bins; ++i) { + float freq = (fft_bin_width * i); // Center frequency of this fft + // bin. + float mel = MelScale(freq); + if (mel > left_mel && mel < right_mel) { + float weight; + if (mel <= center_mel) + weight = (mel - left_mel) / (center_mel - left_mel); + else + weight = (right_mel - mel) / (right_mel - center_mel); + this_bin[i] = weight; + if (first_index == -1) first_index = i; + last_index = i; + } + } + KALDIFEAT_ASSERT(first_index != -1 && last_index >= first_index && + "You may have set num_mel_bins too large."); + + // Replicate a bug in HTK, for testing purposes. + if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) + this_bin[first_index] = 0.0f; + } + + if (debug_) KALDIFEAT_LOG << bins_mat_; +} + +} // namespace kaldifeat diff --git a/kaldifeat/csrc/mel-computations.h b/kaldifeat/csrc/mel-computations.h new file mode 100644 index 0000000..3c26bfd --- /dev/null +++ b/kaldifeat/csrc/mel-computations.h @@ -0,0 +1,74 @@ +// kaldifeat/csrc/mel-computations.h +// +// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) +// +// This file is copied/modified from kaldi/src/feat/mel-computations.h + +#include + +#include "kaldifeat/csrc/feature-window.h" + +#ifndef KALDIFEAT_CSRC_MEL_COMPUTATIONS_H_ +#define KALDIFEAT_CSRC_MEL_COMPUTATIONS_H_ + +namespace kaldifeat { + +struct MelBanksOptions { + int32_t num_bins = 25; // e.g. 25; number of triangular bins + float low_freq = 20; // e.g. 20; lower frequency cutoff + + // an upper frequency cutoff; 0 -> no cutoff, negative + // ->added to the Nyquist frequency to get the cutoff. + float high_freq = 0; + + float vtln_low = 100; // vtln lower cutoff of warping function. + + // vtln upper cutoff of warping function: if negative, added + // to the Nyquist frequency to get the cutoff. + float vtln_high = -500; + + bool debug_mel = false; + // htk_mode is a "hidden" config, it does not show up on command line. + // Enables more exact compatibility with HTK, for testing purposes. Affects + // mel-energy flooring and reproduces a bug in HTK. + bool htk_mode = false; +}; + +class MelBanks { + public: + static inline float InverseMelScale(float mel_freq) { + return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f); + } + + static inline float MelScale(float freq) { + return 1127.0f * logf(1.0f + freq / 700.0f); + } + + static float VtlnWarpFreq( + float vtln_low_cutoff, + float vtln_high_cutoff, // discontinuities in warp func + float low_freq, + float high_freq, // upper+lower frequency cutoffs in + // the mel computation + float vtln_warp_factor, float freq); + + static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff, + float low_freq, float high_freq, + float vtln_warp_factor, float mel_freq); + + MelBanks(const MelBanksOptions &opts, + const FrameExtractionOptions &frame_opts, float vtln_warp_factor); + + int32_t NumBins() const { return static_cast(bins_mat_.sizes()[0]); } + + private: + // A 2-D matrix of shape [num_bins, num_fft_bins] + torch::Tensor bins_mat_; + + bool debug_; + bool htk_mode_; +}; + +} // namespace kaldifeat + +#endif // KALDIFEAT_CSRC_MEL_COMPUTATIONS_H_ diff --git a/kaldifeat/python/kaldifeat/__init__.py b/kaldifeat/python/kaldifeat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..11c9f52 --- /dev/null +++ b/setup.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang) + +import glob +import os +import re +import setuptools +import shutil +import subprocess + +from setuptools.command.build_ext import build_ext + + +def read_long_description(): + with open('README.md', encoding='utf8') as f: + readme = f.read() + return readme + + +def get_package_version(): + with open('CMakeLists.txt') as f: + content = f.read() + + latest_version = re.search(r'set\(kaldifeat_VERSION (.*)\)', + content).group(1) + latest_version = latest_version.strip('"') + return latest_version + + +package_name = 'kaldifeat' + +setuptools.setup( + name=package_name, + version=get_package_version(), + author='Fangjun Kuang', + author_email='csukuangfj@gmail.com', + data_files=[('', ['LICENSE', 'README.md'])], + package_dir={ + package_name: 'kaldifeat/python/kaldifeat', + }, + packages=[package_name], + url='https://github.com/csukuangfj/kaldifeat', + long_description=read_long_description(), + long_description_content_type='text/markdown', + # ext_modules=[cmake_extension('_kaldifeat')], + # cmdclass={'build_ext': BuildExtension}, + zip_safe=False, + classifiers=[ + 'Programming Language :: C++', + 'Programming Language :: Python', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + ], + license='Apache licensed, as found in the LICENSE file', +)